From e7183d83d367bb52f502266b11b5b6dff683279e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Dec 2015 16:05:56 -0400 Subject: [PATCH] fsck for v6 unlocked files This only adds 1 stat to each file fscked for locked files, so added overhead is minimal. For unlocked files it has to access the database to see if a file is modified. --- CmdLine/Seek.hs | 2 +- Command/Fsck.hs | 99 ++++++++++++++++++++++++++++---------------- Command/Migrate.hs | 2 +- doc/todo/smudge.mdwn | 1 - 4 files changed, 66 insertions(+), 38 deletions(-) diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 48545ce042..f4ac4dfada 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -125,7 +125,7 @@ withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged withFilesUnlockedToBeCommitted :: (FilePath -> CommandStart) -> CmdParams -> CommandSeek withFilesUnlockedToBeCommitted = withFilesUnlocked' LsFiles.typeChangedStaged -{- Unlocked files have changed type from a symlink to a regular file. +{- Unlocked files before v6 have changed type from a symlink to a regular file. - - Furthermore, unlocked files used to be a git-annex symlink, - not some other sort of symlink. diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 1531d2ab76..74e83670c1 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -34,6 +34,7 @@ import Utility.HumanTime import Utility.CopyFile import Git.FilePath import Utility.PID +import qualified Database.Keys #ifdef WITH_DATABASE import qualified Database.Fsck as FsckDb @@ -118,16 +119,18 @@ start from inc file key = do go = runFsck inc file key perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool -perform key file backend numcopies = check - -- order matters - [ fixLink key file - , verifyLocationLog key file - , verifyDirectMapping key file - , verifyDirectMode key file - , checkKeySize key - , checkBackend backend key (Just file) - , checkKeyNumCopies key (Just file) numcopies - ] +perform key file backend numcopies = do + keystatus <- getKeyStatus key + check + -- order matters + [ fixLink key file + , verifyLocationLog key keystatus file + , verifyDirectMapping key file + , verifyDirectMode key file + , checkKeySize key keystatus + , checkBackend backend key keystatus (Just file) + , checkKeyNumCopies key (Just file) numcopies + ] {- To fsck a remote, the content is retrieved to a tmp file, - and checked locally. -} @@ -183,19 +186,19 @@ startKey inc key numcopies = performKey key backend numcopies performKey :: Key -> Backend -> NumCopies -> Annex Bool -performKey key backend numcopies = check - [ verifyLocationLog key (key2file key) - , checkKeySize key - , checkBackend backend key Nothing - , checkKeyNumCopies key Nothing numcopies - ] +performKey key backend numcopies = do + keystatus <- getKeyStatus key + check + [ verifyLocationLog key keystatus (key2file key) + , checkKeySize key keystatus + , checkBackend backend key keystatus Nothing + , checkKeyNumCopies key Nothing numcopies + ] check :: [Annex Bool] -> Annex Bool check cs = and <$> sequence cs -{- Checks that the file's link points correctly to the content. - - - - In direct mode, there is only a link when the content is not present. +{- Checks that symlinks points correctly to the annexed content. -} fixLink :: Key -> FilePath -> Annex Bool fixLink key file = do @@ -214,19 +217,23 @@ fixLink key file = do {- Checks that the location log reflects the current status of the key, - in this repository only. -} -verifyLocationLog :: Key -> String -> Annex Bool -verifyLocationLog key desc = do - present <- inAnnex key +verifyLocationLog :: Key -> KeyStatus -> String -> Annex Bool +verifyLocationLog key keystatus desc = do + obj <- calcRepo $ gitAnnexLocation key + present <- if isKeyUnlocked keystatus + then liftIO (doesFileExist obj) + else inAnnex key direct <- isDirect u <- getUUID - {- Since we're checking that a key's file is present, throw + {- Since we're checking that a key's object file is present, throw - in a permission fixup here too. -} - file <- calcRepo $ gitAnnexLocation key - when (present && not direct) $ - freezeContent file - whenM (liftIO $ doesDirectoryExist $ parentDir file) $ - freezeContentDir file + when (present && not direct) $ void $ tryIO $ + if isKeyUnlocked keystatus + then thawContent obj + else freezeContent obj + whenM (liftIO $ doesDirectoryExist $ parentDir obj) $ + freezeContentDir obj {- In direct mode, modified files will show up as not present, - but that is expected and not something to do anything about. -} @@ -288,10 +295,11 @@ verifyDirectMode key file = do {- The size of the data for a key is checked against the size encoded in - the key's metadata, if available. - - - Not checked in direct mode, because files can be changed directly. + - Not checked when a file is unlocked, or in direct mode. -} -checkKeySize :: Key -> Annex Bool -checkKeySize key = ifM isDirect +checkKeySize :: Key -> KeyStatus -> Annex Bool +checkKeySize _ KeyUnlocked = return True +checkKeySize key KeyLocked = ifM isDirect ( return True , do file <- calcRepo $ gitAnnexLocation key @@ -326,18 +334,26 @@ checkKeySizeOr bad key file = case Types.Key.keySize key of , msg ] -{- Runs the backend specific check on a key's content. +{- Runs the backend specific check on a key's content object. + - + - When a file is unlocked, it may be a hard link to the object, + - thus when the user modifies the file, the object will be modified and + - not pass the check, and we don't want to find an error in this case. + - So, skip the check if the key is unlocked and modified. - - In direct mode this is not done if the file has clearly been modified, - because modification of direct mode files is allowed. It's still done - if the file does not appear modified, to catch disk corruption, etc. -} -checkBackend :: Backend -> Key -> Maybe FilePath -> Annex Bool -checkBackend backend key mfile = go =<< isDirect +checkBackend :: Backend -> Key -> KeyStatus -> Maybe FilePath -> Annex Bool +checkBackend backend key keystatus mfile = go =<< isDirect where go False = do content <- calcRepo $ gitAnnexLocation key - checkBackendOr badContent backend key content + ifM (pure (isKeyUnlocked keystatus) <&&> (not <$> isUnmodified key content)) + ( nocheck + , checkBackendOr badContent backend key content + ) go True = maybe nocheck checkdirect mfile checkdirect file = ifM (goodContent key file) ( checkBackendOr' (badContentDirect file) backend key file @@ -582,3 +598,16 @@ withFsckDb (StartIncremental h) a = a h withFsckDb NonIncremental _ = noop withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a #endif + +data KeyStatus = KeyLocked | KeyUnlocked + +isKeyUnlocked :: KeyStatus -> Bool +isKeyUnlocked KeyUnlocked = True +isKeyUnlocked KeyLocked = False + +getKeyStatus :: Key -> Annex KeyStatus +getKeyStatus key = do + obj <- calcRepo $ gitAnnexLocation key + unlocked <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) + <&&> (not . null <$> Database.Keys.getAssociatedFiles key) + return $ if unlocked then KeyUnlocked else KeyLocked diff --git a/Command/Migrate.hs b/Command/Migrate.hs index d1c7902d7d..b8d2eea877 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -72,7 +72,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey go (Just (newkey, knowngoodcontent)) | knowngoodcontent = finish newkey | otherwise = stopUnless checkcontent $ finish newkey - checkcontent = Command.Fsck.checkBackend oldbackend oldkey $ Just file + checkcontent = Command.Fsck.checkBackend oldbackend oldkey Command.Fsck.KeyLocked $ Just file finish newkey = stopUnless (Command.ReKey.linkKey oldkey newkey) $ next $ Command.ReKey.cleanup file oldkey newkey genkey = case maybe Nothing (\fm -> fm oldkey newbackend (Just file)) (fastMigrate oldbackend) of diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index ce1db34a24..5cff8672cc 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -336,7 +336,6 @@ files to be unlocked, while the indirect upgrades don't touch the files. long-lived processes. * Make v6 upgrade convert direct mode repo to repo with all unlocked files. -* fsck will need some fixes to handle unlocked files. * Make automatic merge conflict resolution work for pointer files. - Should probably automatically handle merge conflicts between annex symlinks and pointer files too. Maybe by always resulting in a pointer