diff --git a/CHANGELOG b/CHANGELOG index a1f7d67604..a0726c378e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -29,6 +29,8 @@ git-annex (7.20190220) UNRELEASED; urgency=medium to support filenames starting with dashes. (To update the config of existing repositories, you can re-run git-annex init.) + * fsck: Detect situations where annex.thin has caused data loss + to the content of locked files. -- Joey Hess Wed, 20 Feb 2019 14:20:59 -0400 diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 43a892cf25..11f9863312 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2018 Joey Hess + - Copyright 2010-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -227,7 +227,7 @@ verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool verifyLocationLog key keystatus ai = do direct <- isDirect obj <- calcRepo $ gitAnnexLocation key - present <- if not direct && isKeyUnlocked keystatus + present <- if not direct && isKeyUnlockedThin keystatus then liftIO (doesFileExist obj) else inAnnex key u <- getUUID @@ -235,9 +235,10 @@ verifyLocationLog key keystatus ai = do {- Since we're checking that a key's object file is present, throw - in a permission fixup here too. -} when (present && not direct) $ do - void $ tryIO $ if isKeyUnlocked keystatus - then thawContent obj - else freezeContent obj + void $ tryIO $ case keystatus of + KeyUnlockedThin -> thawContent obj + KeyLockedThin -> thawContent obj + _ -> freezeContent obj unlessM (isContentWritePermOk obj) $ warning $ "** Unable to set correct write mode for " ++ obj ++ " ; perhaps you don't own that file" whenM (liftIO $ doesDirectoryExist $ parentDir obj) $ @@ -326,13 +327,11 @@ verifyAssociatedFiles key keystatus file = do forM_ fs $ \f -> unlessM (liftIO $ doesFileExist f) $ void $ Direct.removeAssociatedFile key f - goindirect = case keystatus of - KeyUnlocked -> do - f <- inRepo $ toTopFilePath file - afs <- Database.Keys.getAssociatedFiles key - unless (getTopFilePath f `elem` map getTopFilePath afs) $ - Database.Keys.addAssociatedFile key f - _ -> return () + goindirect = when (isKeyUnlockedThin keystatus) $ do + f <- inRepo $ toTopFilePath file + afs <- Database.Keys.getAssociatedFiles key + unless (getTopFilePath f `elem` map getTopFilePath afs) $ + Database.Keys.addAssociatedFile key f verifyWorkTree :: Key -> FilePath -> Annex Bool verifyWorkTree key file = do @@ -372,7 +371,7 @@ verifyWorkTree key file = do - Not checked when a file is unlocked, or in direct mode. -} checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool -checkKeySize _ KeyUnlocked _ = return True +checkKeySize _ KeyUnlockedThin _ = return True checkKeySize key _ ai = do file <- calcRepo $ gitAnnexLocation key ifM (liftIO $ doesFileExist file) @@ -450,7 +449,7 @@ checkBackend backend key keystatus afile = go =<< isDirect where go False = do content <- calcRepo $ gitAnnexLocation key - ifM (pure (isKeyUnlocked keystatus) <&&> (not <$> isUnmodified key content)) + ifM (pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content)) ( nocheck , checkBackendOr badContent backend key content (mkActionItem afile) ) @@ -700,28 +699,42 @@ withFsckDb (StartIncremental h) a = a h withFsckDb NonIncremental _ = noop withFsckDb (ScheduleIncremental _ _ i) a = withFsckDb i a -data KeyStatus = KeyLocked | KeyUnlocked | KeyMissing +data KeyStatus + = KeyMissing + | KeyPresent + | KeyUnlockedThin + -- ^ An annex.thin worktree file is hard linked to the object. + | KeyLockedThin + -- ^ The object has hard links, but the file being fscked + -- is not the one that hard links to it. + deriving (Show) -isKeyUnlocked :: KeyStatus -> Bool -isKeyUnlocked KeyUnlocked = True -isKeyUnlocked KeyLocked = False -isKeyUnlocked KeyMissing = False +isKeyUnlockedThin :: KeyStatus -> Bool +isKeyUnlockedThin KeyUnlockedThin = True +isKeyUnlockedThin KeyLockedThin = False +isKeyUnlockedThin KeyPresent = False +isKeyUnlockedThin KeyMissing = False getKeyStatus :: Key -> Annex KeyStatus getKeyStatus key = ifM isDirect - ( return KeyUnlocked + ( return KeyUnlockedThin , catchDefaultIO KeyMissing $ do - unlocked <- not . null <$> Database.Keys.getAssociatedFiles key - return $ if unlocked then KeyUnlocked else KeyLocked + afs <- not . null <$> Database.Keys.getAssociatedFiles key + obj <- calcRepo $ gitAnnexLocation key + multilink <- ((> 1) . linkCount <$> liftIO (getFileStatus obj)) + return $ if multilink && afs + then KeyUnlockedThin + else KeyPresent ) getKeyFileStatus :: Key -> FilePath -> Annex KeyStatus getKeyFileStatus key file = do s <- getKeyStatus key case s of - KeyLocked -> catchDefaultIO KeyLocked $ + KeyUnlockedThin -> catchDefaultIO KeyUnlockedThin $ ifM (isJust <$> isAnnexLink file) - ( return KeyLocked - , return KeyUnlocked + ( return KeyLockedThin + , return KeyUnlockedThin ) _ -> return s + diff --git a/Command/Migrate.hs b/Command/Migrate.hs index dc0e32b932..0f9471b41d 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -70,7 +70,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken go (Just (newkey, knowngoodcontent)) | knowngoodcontent = finish newkey | otherwise = stopUnless checkcontent $ finish newkey - checkcontent = Command.Fsck.checkBackend oldbackend oldkey Command.Fsck.KeyLocked afile + checkcontent = Command.Fsck.checkBackend oldbackend oldkey Command.Fsck.KeyPresent afile finish newkey = ifM (Command.ReKey.linkKey file oldkey newkey) ( do _ <- copyMetaData oldkey newkey diff --git a/doc/bugs/data_loss_from_dedup_when_v7_unlocked_file_is_duplicated__47__truncated.mdwn b/doc/bugs/data_loss_from_dedup_when_v7_unlocked_file_is_duplicated__47__truncated.mdwn index 663acc53cf..b989fee7dc 100644 --- a/doc/bugs/data_loss_from_dedup_when_v7_unlocked_file_is_duplicated__47__truncated.mdwn +++ b/doc/bugs/data_loss_from_dedup_when_v7_unlocked_file_is_duplicated__47__truncated.mdwn @@ -110,3 +110,5 @@ fsck ffoo ok ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) I'm hopeful! But just getting started :( +> git-annex fsck made to detect and clean up after this, which I consider +> sufficient, so [[done]] --[[Joey]] diff --git a/doc/bugs/data_loss_from_dedup_when_v7_unlocked_file_is_duplicated__47__truncated/comment_3_82afe6abb793aef90e3ddc42c2e5c3f2._comment b/doc/bugs/data_loss_from_dedup_when_v7_unlocked_file_is_duplicated__47__truncated/comment_3_82afe6abb793aef90e3ddc42c2e5c3f2._comment new file mode 100644 index 0000000000..e14adcc5ba --- /dev/null +++ b/doc/bugs/data_loss_from_dedup_when_v7_unlocked_file_is_duplicated__47__truncated/comment_3_82afe6abb793aef90e3ddc42c2e5c3f2._comment @@ -0,0 +1,30 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2019-03-18T18:16:53Z" + content=""" +annex.thin does allow data loss to happen, and just because there's +another annex link in the repository that points at the same object +file does not mean data loss is not acceptable. + +While it might seem that, when adding the file with dup content, git-annex +could notice that the object file has been modified, and so replace it with +the new copy from ffoo, that would leave the same problem in +this equivilant situation: + + # echo hi > 1 + # echo hi > 2 + # git annex add 1 2 + # git annex unlock 1 + # echo bye > 1 + # cat 2 + bye + +The surprising thing to me is that `git annex fsck 2` (or ffoo in your +example) doesn't find any problem with it, despite it pointing at a +changed object file that doesn't have the right hash. + +Fsck doesn't want to complain about *expected* data loss when an unlocked +file has been modified and annex.thin caused the old version to be lost. +But, when fscking an annex symlink, that shouldn't apply. +"""]]