lock: Fix edge cases where data loss could occur in v6 mode.

In the case where the pointer file is in place, and not the content
of the object, lock's performNew was called with filemodified=True,
which caused it to try to repopulate the object from an unmodified
associated file, of which there were none. So, the content of the object
got thrown away incorrectly. This was the cause (although not the root
cause) of data loss in https://github.com/datalad/datalad/issues/1020

The same problem could also occur when the work tree file is modified,
but the object is not, and lock is called with --force. Added a test case
for this, since it's exercising the same code path and is easier to set up
than the problem above.

Note that this only occurred when the keys database did not have an inode
cache recorded for the annex object. Normally, the annex object would be in
there, but there are of course circumstances where the inode cache is out
of sync with reality, since it's only a cache.

Fixed by checking if the object is unmodified; if so we don't need to
try to repopulate it. This does add an additional checksum to the unlock
path, but it's already checksumming the worktree file in another case,
so it doesn't slow it down overall.

Further investigation found a similar problem occurred when smudge --clean
is called on a file and the inode cache is not populated. cleanOldKeys
deleted the unmodified old object file in this case. This was also
fixed by checking if the object is unmodified.

In general, use of getInodeCaches and sameInodeCache is potentially
dangerous if the inode cache has not gotten populated for some reason.
Better to use isUnmodified. I briefly audited other places that check the
inode cache, and did not see any immediate problems, but it would be easy
to miss this kind of problem.
This commit is contained in:
Joey Hess 2016-10-17 12:56:26 -04:00
parent 7baa96224f
commit ee309d6941
No known key found for this signature in database
GPG key ID: C910D9222512E3C7
4 changed files with 47 additions and 25 deletions

View file

@ -244,10 +244,9 @@ cleanOldKeys file newkey = do
topf <- inRepo (toTopFilePath file) topf <- inRepo (toTopFilePath file)
oldkeys <- filter (/= newkey) oldkeys <- filter (/= newkey)
<$> Database.Keys.getAssociatedKey topf <$> Database.Keys.getAssociatedKey topf
forM_ oldkeys $ \key -> do forM_ oldkeys $ \key ->
obj <- calcRepo (gitAnnexLocation key) unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do
caches <- Database.Keys.getInodeCaches key caches <- Database.Keys.getInodeCaches key
unlessM (sameInodeCache obj caches) $ do
unlinkAnnex key unlinkAnnex key
fs <- filter (/= ingestedf) fs <- filter (/= ingestedf)
. map (`fromTopFilePath` g) . map (`fromTopFilePath` g)

View file

@ -1,3 +1,10 @@
git-annex (6.20161013) UNRELEASED; urgency=medium
* lock, smudge: Fix edge cases where data loss could occur in v6 mode
when the keys database was not populated.
-- Joey Hess <id@joeyh.name> Mon, 17 Oct 2016 12:46:54 -0400
git-annex (6.20161012) unstable; urgency=medium git-annex (6.20161012) unstable; urgency=medium
* Optimisations to time it takes git-annex to walk working tree and find * Optimisations to time it takes git-annex to walk working tree and find

View file

@ -45,27 +45,27 @@ startNew file key = ifM (isJust <$> isAnnexLink file)
) )
where where
go (Just key') go (Just key')
| key' == key = cont True | key' == key = cont
| otherwise = errorModified | otherwise = errorModified
go Nothing = go Nothing =
ifM (isUnmodified key file) ifM (isUnmodified key file)
( cont False ( cont
, ifM (Annex.getState Annex.force) , ifM (Annex.getState Annex.force)
( cont True ( cont
, errorModified , errorModified
) )
) )
cont = next . performNew file key cont = next $ performNew file key
performNew :: FilePath -> Key -> Bool -> CommandPerform performNew :: FilePath -> Key -> CommandPerform
performNew file key filemodified = do performNew file key = do
lockdown =<< calcRepo (gitAnnexLocation key) lockdown =<< calcRepo (gitAnnexLocation key)
addLink file key addLink file key
=<< withTSDelta (liftIO . genInodeCache file) =<< withTSDelta (liftIO . genInodeCache file)
next $ cleanupNew file key next $ cleanupNew file key
where where
lockdown obj = do lockdown obj = do
ifM (catchBoolIO $ sameInodeCache obj =<< Database.Keys.getInodeCaches key) ifM (isUnmodified key obj)
( breakhardlink obj ( breakhardlink obj
, repopulate obj , repopulate obj
) )
@ -83,20 +83,18 @@ performNew file key filemodified = do
Database.Keys.storeInodeCaches key [obj] Database.Keys.storeInodeCaches key [obj]
-- Try to repopulate obj from an unmodified associated file. -- Try to repopulate obj from an unmodified associated file.
repopulate obj repopulate obj = modifyContent obj $ do
| filemodified = modifyContent obj $ do g <- Annex.gitRepo
g <- Annex.gitRepo fs <- map (`fromTopFilePath` g)
fs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key
<$> Database.Keys.getAssociatedFiles key mfile <- firstM (isUnmodified key) fs
mfile <- firstM (isUnmodified key) fs liftIO $ nukeFile obj
liftIO $ nukeFile obj case mfile of
case mfile of Just unmodified ->
Just unmodified -> unlessM (checkedCopyFile key unmodified obj Nothing)
unlessM (checkedCopyFile key unmodified obj Nothing) lostcontent
lostcontent Nothing -> lostcontent
Nothing -> lostcontent
| otherwise = modifyContent obj $
liftIO $ renameFile file obj
lostcontent = logStatus key InfoMissing lostcontent = logStatus key InfoMissing
cleanupNew :: FilePath -> Key -> CommandCleanup cleanupNew :: FilePath -> Key -> CommandCleanup

18
Test.hs
View file

@ -212,6 +212,7 @@ unitTests note = testGroup ("Unit Tests " ++ note)
, testCase "move" test_move , testCase "move" test_move
, testCase "copy" test_copy , testCase "copy" test_copy
, testCase "lock" test_lock , testCase "lock" test_lock
, testCase "lock (v6 --force)" test_lock_v6_force
, testCase "edit (no pre-commit)" test_edit , testCase "edit (no pre-commit)" test_edit
, testCase "edit (pre-commit)" test_edit_precommit , testCase "edit (pre-commit)" test_edit_precommit
, testCase "partial commit" test_partial_commit , testCase "partial commit" test_partial_commit
@ -613,6 +614,23 @@ test_lock = intmpclonerepoInDirect $ do
r' <- git_annex "drop" [annexedfile] r' <- git_annex "drop" [annexedfile]
not r' @? "drop wrongly succeeded with no known copy of modified file" not r' @? "drop wrongly succeeded with no known copy of modified file"
-- Regression test: lock --force when work tree file
-- was modified lost the (unmodified) annex object.
-- (Only occurred when the keys database was out of sync.)
test_lock_v6_force :: Assertion
test_lock_v6_force = intmpclonerepoInDirect $ do
git_annex "upgrade" [] @? "upgrade failed"
whenM (annexeval Annex.Version.versionSupportsUnlockedPointers) $ do
git_annex "get" [annexedfile] @? "get of file failed"
git_annex "unlock" [annexedfile] @? "unlock failed in v6 mode"
annexeval $ do
dbdir <- Annex.fromRepo Annex.Locations.gitAnnexKeysDb
liftIO $ removeDirectoryRecursive dbdir
writeFile annexedfile "test_lock_v6_force content"
not <$> git_annex "lock" [annexedfile] @? "lock of modified file failed to fail in v6 mode"
git_annex "lock" ["--force", annexedfile] @? "lock --force of modified file failed in v6 mode"
annexed_present_locked annexedfile
test_edit :: Assertion test_edit :: Assertion
test_edit = test_edit' False test_edit = test_edit' False