From 3b5a3e168d8decd196509ad582ad4b8795d979a6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 26 Jul 2021 17:33:49 -0400 Subject: [PATCH] check if object is modified before starting to send it Fix bug that caused some transfers to incorrectly fail with "content changed while it was being sent", when the content was not changed. While I don't know how to reproduce the problem that several people reported, it is presumably due to the inode cache somehow being stale. So check isUnmodified', and if it's not modified, include the file's current inode cache in the set to accept, when checking for modification after the transfer. That seems like the right thing to do for another reason: The failure says the file changed while it was being sent, but if the object file was changed before the transfer started, that's wrong. So it needs to check before allowing the transfer at all if the file is modified. (Other calls to sameInodeCache or elemInodeCaches, when operating on inode caches from the database, could also be problimatic if the inode cache is somehow getting stale. This does not address such problems.) Sponsored-by: Dartmouth College's Datalad project --- Annex/Content.hs | 15 +++++++++--- Annex/Content/Presence.hs | 24 +++++++++++++------ CHANGELOG | 3 +++ ..._9a1c92462a5393155298d70ef90d3019._comment | 11 +++++++++ 4 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 doc/bugs/__34__failed_to_send_content_to_remote__34__/comment_19_9a1c92462a5393155298d70ef90d3019._comment diff --git a/Annex/Content.hs b/Annex/Content.hs index 39b9d52a7e..86a3a97998 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -483,9 +483,18 @@ prepSendAnnex key = withObjectLoc key $ \f -> do fastDebug "Annex.Content" ("found no inode cache for " ++ show f) maybeToList <$> withTSDelta (liftIO . genInodeCache f) - else do - fastDebug "Annex.Content" ("found inode cache for " ++ show f) - pure cache + -- Verify that the object is not modified. Usually this + -- only has to check the inode cache, but if the cache + -- is somehow stale, it will fall back to verifying its + -- content. + else withTSDelta (liftIO . genInodeCache f) >>= \case + Just fc -> ifM (isUnmodified' key f fc cache) + ( do + fastDebug "Annex.Content" ("found inode cache for " ++ show f) + return (fc:cache) + , return [] + ) + Nothing -> return [] return $ if null cache' then Nothing else Just (fromRawFilePath f, sameInodeCache f cache') diff --git a/Annex/Content/Presence.hs b/Annex/Content/Presence.hs index 6d9d158649..b8f4fc635a 100644 --- a/Annex/Content/Presence.hs +++ b/Annex/Content/Presence.hs @@ -1,6 +1,6 @@ {- git-annex object content presence - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -15,6 +15,7 @@ module Annex.Content.Presence ( objectFileExists, withObjectLoc, isUnmodified, + isUnmodified', isUnmodifiedCheap, verifyKeyContent, VerifyConfig(..), @@ -145,11 +146,17 @@ withObjectLoc key a = a =<< calcRepo (gitAnnexLocation key) - The cheaper way is to see if the InodeCache for the key matches the - file. -} isUnmodified :: Key -> RawFilePath -> Annex Bool -isUnmodified key f = go =<< geti +isUnmodified key f = + withTSDelta (liftIO . genInodeCache f) >>= \case + Just fc -> do + ic <- Database.Keys.getInodeCaches key + isUnmodified' key f fc ic + Nothing -> return False + +isUnmodified' :: Key -> RawFilePath -> InodeCache -> [InodeCache] -> Annex Bool +isUnmodified' key f fc ic = isUnmodifiedCheap'' fc ic <||> expensivecheck where - go Nothing = return False - go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc - expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f) + expensivecheck = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f) ( do -- The file could have been modified while it was -- being verified. Detect that. @@ -177,8 +184,11 @@ isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key) =<< withTSDelta (liftIO . genInodeCache f) isUnmodifiedCheap' :: Key -> InodeCache -> Annex Bool -isUnmodifiedCheap' key fc = - anyM (compareInodeCaches fc) =<< Database.Keys.getInodeCaches key +isUnmodifiedCheap' key fc = isUnmodifiedCheap'' fc + =<< Database.Keys.getInodeCaches key + +isUnmodifiedCheap'' :: InodeCache -> [InodeCache] -> Annex Bool +isUnmodifiedCheap'' fc ic = anyM (compareInodeCaches fc) ic {- Verifies that a file is the expected content of a key. - diff --git a/CHANGELOG b/CHANGELOG index f48f93483b..3facd20fa1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,6 +16,9 @@ git-annex (8.20210715) UNRELEASED; urgency=medium histories.) * sync, merge: Added --allow-unrelated-histories option, which is the same as the git merge option. + * Fix bug that caused some transfers to incorrectly fail with + "content changed while it was being sent", when the content was not + changed. -- Joey Hess Wed, 14 Jul 2021 14:26:36 -0400 diff --git a/doc/bugs/__34__failed_to_send_content_to_remote__34__/comment_19_9a1c92462a5393155298d70ef90d3019._comment b/doc/bugs/__34__failed_to_send_content_to_remote__34__/comment_19_9a1c92462a5393155298d70ef90d3019._comment new file mode 100644 index 0000000000..8d43dc8c61 --- /dev/null +++ b/doc/bugs/__34__failed_to_send_content_to_remote__34__/comment_19_9a1c92462a5393155298d70ef90d3019._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 19""" + date="2021-07-26T21:20:15Z" + content=""" +I've made it fall back to checking the file's content when the inode cache +is somehow stale. I expect this will solve the problem. But, I would still +like to know how to reproduce the problem, because if something is making +the inode cache go stale, this fallback check will need to hash the file, +which could make git-annex significantly more expensive. +"""]]