diff --git a/Annex/Content.hs b/Annex/Content.hs index e48e9d6d32..68d21506f1 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -1,6 +1,6 @@ {- git-annex file content managing - - - Copyright 2010-2021 Joey Hess + - Copyright 2010-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -19,6 +19,7 @@ module Annex.Content ( RetrievalSecurityPolicy(..), getViaTmp, getViaTmpFromDisk, + verificationOfContentFailed, checkDiskSpaceToGet, checkSecureHashes, prepTmp, @@ -222,7 +223,6 @@ getViaTmpFromDisk rsp v key af action = checkallowed $ do tmpfile <- prepTmp key resuming <- liftIO $ R.doesPathExist tmpfile (ok, verification) <- action tmpfile - liftIO $ print ok -- When the temp file already had content, we don't know if -- that content is good or not, so only trust if it the action -- Verified it in passing. Otherwise, force verification even @@ -236,16 +236,7 @@ getViaTmpFromDisk rsp v key af action = checkallowed $ do then ifM (verifyKeyContentPostRetrieval rsp v verification' key tmpfile) ( pruneTmpWorkDirBefore tmpfile (moveAnnex key af) , do - warning "verification of content failed" - -- The bad content is not retained, because - -- a retry should not try to resume from it - -- since it's apparently corrupted. - -- Also, the bad content could be any data, - -- including perhaps the content of another - -- file than the one that was requested, - -- and so it's best not to keep it on disk. - pruneTmpWorkDirBefore tmpfile - (liftIO . removeWhenExistsWith R.removeLink) + verificationOfContentFailed tmpfile return False ) -- On transfer failure, the tmp file is left behind, in case @@ -264,6 +255,24 @@ getViaTmpFromDisk rsp v key af action = checkallowed $ do ) ) +{- When the content of a file that was successfully transferred from a remote + - fails to verify, use this to display a message so the user knows why it + - failed, and to clean up the corrupted content. + - + - The bad content is not retained, because the transfer of it succeeded. + - So it's not incomplete and a resume using it will not work. While + - some protocols like rsync could recover such a bad content file, + - they are assumed to not write out bad data to a file in the first place. + - Most protocols, including the P2P protocol, pick up downloads where they + - left off, and so if the bad content were not deleted, repeated downloads + - would continue to fail. + -} +verificationOfContentFailed :: RawFilePath -> Annex () +verificationOfContentFailed tmpfile = do + warning "Verification of content failed" + pruneTmpWorkDirBefore tmpfile + (liftIO . removeWhenExistsWith R.removeLink) + {- Checks if there is enough free disk space to download a key - to its temp file. - diff --git a/CHANGELOG b/CHANGELOG index c6c35d37ba..79d655a6b4 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,10 +5,10 @@ git-annex (8.20211232) UNRELEASED; urgency=medium preserve it in the imported tree so it does not get deleted. * enableremote, renameremote: Better handling of the unusual case where multiple special remotes have been initialized with the same name. - * Recover from over the wire errors when downloading from remotes, - by deleting the object file when verification of it fails. This allows - the next attempt at a download to succeed, rather than using the same - content and failing again. + * Recover from corrupted content being received from a git remote, + by deleting the temporary file when it fails to verify. This prevents + a retry from failing again. + (reversion introduced in version 8.20210903) -- Joey Hess Mon, 03 Jan 2022 14:01:14 -0400 diff --git a/P2P/Annex.hs b/P2P/Annex.hs index a2641d38da..08177bec6e 100644 --- a/P2P/Annex.hs +++ b/P2P/Annex.hs @@ -1,6 +1,6 @@ {- P2P protocol, Annex implementation - - - Copyright 2016-2021 Joey Hess + - Copyright 2016-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -206,7 +206,9 @@ runLocal runst runner a = case a of | rightsize -> liftIO (finalizeIncrementalVerifier iv) >>= \case Nothing -> return (True, UnVerified) Just True -> return (True, Verified) - Just False -> return (False, UnVerified) + Just False -> do + verificationOfContentFailed (toRawFilePath dest) + return (False, UnVerified) | otherwise -> return (False, UnVerified) Nothing -> return (rightsize, UnVerified) Right (Just Invalid) | l == 0 -> diff --git a/Remote/Git.hs b/Remote/Git.hs index 99ccc60330..45e79ee348 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -30,6 +30,7 @@ import Logs.Presence import Annex.Transfer import Annex.CopyFile import Annex.Verify +import Annex.Content (verificationOfContentFailed) import Annex.UUID import qualified Annex.Content import qualified Annex.BranchState @@ -706,7 +707,9 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do ifM (liftIO (catchBoolIO (linker src dest))) ( ifM check ( return (True, Verified) - , return (False, UnVerified) + , do + verificationOfContentFailed (toRawFilePath dest) + return (False, UnVerified) ) , copier src dest k p check verifyconfig ) @@ -717,7 +720,9 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do fileCopier copycowtried src dest p iv >>= \case Copied -> ifM check ( finishVerifyKeyContentIncrementally iv - , return (False, UnVerified) + , do + verificationOfContentFailed (toRawFilePath dest) + return (False, UnVerified) ) CopiedCoW -> unVerified check diff --git a/doc/bugs/Failure_to_get_small_files_over_P2P_protocol.mdwn b/doc/bugs/Failure_to_get_small_files_over_P2P_protocol.mdwn index 2b8eb58278..cb0a2b663b 100644 --- a/doc/bugs/Failure_to_get_small_files_over_P2P_protocol.mdwn +++ b/doc/bugs/Failure_to_get_small_files_over_P2P_protocol.mdwn @@ -115,3 +115,6 @@ Both servers are debian. I've been using git annex for years now, some periods more intensively than others. The only data I've lost I can only blame on my own stupidity, even after a terminal disk failure I managed to recover almost everything with a reinject. I'm still using it entirely cli though, no assistants or gui for me. [[!meta title="corrupted tmp file can prevent getting an object"]] + +> [[fixed|done]] (the related issue that I discovered in comment #1 +> specifically) --[[Joey]]