From 21c0d5be6e1a3aa5ca9543b695864a7e494f984b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 7 Jan 2022 12:19:43 -0400 Subject: [PATCH] comment --- Annex/Content.hs | 1 + CHANGELOG | 4 ++++ ..._5ed6591954aeafe6c99ae152f4f4ad67._comment | 24 +++++++++++++++++++ 3 files changed, 29 insertions(+) create mode 100644 doc/bugs/Failure_to_get_small_files_over_P2P_protocol/comment_6_5ed6591954aeafe6c99ae152f4f4ad67._comment diff --git a/Annex/Content.hs b/Annex/Content.hs index 58f1244070..e48e9d6d32 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -222,6 +222,7 @@ getViaTmpFromDisk rsp v key af action = checkallowed $ do tmpfile <- prepTmp key resuming <- liftIO $ R.doesPathExist tmpfile (ok, verification) <- action tmpfile + liftIO $ print ok -- When the temp file already had content, we don't know if -- that content is good or not, so only trust if it the action -- Verified it in passing. Otherwise, force verification even diff --git a/CHANGELOG b/CHANGELOG index b8c375d323..c6c35d37ba 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,10 @@ git-annex (8.20211232) UNRELEASED; urgency=medium preserve it in the imported tree so it does not get deleted. * enableremote, renameremote: Better handling of the unusual case where multiple special remotes have been initialized with the same name. + * Recover from over the wire errors when downloading from remotes, + by deleting the object file when verification of it fails. This allows + the next attempt at a download to succeed, rather than using the same + content and failing again. 
-- Joey Hess Mon, 03 Jan 2022 14:01:14 -0400 diff --git a/doc/bugs/Failure_to_get_small_files_over_P2P_protocol/comment_6_5ed6591954aeafe6c99ae152f4f4ad67._comment b/doc/bugs/Failure_to_get_small_files_over_P2P_protocol/comment_6_5ed6591954aeafe6c99ae152f4f4ad67._comment new file mode 100644 index 0000000000..08dc1ba5e3 --- /dev/null +++ b/doc/bugs/Failure_to_get_small_files_over_P2P_protocol/comment_6_5ed6591954aeafe6c99ae152f4f4ad67._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 6""" + date="2022-01-07T16:12:20Z" + content=""" +Current thinking on deleting corrupted tmp files: If a download succeeds, +and verification then fails, the whole file content has been downloaded, +and is corrupt. So it would be ok to always delete it then, as far as p2p +transfers go. + +For other remotes, the same is often true. The only exceptions are remotes like +rsync and bittorrent, which can recover from corruption on retry. But, +I don't think either rsync or bittorrent will usually write corrupt data +to a file anyway. They would catch over-the-wire corruption with rolling +checksums etc. So, it seems like a verification should never fail after +a successful rsync or bittorrent download. Unless the disk corrupted the +data in the meantime. Which is an unlikely situation, and not one that it's +really necessary for git-annex to recover from with optimal efficiency. + +... Oh interesting.. It already is supposed to do that, in +getViaTmpFromDisk. It seems, what is happening is the transfer fails +when all the file content is present, and so it never gets to the point of +verifying it, let alone deleting it. +"""]]