From 5783a8d0814fdf23c7a00df06298d3c49c1d6b64 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 14 Apr 2021 13:22:54 -0400 Subject: [PATCH] fsck: avoid redundant checksum when transfer is Verified When downloading content from a remote, if the content is able to be verified during the transfer, skip checksumming it a second time. Note that in this case, the fsck output does not include "(checksum)" which it does when the checksumming is done separately from the download. This commit was sponsored by Brock Spratlen on Patreon. --- CHANGELOG | 2 ++ Command/Fsck.hs | 19 +++++++------ ..._1a70ae7c9821d664ddf72fd4c431be29._comment | 28 +++++++++++++++++++ 3 files changed, 41 insertions(+), 8 deletions(-) create mode 100644 doc/todo/Fsck_remote_files_in-flight/comment_1_1a70ae7c9821d664ddf72fd4c431be29._comment diff --git a/CHANGELOG b/CHANGELOG index e873779817..2887c033c9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,8 @@ git-annex (8.20210331) UNRELEASED; urgency=medium * diffdriver: Support unlocked files. * forget: Preserve currently exported trees, avoiding problems with exporttree remotes in some unusual circumstances. + * fsck: When downloading content from a remote, if the content is able + to be verified during the transfer, skip checksumming it a second time. -- Joey Hess Thu, 01 Apr 2021 12:17:26 -0400 diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 30f5db2638..9a2b2407b5 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -156,17 +156,20 @@ performRemote key afile backend numcopies remote = dispatch (Right True) = withtmp $ \tmpfile -> getfile tmpfile >>= \case Nothing -> go True Nothing - Just True -> go True (Just tmpfile) - Just False -> do + Just (Right verification) -> go True (Just (tmpfile, verification)) + Just (Left _) -> do warning "failed to download file from remote" void $ go True Nothing return False dispatch (Right False) = go False Nothing - go present localcopy = check + go present lv = check [ verifyLocationLogRemote key ai remote present , verifyRequiredContent key ai - , withLocalCopy localcopy $ checkKeySizeRemote key remote ai - , withLocalCopy localcopy $ checkBackendRemote backend key remote ai + , withLocalCopy (fmap fst lv) $ checkKeySizeRemote key remote ai + , case fmap snd lv of + Just Verified -> return True + _ -> withLocalCopy (fmap fst lv) $ + checkBackendRemote backend key remote ai , checkKeyNumCopies key afile numcopies ] ai = mkActionItem (key, afile) @@ -185,13 +188,13 @@ performRemote key afile backend numcopies remote = cleanup `after` a tmp getfile tmp = ifM (checkDiskSpace (Just (P.takeDirectory tmp)) key 0 True) ( ifM (getcheap tmp) - ( return (Just True) + ( return (Just (Right UnVerified)) , ifM (Annex.getState Annex.fast) ( return Nothing - , Just . isRight <$> tryNonAsync (getfile' tmp) + , Just <$> tryNonAsync (getfile' tmp) ) ) - , return (Just False) + , return Nothing ) getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) dummymeter dummymeter _ = noop diff --git a/doc/todo/Fsck_remote_files_in-flight/comment_1_1a70ae7c9821d664ddf72fd4c431be29._comment b/doc/todo/Fsck_remote_files_in-flight/comment_1_1a70ae7c9821d664ddf72fd4c431be29._comment new file mode 100644 index 0000000000..7c6f486e88 --- /dev/null +++ b/doc/todo/Fsck_remote_files_in-flight/comment_1_1a70ae7c9821d664ddf72fd4c431be29._comment @@ -0,0 +1,28 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2021-04-14T17:07:50Z" + content=""" +Only some remotes support checksums in-flight; this recently includes +downloads from other git-annex repositories over ssh. Progress +on that front is being tracked at + +Most special remotes can't yet, but that should change eventually +for at least some of them. + +I've made fsck notice when content was able to be verified as part of a +transfer, and avoid a redundant checksum of them. + +What I've not done, and don't think I will be able to, is make the file +not be written to disk by fsck in that case. Since the `retrieveKeyFile` +interface is explicitly about writing to a file on disk, it would take ether +a whole separate interface being implemented for all remotes that avoids +writing to the file when they can checksum in flight, or it would need +some change to the `retrieveKeyFile` interface to do the same. + +Neither seems worth the complication to implement just to reduce disk IO in +this particular case. And it seems likely that, for files that fit in +memory, it never actually reaches disk before it's deleted. Also if this is +a concern for you, you can I guess avoid fscking remotes too frequently or +use a less fragile medium? +"""]]