From 53744e132df0496186c7769c905fb0648f9042eb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 18 Aug 2021 15:13:14 -0400 Subject: [PATCH] incremental verification for gitlfs and httpalso And that should be all the special remotes supporting it on linux now, except for in the odd edge case here and there. Sponsored-by: Dartmouth College's DANDI project --- CHANGELOG | 8 +++----- Remote/GitLFS.hs | 8 ++++---- Remote/HttpAlso.hs | 18 ++++++++++-------- ...6_fbbcf1d8b35078274cfe322cea6de21c._comment | 13 ++++--------- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 481c8b82ea..c8b15d3c46 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,11 +9,9 @@ git-annex (8.20210804) UNRELEASED; urgency=medium * rsync special remote: Stop displaying rsync progress, and use git-annex's own progress display. * Many special remotes now checksum content while it is being retrieved, - instead of in a separate pass at the end. This is supported for most - special remotes on Linux (except for bittorrent and gitlfs), - and for a few on other OSs (directory, web, S3, webdav, bup, ddar, - gcrypt, glacier). Special remotes using chunking or encryption also - support it. But exporttree/importtree special remotes do not. + instead of in a separate pass at the end. This is supported for all + special remotes on Linux (except for bittorrent), and for many + on other OS's (except for adb, external, gcrypt, hook, and rsync). -- Joey Hess Tue, 03 Aug 2021 12:22:45 -0400 diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 38dce48392..20e755e445 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -476,7 +476,7 @@ store rs h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \ca makeSmallAPIRequest . 
setRequestCheckStatus retrieve :: RemoteStateHandle -> TVar LFSHandle -> Retriever -retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload h >>= \case +retrieve rs h = fileRetriever' $ \dest k p iv -> getLFSEndpoint LFS.RequestDownload h >>= \case Nothing -> giveup "unable to connect to git-lfs endpoint" Just endpoint -> mkDownloadRequest rs k >>= \case Nothing -> giveup "unable to download this object from git-lfs" @@ -487,9 +487,9 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload (tro:_) | LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size -> giveup "git-lfs server replied with other object than the one we requested" - | otherwise -> go dest p tro + | otherwise -> go dest p iv tro where - go dest p tro = case LFS.resp_error tro of + go dest p iv tro = case LFS.resp_error tro of Just err -> giveup $ T.unpack $ LFS.respobjerr_message err Nothing -> case LFS.resp_actions tro of Nothing -> giveup "git-lfs server did not provide a way to download this object" @@ -497,7 +497,7 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload Nothing -> giveup "unable to parse git-lfs server download url" Just req -> do uo <- getUrlOptions - liftIO $ downloadConduit p Nothing req (fromRawFilePath dest) uo + liftIO $ downloadConduit p iv req (fromRawFilePath dest) uo -- Since git-lfs does not support removing content, nothing needs to be -- done to lock content in the remote, except for checking that the content diff --git a/Remote/HttpAlso.hs b/Remote/HttpAlso.hs index 79cc0771c8..49310fd01b 100644 --- a/Remote/HttpAlso.hs +++ b/Remote/HttpAlso.hs @@ -1,6 +1,6 @@ {- HttpAlso remote (readonly). - - - Copyright 2020 Joey Hess + - Copyright 2020-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -20,6 +20,7 @@ import Logs.Web import Creds import Messages.Progress import Utility.Metered +import Annex.Verify import qualified Annex.Url as Url import Annex.SpecialRemote.Config @@ -114,19 +115,20 @@ httpAlsoSetup _ (Just u) _ c gc = do return (c', u) downloadKey :: Maybe URLString -> LearnedLayout -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification -downloadKey baseurl ll key _af dest p _vc = do - downloadAction dest p key (keyUrlAction baseurl ll key) - return UnVerified +downloadKey baseurl ll key _af dest p vc = do + iv <- startVerifyKeyContentIncrementally vc key + downloadAction dest p iv key (keyUrlAction baseurl ll key) + snd <$> finishVerifyKeyContentIncrementally iv retriveExportHttpAlso :: Maybe URLString -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex () retriveExportHttpAlso baseurl key loc dest p = - downloadAction dest p key (exportLocationUrlAction baseurl loc) + downloadAction dest p Nothing key (exportLocationUrlAction baseurl loc) -downloadAction :: FilePath -> MeterUpdate -> Key -> ((URLString -> Annex (Either String ())) -> Annex (Either String ())) -> Annex () -downloadAction dest p key run = +downloadAction :: FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Key -> ((URLString -> Annex (Either String ())) -> Annex (Either String ())) -> Annex () +downloadAction dest p iv key run = Url.withUrlOptions $ \uo -> meteredFile dest (Just p) key $ - run (\url -> Url.download' p Nothing url dest uo) + run (\url -> Url.download' p iv url dest uo) >>= either giveup (const (return ())) checkKey :: Maybe URLString -> LearnedLayout -> Key -> Annex Bool diff --git a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment index 812e9c7989..155bdd2f7a 
100644 --- a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment +++ b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment @@ -5,15 +5,10 @@ content=""" The concurrency problem is fixed now. -Directory, webdav, web, and S3 now also do incremental hashing. +All special remotes now do incremental hashing in most cases +(on Linux, and a subset of them on other OSes). -These do not do incremental hashing -still: gitlfs, httpalso. Problem is, these open the file -for write. That prevents tailVerify re-opening it for read, because the -haskell RTS actually does not allowing opening a file for read that it has -open for write. The new `fileRetriever\`` can be used instead to fix these, -but will take some more work. - -Also, retrieval from export/import special remotes does not do incremental +The only thing remaining is: +Retrieval from export/import special remotes does not do incremental hashing (except for versioned ones, which sometimes use retrieveKeyFile). """]]