incremental verification for S3

Sponsored-by: Dartmouth College's DANDI project
Joey Hess 2021-08-18 15:07:00 -04:00
parent d154e7022e
commit f5e09a1dbe
GPG key ID: DB12DB0FF05F8F38
3 changed files with 17 additions and 16 deletions


@@ -10,10 +10,10 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
git-annex's own progress display.
* Many special remotes now checksum content while it is being retrieved,
instead of in a separate pass at the end. This is supported for most
-special remotes on Linux (except for bittorrent, gitlfs, and S3),
-and for a few on other OSs (directory, web, webdav, bup, ddar, gcrypt,
-glacier). Special remotes using chunking or encryption also support
-it. But exporttree/importtree special remotes do not.
+special remotes on Linux (except for bittorrent and gitlfs),
+and for a few on other OSs (directory, web, S3, webdav, bup, ddar,
+gcrypt, glacier). Special remotes using chunking or encryption also
+support it. But exporttree/importtree special remotes do not.
-- Joey Hess <id@joeyh.name> Tue, 03 Aug 2021 12:22:45 -0400
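
The checksum-while-retrieving behaviour described in the entry above folds
hashing into the download loop, rather than re-reading the file in a separate
pass afterwards. A minimal sketch of that idea in isolation, using
cryptonite's incremental SHA256 API instead of git-annex's own
IncrementalVerifier (`sinkAndHash` and `getchunk` are made-up names for
illustration):

    {-# LANGUAGE LambdaCase #-}
    import Crypto.Hash (Context, Digest, SHA256, hashInit, hashUpdate, hashFinalize)
    import qualified Data.ByteString as B
    import System.IO

    -- Write each received chunk to the destination handle while also
    -- updating the hash context, so no second pass over the file is needed.
    sinkAndHash :: Handle -> IO (Maybe B.ByteString) -> IO (Digest SHA256)
    sinkAndHash dest getchunk = go hashInit
      where
        go ctx = getchunk >>= \case
            Nothing -> return (hashFinalize ctx)
            Just b -> do
                B.hPut dest b          -- write the chunk out
                go (hashUpdate ctx b)  -- and feed the same bytes to the hash

The final digest can then be compared against the key's expected checksum as
soon as the download finishes.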


@@ -60,6 +60,7 @@ import Types.MetaData
import Types.ProposedAccepted
import Types.NumCopies
import Utility.Metered
+import Utility.Hash (IncrementalVerifier)
import Utility.DataUnits
import Annex.Content
import qualified Annex.Url as Url
@@ -401,32 +402,32 @@ storeHelper info h magic f object p = liftIO $ case partSize info of
- out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -}
retrieve :: S3HandleVar -> Remote -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> Retriever
-retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
+retrieve hv r rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
(Just h) ->
eitherS3VersionID info rs c k (T.pack $ bucketObject info k) >>= \case
Left failreason -> do
warning failreason
giveup "cannot download content"
-Right loc -> retrieveHelper info h loc (fromRawFilePath f) p
+Right loc -> retrieveHelper info h loc (fromRawFilePath f) p iv
Nothing ->
getPublicWebUrls' (uuid r) rs info c k >>= \case
Left failreason -> do
warning failreason
giveup "cannot download content"
-Right us -> unlessM (withUrlOptions $ downloadUrl k p Nothing us (fromRawFilePath f)) $
+Right us -> unlessM (withUrlOptions $ downloadUrl k p iv us (fromRawFilePath f)) $
giveup "failed to download content"
-retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
-retrieveHelper info h loc f p = retrieveHelper' h f p $
+retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()
+retrieveHelper info h loc f p iv = retrieveHelper' h f p iv $
case loc of
Left o -> S3.getObject (bucket info) o
Right (S3VersionID o vid) -> (S3.getObject (bucket info) o)
{ S3.goVersionId = Just vid }
-retrieveHelper' :: S3Handle -> FilePath -> MeterUpdate -> S3.GetObject -> Annex ()
-retrieveHelper' h f p req = liftIO $ runResourceT $ do
+retrieveHelper' :: S3Handle -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> S3.GetObject -> Annex ()
+retrieveHelper' h f p iv req = liftIO $ runResourceT $ do
S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle h req
-Url.sinkResponseFile p Nothing zeroBytesProcessed f WriteMode rsp
+Url.sinkResponseFile p iv zeroBytesProcessed f WriteMode rsp
remove :: S3HandleVar -> Remote -> S3Info -> Remover
remove hv r info k = withS3HandleOrFail (uuid r) hv $ \h -> do
@@ -497,7 +498,7 @@ storeExportS3' hv r rs info magic f k loc p = withS3Handle hv $ \case
retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex ()
retrieveExportS3 hv r info _k loc f p = do
withS3Handle hv $ \case
-Just h -> retrieveHelper info h (Left (T.pack exportloc)) f p
+Just h -> retrieveHelper info h (Left (T.pack exportloc)) f p Nothing
Nothing -> case getPublicUrlMaker info of
Just geturl -> either giveup return =<<
Url.withUrlOptions
@@ -649,7 +650,7 @@ mkImportableContentsVersioned info = build . groupfiles
retrieveExportWithContentIdentifierS3 :: S3HandleVar -> Remote -> RemoteStateHandle -> S3Info -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierS3 hv r rs info loc cid dest mkkey p = withS3Handle hv $ \case
Just h -> do
-rewritePreconditionException $ retrieveHelper' h dest p $
+rewritePreconditionException $ retrieveHelper' h dest p Nothing $
limitGetToContentIdentifier cid $
S3.getObject (bucket info) o
k <- mkkey
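
Most of the S3 changes above just thread the optional verifier (`iv`) down to
`Url.sinkResponseFile`, which can then feed it each chunk of the response body
as the chunk is written out. A simplified stand-in for that pattern; the
`Verifier` record and its field names are illustrative assumptions, not
git-annex's actual Utility.Hash interface:

    {-# LANGUAGE LambdaCase #-}
    import qualified Data.ByteString as B
    import System.IO

    data Verifier = Verifier
        { updateIV   :: B.ByteString -> IO ()  -- feed one chunk to the hash
        , finalizeIV :: IO Bool                -- does the digest match the key?
        }

    -- Write the body to a file; when a verifier is supplied, feed it every
    -- chunk. With Nothing, this is the old unverified code path.
    sinkToFile :: Maybe Verifier -> FilePath -> IO (Maybe B.ByteString) -> IO ()
    sinkToFile miv f getchunk = withFile f WriteMode go
      where
        go h = getchunk >>= \case
            Nothing -> return ()
            Just b -> do
                B.hPut h b
                maybe (return ()) (`updateIV` b) miv
                go h

Passing Nothing, as retrieveExportS3 and retrieveExportWithContentIdentifierS3
do above, leaves those code paths without incremental verification.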


@@ -5,10 +5,10 @@
content="""
The concurrency problem is fixed now.
-Directory and webdav and web now also do incremental hashing.
+Directory, webdav, web, and S3 now also do incremental hashing.
These do not do incremental hashing
-still: gitlfs, S3, httpalso. Problem is, these open the file
+still: gitlfs, httpalso. Problem is, these open the file
for write. That prevents tailVerify re-opening it for read, because the
Haskell RTS does not actually allow opening a file for read that it has
open for write. The new `fileRetriever'` can be used instead to fix these,
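
The RTS limitation mentioned above comes from GHC's handle locking: a file
open in WriteMode cannot be opened again for reading by the same process,
which is what stops tailVerify from following a file that gitlfs or httpalso
still hold open for write. A small standalone demonstration (the file name is
arbitrary):

    import Control.Exception (IOException, try)
    import System.IO

    main :: IO ()
    main = do
        w <- openFile "locktest.tmp" WriteMode
        -- GHC enforces single-writer/multiple-reader locking on handles,
        -- so this second open fails with "resource busy (file is locked)".
        r <- try (openFile "locktest.tmp" ReadMode)
        case r of
            Left e  -> putStrLn ("open for read failed: " ++ show (e :: IOException))
            Right h -> putStrLn "open for read unexpectedly succeeded" >> hClose h
        hClose w

Handing the verifier to the retriever itself, as `fileRetriever'` does, avoids
needing to reopen the file at all.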