versioned import from S3 is working

Still some bugs and two stubbed methods to implement though.
Joey Hess 2019-04-19 15:10:31 -04:00
parent 833980c0bc
commit 2f79cb4b45
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 70 additions and 23 deletions

@@ -4,6 +4,14 @@ git-annex (7.20190323) UNRELEASED; urgency=medium
     to allow git-annex import of files from an Android device. This can be
     combined with exporttree=yes and git-annex export used to send changes
     back to the Android device.
+  * S3 special remote supports being configured with importtree=yes,
+    to allow git-annex import of files from a S3 bucket. This can be
+    combined with exporttree=yes and git-annex export used to send changes
+    back to the S3 bucket.
+  * S3: When versioning is enabled on a bucket, importing from it will
+    import old versions of files that were written to the bucket as well
+    as the current versions. A git history is synthesized to reflect the way
+    the bucket changed over time.
   * Fix bug that caused importing from a special remote to repeatedly
     download unchanged files when multiple files in the remote have the same
     content.

@@ -116,7 +116,7 @@ gen r u c gc = do
 			}
 		, importActions = ImportActions
 			{ listImportableContents = listImportableContentsS3 hdl this info
-			, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierS3 hdl info
+			, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierS3 hdl this info
 			, storeExportWithContentIdentifier = storeExportWithContentIdentifierS3 hdl this info magic
 			, removeExportWithContentIdentifier = removeExportWithContentIdentifierS3 hdl info
 			, removeExportDirectoryWhenEmpty = Nothing
@@ -305,11 +305,14 @@ retrieve hv r c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
 		giveup "failed to download content"
 
 retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
-retrieveHelper info h loc f p = liftIO $ runResourceT $ do
-	let req = case loc of
+retrieveHelper info h loc f p = retrieveHelper' h f p $
+	case loc of
 		Left o -> S3.getObject (bucket info) o
 		Right (S3VersionID o vid) -> (S3.getObject (bucket info) o)
 			{ S3.goVersionId = Just vid }
+
+retrieveHelper' :: S3Handle -> FilePath -> MeterUpdate -> S3.GetObject -> Annex ()
+retrieveHelper' h f p req = liftIO $ runResourceT $ do
 	S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle h req
 	Url.sinkResponseFile p zeroBytesProcessed f WriteMode rsp
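The hunk above splits the old retrieveHelper into a thin wrapper plus retrieveHelper', which takes an already-built GetObject request. That lets callers decorate the request (pin a version, add a precondition) before handing it to one shared runner. A minimal sketch of the pattern, using toy stand-ins (GetObject, getObject, runGet here are simplified models, not the real aws package API):

```haskell
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Text as T

-- Toy stand-in for aws's S3.GetObject record; only the fields the
-- diff touches are modeled here.
data GetObject = GetObject
    { goKey       :: T.Text
    , goVersionId :: Maybe T.Text
    , goIfMatch   :: Maybe T.Text
    } deriving Show

getObject :: T.Text -> GetObject
getObject k = GetObject k Nothing Nothing

-- One runner for every request, mirroring how retrieveHelper now
-- delegates to retrieveHelper'. (print stands in for sending the
-- request and streaming the response to a file.)
runGet :: GetObject -> IO ()
runGet = print

main :: IO ()
main = do
    runGet (getObject "foo")                                  -- current version
    runGet ((getObject "foo") { goVersionId = Just "v123" })  -- pinned version
```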
@@ -443,12 +446,10 @@ listImportableContentsS3 hv r info =
 	| versioning info = do
 		rsp <- sendS3Handle h $
 			S3.getBucketObjectVersions (bucket info)
-		liftIO $ print rsp
 		continuelistversioned h [] rsp
 	| otherwise = do
 		rsp <- sendS3Handle h $
 			S3.getBucket (bucket info)
-		liftIO $ print rsp
 		continuelistunversioned h [] rsp
 
 	continuelistunversioned h l rsp
@@ -457,7 +458,6 @@ listImportableContentsS3 hv r info =
 			(S3.getBucket (bucket info))
 				{ S3.gbMarker = S3.gbrNextMarker rsp
 				}
-		liftIO $ print rsp'
 		continuelistunversioned h (rsp:l) rsp'
 	| otherwise = return $
 		mkImportableContentsUnversioned info (reverse (rsp:l))
@@ -469,12 +469,9 @@ listImportableContentsS3 hv r info =
 			{ S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp
 			, S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp
 			}
-		liftIO $ print rsp
 		continuelistversioned h (rsp:l) rsp'
-	| otherwise = do
-		let v = mkImportableContentsVersioned info (reverse (rsp:l))
-		liftIO $ print v
-		return v
+	| otherwise = return $
+		mkImportableContentsVersioned info (reverse (rsp:l))
 
 mkImportableContentsUnversioned :: S3Info -> [S3.GetBucketResponse] -> ImportableContents (ContentIdentifier, ByteSize)
 mkImportableContentsUnversioned info l = ImportableContents
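Both listing paths page through the bucket the same way: each response may carry a continuation marker (gbrNextMarker, or the key/version-id marker pair), which is threaded into the next request, and responses are accumulated until the listing is exhausted. A self-contained sketch of that loop, with a toy Page type and a canned fetchPage standing in for the real S3 responses and sendS3Handle:

```haskell
-- Toy page type standing in for GetBucketResponse /
-- GetBucketObjectVersionsResponse.
data Page = Page
    { pageItems  :: [String]
    , nextMarker :: Maybe String  -- Nothing once the listing is complete
    }

-- Canned stand-in for sending getBucket / getBucketObjectVersions.
fetchPage :: Maybe String -> IO Page
fetchPage Nothing    = pure (Page ["a", "b"] (Just "b"))
fetchPage (Just "b") = pure (Page ["c"] Nothing)
fetchPage _          = pure (Page [] Nothing)

-- Accumulate pages in reverse (like the rsp:l list in the diff) and
-- flatten once the last page arrives.
listAll :: IO [String]
listAll = go Nothing []
  where
    go marker acc = do
        p <- fetchPage marker
        case nextMarker p of
            Just m  -> go (Just m) (p : acc)
            Nothing -> pure (concatMap pageItems (reverse (p : acc)))

main :: IO ()
main = listAll >>= print  -- ["a","b","c"]
```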
@@ -486,8 +483,7 @@ mkImportableContentsUnversioned info l = ImportableContents
 		loc <- bucketImportLocation info $
 			T.unpack $ S3.objectKey oi
 		let sz = S3.objectSize oi
-		let cid = mkS3UnversionedContentIdentifier $
-			S3.objectETag oi
+		let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
 		return (loc, (cid, sz))
 
 mkImportableContentsVersioned :: S3Info -> [S3.GetBucketObjectVersionsResponse] -> ImportableContents (ContentIdentifier, ByteSize)
@@ -507,8 +503,7 @@ mkImportableContentsVersioned info = build . groupfiles
 		loc <- bucketImportLocation info $
 			T.unpack $ S3.oviKey ovi
 		let sz = S3.oviSize ovi
-		let cid = mkS3UnversionedContentIdentifier $
-			S3.oviETag ovi
+		let cid = mkS3VersionedContentIdentifier' ovi
 		return (loc, (cid, sz))
 	extract (S3.DeleteMarker {}) = Nothing
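A versioned listing mixes real object versions with delete markers; extract keeps the former (now identified by version id rather than etag, per the hunk above) and drops the latter. A toy sketch of that filtering, with simplified stand-ins for S3.ObjectVersionInfo and S3.DeleteMarker:

```haskell
import Data.Maybe (mapMaybe)

-- Toy stand-ins for the two kinds of entries in a versioned listing.
data VersionEntry
    = ObjectVersion { key :: String, versionId :: String, size :: Integer }
    | DeleteMarker  { key :: String }

-- Object versions become (location, (content identifier, size));
-- delete markers contribute nothing to the importable contents.
extract :: VersionEntry -> Maybe (String, (String, Integer))
extract (ObjectVersion k vid sz) = Just (k, (vid, sz))
extract (DeleteMarker _)         = Nothing

main :: IO ()
main = print $ mapMaybe extract
    [ ObjectVersion "foo" "v1" 3
    , DeleteMarker "foo"
    , ObjectVersion "foo" "v2" 4
    ]
```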
@@ -537,8 +532,27 @@ mkImportableContentsVersioned info = build . groupfiles
 	| otherwise =
 		i : removemostrecent mtime rest
 
-retrieveExportWithContentIdentifierS3 :: S3HandleVar -> S3Info -> ExportLocation -> ContentIdentifier -> FilePath -> Annex (Maybe Key) -> MeterUpdate -> Annex (Maybe Key)
-retrieveExportWithContentIdentifierS3 hv info loc cid dest mkkey p = undefined
+retrieveExportWithContentIdentifierS3 :: S3HandleVar -> Remote -> S3Info -> ExportLocation -> ContentIdentifier -> FilePath -> Annex (Maybe Key) -> MeterUpdate -> Annex (Maybe Key)
+retrieveExportWithContentIdentifierS3 hv r info loc cid dest mkkey p = withS3Handle hv $ \case
+	Nothing -> do
+		warning $ needS3Creds (uuid r)
+		return Nothing
+	Just h -> flip catchNonAsync (\e -> warning (show e) >> return Nothing) $ do
+		rewritePreconditionException $ retrieveHelper' h dest p $
+			limitGetToContentIdentifier cid $
+				S3.getObject (bucket info) o
+		mkkey
+  where
+	o = T.pack $ bucketExportLocation info loc
+
+{- Catch exception getObject returns when a precondition is not met,
+ - and replace with a more understandable message for the user. -}
+rewritePreconditionException :: Annex a -> Annex a
+rewritePreconditionException a = catchJust (Url.matchStatusCodeException want) a $
+	const $ giveup "requested version of object is not available in S3 bucket"
+  where
+	want st = statusCode st == 412 &&
+		statusMessage st == "Precondition Failed"
 
 -- Does not check if content on S3 is safe to overwrite, because there
 -- is no atomic way to do so. When the bucket is versioned, this is
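rewritePreconditionException is built on catchJust: a predicate selects only the HTTP 412 "Precondition Failed" response that S3 returns when an If-Match precondition fails, rewrites it into a clearer error, and lets every other exception propagate untouched. A minimal sketch of the idiom with a toy exception type (the real code matches http-client's status-code exception via Url.matchStatusCodeException):

```haskell
{-# LANGUAGE ScopedTypeVariables #-}
import Control.Exception

-- Toy exception standing in for http-client's StatusCodeException.
data HttpError = HttpError
    { statusCode    :: Int
    , statusMessage :: String
    } deriving Show

instance Exception HttpError

-- Fires only for 412; all other exceptions pass through unchanged.
rewrite412 :: IO a -> IO a
rewrite412 action = catchJust select action rethrow
  where
    select (HttpError 412 "Precondition Failed") = Just ()
    select _ = Nothing
    rethrow () = throwIO $ userError
        "requested version of object is not available in S3 bucket"

main :: IO ()
main = rewrite412 (throwIO (HttpError 412 "Precondition Failed"))
    `catch` \(e :: IOException) -> print e
```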
@@ -998,15 +1012,31 @@ mkS3VersionedContentIdentifier :: S3VersionID -> ContentIdentifier
 mkS3VersionedContentIdentifier (S3VersionID _ v) =
 	ContentIdentifier $ T.encodeUtf8 v
 
--- S3 returns etags surrounded by double quotes, and the quotes are
--- included here.
+mkS3VersionedContentIdentifier' :: S3.ObjectVersionInfo -> ContentIdentifier
+mkS3VersionedContentIdentifier' =
+	ContentIdentifier . T.encodeUtf8 . S3.oviVersionId
+
+-- S3 returns etags surrounded by double quotes, and the quotes may
+-- be included here.
 type S3Etag = T.Text
 
 -- For an unversioned bucket, the S3Etag is instead used as the
--- ContentIdentifier.
+-- ContentIdentifier. Prefixed by '#' since that cannot appear in a S3
+-- version id.
 mkS3UnversionedContentIdentifier :: S3Etag -> ContentIdentifier
 mkS3UnversionedContentIdentifier t =
-	ContentIdentifier $ T.encodeUtf8 $ T.filter (/= '"') t
+	ContentIdentifier $ T.encodeUtf8 $ "#" <> T.filter (/= '"') t
+
+-- Makes a GetObject request be guaranteed to get the object version
+-- matching the ContentIdentifier, or fail.
+limitGetToContentIdentifier :: ContentIdentifier -> S3.GetObject -> S3.GetObject
+limitGetToContentIdentifier (ContentIdentifier v) req =
+	let t = either mempty id (T.decodeUtf8' v)
+	in case T.take 1 t of
+		"#" ->
+			let etag = T.drop 1 t
+			in req { S3.goIfMatch = Just etag }
+		_ -> req { S3.goVersionId = Just t }
 
 setS3VersionID :: S3Info -> UUID -> Key -> Maybe S3VersionID -> Annex ()
 setS3VersionID info u k vid
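This last hunk makes the two kinds of ContentIdentifier self-describing: a versioned bucket stores the S3 version id as-is, while an unversioned bucket stores the etag prefixed with '#', a character that cannot occur in a version id. limitGetToContentIdentifier can then dispatch on the first character, restricting the request with If-Match (etag) or a pinned version id. A small sketch of just that decoding step, using a toy Restriction type and made-up identifier values:

```haskell
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Text as T

-- Toy model of the two ways a GetObject request can be restricted.
data Restriction
    = IfMatchEtag T.Text   -- unversioned bucket: require the etag to match
    | PinVersionId T.Text  -- versioned bucket: fetch that exact version
    deriving Show

-- Dispatch on the '#' tag, mirroring limitGetToContentIdentifier.
decodeCid :: T.Text -> Restriction
decodeCid t = case T.take 1 t of
    "#" -> IfMatchEtag (T.drop 1 t)
    _   -> PinVersionId t

main :: IO ()
main = do
    print (decodeCid "#9bb58f26192e4ba00f01e2e7b136bbd8")  -- etag (made up)
    print (decodeCid "3HL4kqtJlcpXro")                     -- version id (made up)
```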

@@ -13,8 +13,17 @@ and `git annex sync --content` can be configured to use it.
 
 ## remaining todo
 
-* Currently only directory and adb special remotes support importing,
-  at least S3 can also support it.
+* There's a bug in the way the import history is added to the
+  previous history for the remote. This leads to an ever-growing history
+  when importing from a versioned S3 remote,
+  although the head of it does reflect the current state of the remote.
+* Two undefs in Remote.S3 remain.
+* Need to test S3 import from unversioned bucket.
+* Need to test S3 import from versioned bucket, including handling
+  of deletion of files.
 * Write a tip or tips to document using this new feature.
   (Have one for adb now.)