store S3 version IDs
Only done when versioning=yes is configured. It could always do it when S3 sends back a version id, but there may be buckets that have versioning enabled by accident, so it seemed better to honor the configuration. S3's docs say version IDs are "randomly generated", so presumably storing the same content twice gets two different ones, not the same one. So I considered storing a list of version IDs for a key. That would allow removing the key completely. But the way Logs.RemoteState works, when there are multiple writers, the last writer wins. So storing a list would need a different log format that merges, which seemed overkill to support removing a key from an append-only remote. Note that Logs.RemoteState for S3 is now dedicated to version IDs. If something else needs to be stored, a new log will be needed to do it. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
0ff5a41311
commit
794e9a7a44
2 changed files with 43 additions and 6 deletions
39
Remote/S3.hs
39
Remote/S3.hs
|
@ -44,6 +44,7 @@ import Remote.Helper.Export
|
|||
import qualified Remote.Helper.AWS as AWS
|
||||
import Creds
|
||||
import Annex.UUID
|
||||
import Logs.RemoteState
|
||||
import Logs.Web
|
||||
import Utility.Metered
|
||||
import qualified Annex.Url as Url
|
||||
|
@ -187,13 +188,13 @@ prepareS3HandleMaybe r = resourcePrepare $ const $
|
|||
|
||||
store :: Remote -> S3Info -> S3Handle -> Storer
|
||||
store _r info h = fileStorer $ \k f p -> do
|
||||
storeHelper info h f (T.pack $ bucketObject info k) p
|
||||
void $ storeHelper info h f (T.pack $ bucketObject info k) p
|
||||
-- Store public URL to item in Internet Archive.
|
||||
when (isIA info && not (isChunkKey k)) $
|
||||
setUrlPresent webUUID k (iaPublicUrl info (bucketObject info k))
|
||||
return True
|
||||
|
||||
storeHelper :: S3Info -> S3Handle -> FilePath -> S3.Object -> MeterUpdate -> Annex ()
|
||||
storeHelper :: S3Info -> S3Handle -> FilePath -> S3.Object -> MeterUpdate -> Annex (Maybe S3VersionID)
|
||||
storeHelper info h f object p = case partSize info of
|
||||
Just partsz | partsz > 0 -> do
|
||||
fsz <- liftIO $ getFileSize f
|
||||
|
@ -204,7 +205,8 @@ storeHelper info h f object p = case partSize info of
|
|||
where
|
||||
singlepartupload = do
|
||||
rbody <- liftIO $ httpBodyStorer f p
|
||||
void $ sendS3Handle h $ putObject info object rbody
|
||||
r <- sendS3Handle h $ putObject info object rbody
|
||||
return (mkS3VersionID (S3.porVersionId r))
|
||||
multipartupload fsz partsz = do
|
||||
#if MIN_VERSION_aws(0,10,6)
|
||||
let startreq = (S3.postInitiateMultipartUpload (bucket info) object)
|
||||
|
@ -241,8 +243,9 @@ storeHelper info h f object p = case partSize info of
|
|||
sendparts (offsetMeterUpdate meter (toBytesProcessed sz)) (etag:etags) (partnum + 1)
|
||||
sendparts p [] 1
|
||||
|
||||
void $ sendS3Handle h $ S3.postCompleteMultipartUpload
|
||||
r <- sendS3Handle h $ S3.postCompleteMultipartUpload
|
||||
(bucket info) object uploadid (zip [1..] etags)
|
||||
return (mkS3VersionID (S3.cmurVersionId r))
|
||||
#else
|
||||
warning $ "Cannot do multipart upload (partsize " ++ show partsz ++ ") of large file (" ++ show fsz ++ "); built with too old a version of the aws library."
|
||||
singlepartupload
|
||||
|
@ -320,11 +323,14 @@ checkKeyHelper info h object = do
|
|||
#endif
|
||||
|
||||
storeExportS3 :: UUID -> S3Info -> Maybe S3Handle -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
|
||||
storeExportS3 _u info (Just h) f _k loc p =
|
||||
storeExportS3 u info (Just h) f k loc p =
|
||||
catchNonAsync go (\e -> warning (show e) >> return False)
|
||||
where
|
||||
go = do
|
||||
storeHelper info h f (T.pack $ bucketExportLocation info loc) p
|
||||
>>= if versioning info
|
||||
then setS3VersionID u k
|
||||
else const noop
|
||||
return True
|
||||
storeExportS3 u _ Nothing _ _ _ _ = do
|
||||
needS3Creds u
|
||||
|
@ -726,3 +732,26 @@ getWebUrls info c k
|
|||
(True, Just geturl) -> return [geturl $ bucketObject info k]
|
||||
_ -> return []
|
||||
|
||||
-- | An S3 object version identifier, as returned by the S3 API
-- when versioning is enabled on a bucket.
newtype S3VersionID = S3VersionID String
	deriving (Show)

-- | Smart constructor from the Text value the aws library provides.
mkS3VersionID :: Maybe T.Text -> Maybe S3VersionID
mkS3VersionID = mkS3VersionID' . fmap T.unpack

-- | Smart constructor; rejects empty and unreasonably long values.
--
-- AWS documentation says a version ID is at most 1024 bytes long.
-- Since they are stored in the git-annex branch, prevent them from
-- being very much larger than that.
mkS3VersionID' :: Maybe String -> Maybe S3VersionID
mkS3VersionID' ms = ms >>= \s ->
	if not (null s) && length s < 2048
		then Just (S3VersionID s)
		else Nothing
|
||||
|
||||
-- | Record the S3 version ID for a key in the per-remote log.
-- Does nothing when no version ID is available.
setS3VersionID :: UUID -> Key -> Maybe S3VersionID -> Annex ()
setS3VersionID u k = maybe noop (\(S3VersionID v) -> setRemoteState u k v)

-- | Look up the S3 version ID recorded for a key, if any.
getS3VersionID :: UUID -> Key -> Annex (Maybe S3VersionID)
getS3VersionID u k = fmap mkS3VersionID' (getRemoteState u k)
|
||||
|
|
|
@ -61,7 +61,11 @@ Let S3 remotes be configured with versioning=yes which enables appendOnly.
|
|||
done
|
||||
|
||||
Make S3 store version IDs for exported files in the per-remote log when so
|
||||
configured, and use them when retrieving keys and for checkpresent.
|
||||
configured. done
|
||||
|
||||
Use version IDs when retrieving keys and for checkpresent.
|
||||
|
||||
Can public urls be generated using version IDs?
|
||||
|
||||
When a file was deleted from an exported tree, and then put back
|
||||
in a later exported tree, it might get re-uploaded even though the content
|
||||
|
@ -80,3 +84,7 @@ keys that are not used in the current export doesn't help because another
|
|||
repository may have changed the exported tree and be relying on the dropped
|
||||
key being present in the export. So, DELETE from an appendonly export
|
||||
won't be supported, at least for now.
|
||||
|
||||
Another reason DELETE from appendonly is not supported is that only one
|
||||
version ID is stored per key, but the same key could have its content in
|
||||
the bucket multiple times under different version IDs.
|
||||
|
|
Loading…
Add table
Reference in a new issue