use per-remote metadata storage for S3 version ID

Since the same key can be stored in a versioned S3 bucket multiple times
with different version IDs, this allows tracking them all. Not currently
needed, but if we ever want to drop from a versioned S3 bucket, we'll
need to know them all.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2018-08-31 13:12:58 -04:00
parent 5c99f6247e
commit b3d42283ad
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 33 additions and 20 deletions

View file

@ -96,7 +96,7 @@ getCurrentMetaData' getlogfile k = do
showts = formatPOSIXTime "%F@%H-%M-%S" showts = formatPOSIXTime "%F@%H-%M-%S"
getCurrentRemoteMetaData :: UUID -> Key -> Annex RemoteMetaData getCurrentRemoteMetaData :: UUID -> Key -> Annex RemoteMetaData
getCurrentRemoteMetaData u k = mkRemoteMetaData u <$> getCurrentRemoteMetaData u k = extractRemoteMetaData u <$>
getCurrentMetaData' remoteMetaDataLogFile k getCurrentMetaData' remoteMetaDataLogFile k
{- Adds in some metadata, which can override existing values, or unset {- Adds in some metadata, which can override existing values, or unset

View file

@ -18,8 +18,9 @@ import qualified Aws.S3 as S3
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S import qualified Data.ByteString as BS
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S
import Data.Char import Data.Char
import Network.Socket (HostName) import Network.Socket (HostName)
import Network.HTTP.Conduit (Manager) import Network.HTTP.Conduit (Manager)
@ -44,8 +45,9 @@ import Remote.Helper.Export
import qualified Remote.Helper.AWS as AWS import qualified Remote.Helper.AWS as AWS
import Creds import Creds
import Annex.UUID import Annex.UUID
import Logs.RemoteState
import Logs.Web import Logs.Web
import Logs.MetaData
import Types.MetaData
import Utility.Metered import Utility.Metered
import qualified Annex.Url as Url import qualified Annex.Url as Url
import Utility.DataUnits import Utility.DataUnits
@ -256,7 +258,7 @@ storeHelper info h f object p = case partSize info of
- that is difficult. -} - that is difficult. -}
retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
retrieve r info (Just h) = fileRetriever $ \f k p -> do retrieve r info (Just h) = fileRetriever $ \f k p -> do
loc <- getS3VersionID info (uuid r) k (T.pack $ bucketObject info k) loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
retrieveHelper info h loc f p retrieveHelper info h loc f p
retrieve r info Nothing = case getpublicurl info of retrieve r info Nothing = case getpublicurl info of
Nothing -> \_ _ _ -> do Nothing -> \_ _ _ -> do
@ -298,7 +300,7 @@ checkKey r info Nothing k = case getpublicurl info of
checkBoth (geturl $ bucketObject info k) (keySize k) checkBoth (geturl $ bucketObject info k) (keySize k)
checkKey r info (Just h) k = do checkKey r info (Just h) k = do
showChecking r showChecking r
loc <- getS3VersionID info (uuid r) k (T.pack $ bucketObject info k) loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
checkKeyHelper info h loc checkKeyHelper info h loc
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
@ -717,7 +719,7 @@ debugMapper level t = forward "S3" (T.unpack t)
s3Info :: RemoteConfig -> S3Info -> [(String, String)] s3Info :: RemoteConfig -> S3Info -> [(String, String)]
s3Info c info = catMaybes s3Info c info = catMaybes
[ Just ("bucket", fromMaybe "unknown" (getBucketName c)) [ Just ("bucket", fromMaybe "unknown" (getBucketName c))
, Just ("endpoint", w82s (S.unpack (S3.s3Endpoint s3c))) , Just ("endpoint", w82s (BS.unpack (S3.s3Endpoint s3c)))
, Just ("port", show (S3.s3Port s3c)) , Just ("port", show (S3.s3Port s3c))
, Just ("storage class", showstorageclass (getStorageClass c)) , Just ("storage class", showstorageclass (getStorageClass c))
, if configIA c , if configIA c
@ -773,12 +775,27 @@ setS3VersionID info u k vid
| otherwise = noop | otherwise = noop
setS3VersionID' :: UUID -> Key -> S3VersionID -> Annex () setS3VersionID' :: UUID -> Key -> S3VersionID -> Annex ()
setS3VersionID' u k vid = setRemoteState u k (formatS3VersionID vid) setS3VersionID' u k vid = addRemoteMetaData k $
RemoteMetaData u (updateMetaData s3VersionField v emptyMetaData)
where
v = mkMetaValue (CurrentlySet True) (formatS3VersionID vid)
getS3VersionID :: S3Info -> UUID -> Key -> S3.Object -> Annex (Either S3.Object S3VersionID) getS3VersionID :: UUID -> Key -> Annex [S3VersionID]
getS3VersionID info u k fallback getS3VersionID u k = do
| versioning info = maybe (Left fallback) Right <$> getS3VersionID' u k (RemoteMetaData _ m) <- getCurrentRemoteMetaData u k
return $ mapMaybe parseS3VersionID $ map unwrap $ S.toList $
metaDataValues s3VersionField m
where
unwrap (MetaValue _ v) = v
s3VersionField :: MetaField
s3VersionField = mkMetaFieldUnchecked "V"
eitherS3VersionID :: S3Info -> UUID -> Key -> S3.Object -> Annex (Either S3.Object S3VersionID)
eitherS3VersionID info u k fallback
| versioning info = getS3VersionID u k >>= return . \case
[] -> Left fallback
-- It's possible for a key to be stored multiple timees in
-- a bucket with different version IDs; only use one of them.
(v:_) -> Right v
| otherwise = return (Left fallback) | otherwise = return (Left fallback)
getS3VersionID' :: UUID -> Key -> Annex (Maybe S3VersionID)
getS3VersionID' u k = maybe Nothing parseS3VersionID <$> getRemoteState u k

View file

@ -37,7 +37,7 @@ module Types.MetaData (
ModMeta(..), ModMeta(..),
modMeta, modMeta,
RemoteMetaData(..), RemoteMetaData(..),
mkRemoteMetaData, extractRemoteMetaData,
fromRemoteMetaData, fromRemoteMetaData,
prop_metadata_sane, prop_metadata_sane,
prop_metadata_serialize prop_metadata_serialize
@ -291,8 +291,8 @@ data RemoteMetaData = RemoteMetaData UUID MetaData
{- Extracts only the fields prefixed with "uuid:", which belong to that {- Extracts only the fields prefixed with "uuid:", which belong to that
- remote. -} - remote. -}
mkRemoteMetaData :: UUID -> MetaData -> RemoteMetaData extractRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
mkRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $ extractRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
M.mapKeys removeprefix $ M.filterWithKey belongsremote m M.mapKeys removeprefix $ M.filterWithKey belongsremote m
where where
belongsremote (MetaField f) _v = prefix `isPrefixOf` CI.original f belongsremote (MetaField f) _v = prefix `isPrefixOf` CI.original f

View file

@ -82,7 +82,3 @@ keys that are not used in the current export doesn't help because another
repository may have changed the exported tree and be relying on the dropped repository may have changed the exported tree and be relying on the dropped
key being present in the export. Unless... Could export conflict resultion key being present in the export. Unless... Could export conflict resultion
somehow detect that? somehow detect that?
Another reason DELETE from appendonly is not supported is that only one
version ID is stored per key, but the same key could have its content in
the bucket multiple times under different version IDs.