use per-remote metadata storage for S3 version ID
Since the same key can be stored in a versioned S3 bucket multiple times with different version IDs, this allows tracking them all. Not currently needed, but if we ever want to drop from a versioned S3 bucket, we'll need to know them all. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
5c99f6247e
commit
b3d42283ad
4 changed files with 33 additions and 20 deletions
|
@ -96,7 +96,7 @@ getCurrentMetaData' getlogfile k = do
|
|||
showts = formatPOSIXTime "%F@%H-%M-%S"
|
||||
|
||||
getCurrentRemoteMetaData :: UUID -> Key -> Annex RemoteMetaData
|
||||
getCurrentRemoteMetaData u k = mkRemoteMetaData u <$>
|
||||
getCurrentRemoteMetaData u k = extractRemoteMetaData u <$>
|
||||
getCurrentMetaData' remoteMetaDataLogFile k
|
||||
|
||||
{- Adds in some metadata, which can override existing values, or unset
|
||||
|
|
41
Remote/S3.hs
41
Remote/S3.hs
|
@ -18,8 +18,9 @@ import qualified Aws.S3 as S3
|
|||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
import Data.Char
|
||||
import Network.Socket (HostName)
|
||||
import Network.HTTP.Conduit (Manager)
|
||||
|
@ -44,8 +45,9 @@ import Remote.Helper.Export
|
|||
import qualified Remote.Helper.AWS as AWS
|
||||
import Creds
|
||||
import Annex.UUID
|
||||
import Logs.RemoteState
|
||||
import Logs.Web
|
||||
import Logs.MetaData
|
||||
import Types.MetaData
|
||||
import Utility.Metered
|
||||
import qualified Annex.Url as Url
|
||||
import Utility.DataUnits
|
||||
|
@ -256,7 +258,7 @@ storeHelper info h f object p = case partSize info of
|
|||
- that is difficult. -}
|
||||
retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
|
||||
retrieve r info (Just h) = fileRetriever $ \f k p -> do
|
||||
loc <- getS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
||||
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
||||
retrieveHelper info h loc f p
|
||||
retrieve r info Nothing = case getpublicurl info of
|
||||
Nothing -> \_ _ _ -> do
|
||||
|
@ -298,7 +300,7 @@ checkKey r info Nothing k = case getpublicurl info of
|
|||
checkBoth (geturl $ bucketObject info k) (keySize k)
|
||||
checkKey r info (Just h) k = do
|
||||
showChecking r
|
||||
loc <- getS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
||||
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
||||
checkKeyHelper info h loc
|
||||
|
||||
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
|
||||
|
@ -717,7 +719,7 @@ debugMapper level t = forward "S3" (T.unpack t)
|
|||
s3Info :: RemoteConfig -> S3Info -> [(String, String)]
|
||||
s3Info c info = catMaybes
|
||||
[ Just ("bucket", fromMaybe "unknown" (getBucketName c))
|
||||
, Just ("endpoint", w82s (S.unpack (S3.s3Endpoint s3c)))
|
||||
, Just ("endpoint", w82s (BS.unpack (S3.s3Endpoint s3c)))
|
||||
, Just ("port", show (S3.s3Port s3c))
|
||||
, Just ("storage class", showstorageclass (getStorageClass c))
|
||||
, if configIA c
|
||||
|
@ -773,12 +775,27 @@ setS3VersionID info u k vid
|
|||
| otherwise = noop
|
||||
|
||||
setS3VersionID' :: UUID -> Key -> S3VersionID -> Annex ()
|
||||
setS3VersionID' u k vid = setRemoteState u k (formatS3VersionID vid)
|
||||
setS3VersionID' u k vid = addRemoteMetaData k $
|
||||
RemoteMetaData u (updateMetaData s3VersionField v emptyMetaData)
|
||||
where
|
||||
v = mkMetaValue (CurrentlySet True) (formatS3VersionID vid)
|
||||
|
||||
getS3VersionID :: S3Info -> UUID -> Key -> S3.Object -> Annex (Either S3.Object S3VersionID)
|
||||
getS3VersionID info u k fallback
|
||||
| versioning info = maybe (Left fallback) Right <$> getS3VersionID' u k
|
||||
getS3VersionID :: UUID -> Key -> Annex [S3VersionID]
|
||||
getS3VersionID u k = do
|
||||
(RemoteMetaData _ m) <- getCurrentRemoteMetaData u k
|
||||
return $ mapMaybe parseS3VersionID $ map unwrap $ S.toList $
|
||||
metaDataValues s3VersionField m
|
||||
where
|
||||
unwrap (MetaValue _ v) = v
|
||||
|
||||
s3VersionField :: MetaField
|
||||
s3VersionField = mkMetaFieldUnchecked "V"
|
||||
|
||||
eitherS3VersionID :: S3Info -> UUID -> Key -> S3.Object -> Annex (Either S3.Object S3VersionID)
|
||||
eitherS3VersionID info u k fallback
|
||||
| versioning info = getS3VersionID u k >>= return . \case
|
||||
[] -> Left fallback
|
||||
-- It's possible for a key to be stored multiple timees in
|
||||
-- a bucket with different version IDs; only use one of them.
|
||||
(v:_) -> Right v
|
||||
| otherwise = return (Left fallback)
|
||||
|
||||
getS3VersionID' :: UUID -> Key -> Annex (Maybe S3VersionID)
|
||||
getS3VersionID' u k = maybe Nothing parseS3VersionID <$> getRemoteState u k
|
||||
|
|
|
@ -37,7 +37,7 @@ module Types.MetaData (
|
|||
ModMeta(..),
|
||||
modMeta,
|
||||
RemoteMetaData(..),
|
||||
mkRemoteMetaData,
|
||||
extractRemoteMetaData,
|
||||
fromRemoteMetaData,
|
||||
prop_metadata_sane,
|
||||
prop_metadata_serialize
|
||||
|
@ -291,8 +291,8 @@ data RemoteMetaData = RemoteMetaData UUID MetaData
|
|||
|
||||
{- Extracts only the fields prefixed with "uuid:", which belong to that
|
||||
- remote. -}
|
||||
mkRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
|
||||
mkRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
||||
extractRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
|
||||
extractRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
||||
M.mapKeys removeprefix $ M.filterWithKey belongsremote m
|
||||
where
|
||||
belongsremote (MetaField f) _v = prefix `isPrefixOf` CI.original f
|
||||
|
|
|
@ -82,7 +82,3 @@ keys that are not used in the current export doesn't help because another
|
|||
repository may have changed the exported tree and be relying on the dropped
|
||||
key being present in the export. Unless... Could export conflict resultion
|
||||
somehow detect that?
|
||||
|
||||
Another reason DELETE from appendonly is not supported is that only one
|
||||
version ID is stored per key, but the same key could have its content in
|
||||
the bucket multiple times under different version IDs.
|
||||
|
|
Loading…
Add table
Reference in a new issue