use per-remote metadata storage for S3 version ID
Since the same key can be stored in a versioned S3 bucket multiple times with different version IDs, this allows tracking them all. Not currently needed, but if we ever want to drop from a versioned S3 bucket, we'll need to know them all. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
5c99f6247e
commit
b3d42283ad
4 changed files with 33 additions and 20 deletions
|
@ -96,7 +96,7 @@ getCurrentMetaData' getlogfile k = do
|
||||||
showts = formatPOSIXTime "%F@%H-%M-%S"
|
showts = formatPOSIXTime "%F@%H-%M-%S"
|
||||||
|
|
||||||
getCurrentRemoteMetaData :: UUID -> Key -> Annex RemoteMetaData
|
getCurrentRemoteMetaData :: UUID -> Key -> Annex RemoteMetaData
|
||||||
getCurrentRemoteMetaData u k = mkRemoteMetaData u <$>
|
getCurrentRemoteMetaData u k = extractRemoteMetaData u <$>
|
||||||
getCurrentMetaData' remoteMetaDataLogFile k
|
getCurrentMetaData' remoteMetaDataLogFile k
|
||||||
|
|
||||||
{- Adds in some metadata, which can override existing values, or unset
|
{- Adds in some metadata, which can override existing values, or unset
|
||||||
|
|
41
Remote/S3.hs
41
Remote/S3.hs
|
@ -18,8 +18,9 @@ import qualified Aws.S3 as S3
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import qualified Data.Text.Encoding as T
|
import qualified Data.Text.Encoding as T
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
import qualified Data.ByteString as S
|
import qualified Data.ByteString as BS
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.Set as S
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Network.Socket (HostName)
|
import Network.Socket (HostName)
|
||||||
import Network.HTTP.Conduit (Manager)
|
import Network.HTTP.Conduit (Manager)
|
||||||
|
@ -44,8 +45,9 @@ import Remote.Helper.Export
|
||||||
import qualified Remote.Helper.AWS as AWS
|
import qualified Remote.Helper.AWS as AWS
|
||||||
import Creds
|
import Creds
|
||||||
import Annex.UUID
|
import Annex.UUID
|
||||||
import Logs.RemoteState
|
|
||||||
import Logs.Web
|
import Logs.Web
|
||||||
|
import Logs.MetaData
|
||||||
|
import Types.MetaData
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
import qualified Annex.Url as Url
|
import qualified Annex.Url as Url
|
||||||
import Utility.DataUnits
|
import Utility.DataUnits
|
||||||
|
@ -256,7 +258,7 @@ storeHelper info h f object p = case partSize info of
|
||||||
- that is difficult. -}
|
- that is difficult. -}
|
||||||
retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
|
retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
|
||||||
retrieve r info (Just h) = fileRetriever $ \f k p -> do
|
retrieve r info (Just h) = fileRetriever $ \f k p -> do
|
||||||
loc <- getS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
||||||
retrieveHelper info h loc f p
|
retrieveHelper info h loc f p
|
||||||
retrieve r info Nothing = case getpublicurl info of
|
retrieve r info Nothing = case getpublicurl info of
|
||||||
Nothing -> \_ _ _ -> do
|
Nothing -> \_ _ _ -> do
|
||||||
|
@ -298,7 +300,7 @@ checkKey r info Nothing k = case getpublicurl info of
|
||||||
checkBoth (geturl $ bucketObject info k) (keySize k)
|
checkBoth (geturl $ bucketObject info k) (keySize k)
|
||||||
checkKey r info (Just h) k = do
|
checkKey r info (Just h) k = do
|
||||||
showChecking r
|
showChecking r
|
||||||
loc <- getS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
||||||
checkKeyHelper info h loc
|
checkKeyHelper info h loc
|
||||||
|
|
||||||
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
|
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
|
||||||
|
@ -717,7 +719,7 @@ debugMapper level t = forward "S3" (T.unpack t)
|
||||||
s3Info :: RemoteConfig -> S3Info -> [(String, String)]
|
s3Info :: RemoteConfig -> S3Info -> [(String, String)]
|
||||||
s3Info c info = catMaybes
|
s3Info c info = catMaybes
|
||||||
[ Just ("bucket", fromMaybe "unknown" (getBucketName c))
|
[ Just ("bucket", fromMaybe "unknown" (getBucketName c))
|
||||||
, Just ("endpoint", w82s (S.unpack (S3.s3Endpoint s3c)))
|
, Just ("endpoint", w82s (BS.unpack (S3.s3Endpoint s3c)))
|
||||||
, Just ("port", show (S3.s3Port s3c))
|
, Just ("port", show (S3.s3Port s3c))
|
||||||
, Just ("storage class", showstorageclass (getStorageClass c))
|
, Just ("storage class", showstorageclass (getStorageClass c))
|
||||||
, if configIA c
|
, if configIA c
|
||||||
|
@ -773,12 +775,27 @@ setS3VersionID info u k vid
|
||||||
| otherwise = noop
|
| otherwise = noop
|
||||||
|
|
||||||
setS3VersionID' :: UUID -> Key -> S3VersionID -> Annex ()
|
setS3VersionID' :: UUID -> Key -> S3VersionID -> Annex ()
|
||||||
setS3VersionID' u k vid = setRemoteState u k (formatS3VersionID vid)
|
setS3VersionID' u k vid = addRemoteMetaData k $
|
||||||
|
RemoteMetaData u (updateMetaData s3VersionField v emptyMetaData)
|
||||||
|
where
|
||||||
|
v = mkMetaValue (CurrentlySet True) (formatS3VersionID vid)
|
||||||
|
|
||||||
getS3VersionID :: S3Info -> UUID -> Key -> S3.Object -> Annex (Either S3.Object S3VersionID)
|
getS3VersionID :: UUID -> Key -> Annex [S3VersionID]
|
||||||
getS3VersionID info u k fallback
|
getS3VersionID u k = do
|
||||||
| versioning info = maybe (Left fallback) Right <$> getS3VersionID' u k
|
(RemoteMetaData _ m) <- getCurrentRemoteMetaData u k
|
||||||
|
return $ mapMaybe parseS3VersionID $ map unwrap $ S.toList $
|
||||||
|
metaDataValues s3VersionField m
|
||||||
|
where
|
||||||
|
unwrap (MetaValue _ v) = v
|
||||||
|
|
||||||
|
s3VersionField :: MetaField
|
||||||
|
s3VersionField = mkMetaFieldUnchecked "V"
|
||||||
|
|
||||||
|
eitherS3VersionID :: S3Info -> UUID -> Key -> S3.Object -> Annex (Either S3.Object S3VersionID)
|
||||||
|
eitherS3VersionID info u k fallback
|
||||||
|
| versioning info = getS3VersionID u k >>= return . \case
|
||||||
|
[] -> Left fallback
|
||||||
|
-- It's possible for a key to be stored multiple timees in
|
||||||
|
-- a bucket with different version IDs; only use one of them.
|
||||||
|
(v:_) -> Right v
|
||||||
| otherwise = return (Left fallback)
|
| otherwise = return (Left fallback)
|
||||||
|
|
||||||
getS3VersionID' :: UUID -> Key -> Annex (Maybe S3VersionID)
|
|
||||||
getS3VersionID' u k = maybe Nothing parseS3VersionID <$> getRemoteState u k
|
|
||||||
|
|
|
@ -37,7 +37,7 @@ module Types.MetaData (
|
||||||
ModMeta(..),
|
ModMeta(..),
|
||||||
modMeta,
|
modMeta,
|
||||||
RemoteMetaData(..),
|
RemoteMetaData(..),
|
||||||
mkRemoteMetaData,
|
extractRemoteMetaData,
|
||||||
fromRemoteMetaData,
|
fromRemoteMetaData,
|
||||||
prop_metadata_sane,
|
prop_metadata_sane,
|
||||||
prop_metadata_serialize
|
prop_metadata_serialize
|
||||||
|
@ -291,8 +291,8 @@ data RemoteMetaData = RemoteMetaData UUID MetaData
|
||||||
|
|
||||||
{- Extracts only the fields prefixed with "uuid:", which belong to that
|
{- Extracts only the fields prefixed with "uuid:", which belong to that
|
||||||
- remote. -}
|
- remote. -}
|
||||||
mkRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
|
extractRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
|
||||||
mkRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
extractRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
||||||
M.mapKeys removeprefix $ M.filterWithKey belongsremote m
|
M.mapKeys removeprefix $ M.filterWithKey belongsremote m
|
||||||
where
|
where
|
||||||
belongsremote (MetaField f) _v = prefix `isPrefixOf` CI.original f
|
belongsremote (MetaField f) _v = prefix `isPrefixOf` CI.original f
|
||||||
|
|
|
@ -82,7 +82,3 @@ keys that are not used in the current export doesn't help because another
|
||||||
repository may have changed the exported tree and be relying on the dropped
|
repository may have changed the exported tree and be relying on the dropped
|
||||||
key being present in the export. Unless... Could export conflict resultion
|
key being present in the export. Unless... Could export conflict resultion
|
||||||
somehow detect that?
|
somehow detect that?
|
||||||
|
|
||||||
Another reason DELETE from appendonly is not supported is that only one
|
|
||||||
version ID is stored per key, but the same key could have its content in
|
|
||||||
the bucket multiple times under different version IDs.
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue