support public versioned S3 access

Makes git annex whereis display the versionId urls.

And, when a s3 remote is enabled without creds, git-annex will use the
versionId urls to access its contents.

This commit was sponsored by Fernando Jimenez on Patreon.
This commit is contained in:
Joey Hess 2018-09-06 14:31:41 -04:00
parent 0630ef166b
commit b7daf2685f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 73 additions and 41 deletions

View file

@ -72,14 +72,14 @@ remote = RemoteType
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do gen r u c gc = do
cst <- remoteCost gc expensiveRemoteCost cst <- remoteCost gc expensiveRemoteCost
info <- extractS3Info c info <- extractS3Info c u
return $ new cst info return $ new cst info
where where
new cst info = Just $ specialRemote c new cst info = Just $ specialRemote c
(prepareS3Handle this $ store this info) (prepareS3Handle this $ store this info)
(prepareS3HandleMaybe this $ retrieve this info) (prepareS3HandleMaybe this $ retrieve this c info)
(prepareS3Handle this $ remove info) (prepareS3Handle this $ remove info)
(prepareS3HandleMaybe this $ checkKey this info) (prepareS3HandleMaybe this $ checkKey this c info)
this this
where where
this = Remote this = Remote
@ -106,7 +106,7 @@ gen r u c gc = do
, removeExportDirectory = Nothing , removeExportDirectory = Nothing
, renameExport = renameExportS3 u info mh , renameExport = renameExportS3 u info mh
} }
, whereisKey = Just (getWebUrls info c) , whereisKey = Just (getPublicWebUrls info c)
, remoteFsck = Nothing , remoteFsck = Nothing
, repairRepo = Nothing , repairRepo = Nothing
, config = c , config = c
@ -173,7 +173,7 @@ s3Setup' ss u mcreds c gc
M.union c' $ M.union c' $
-- special constraints on key names -- special constraints on key names
M.insert "mungekeys" "ia" defaults M.insert "mungekeys" "ia" defaults
info <- extractS3Info archiveconfig info <- extractS3Info archiveconfig u
withS3Handle archiveconfig gc u $ withS3Handle archiveconfig gc u $
writeUUIDFile archiveconfig u info writeUUIDFile archiveconfig u info
use archiveconfig use archiveconfig
@ -257,16 +257,16 @@ storeHelper info h f object p = case partSize info of
{- Implemented as a fileRetriever, that uses conduit to stream the chunks {- Implemented as a fileRetriever, that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but - out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -} - that is difficult. -}
retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever retrieve :: Remote -> RemoteConfig -> S3Info -> Maybe S3Handle -> Retriever
retrieve r info (Just h) = fileRetriever $ \f k p -> do retrieve r _ info (Just h) = fileRetriever $ \f k p -> do
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k) loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
retrieveHelper info h loc f p retrieveHelper info h loc f p
retrieve r info Nothing = case getpublicurl info of retrieve r c info Nothing = fileRetriever $ \f k p ->
Nothing -> \_ _ _ -> do getPublicWebUrls info c k >>= \case
[] -> do
needS3Creds (uuid r) needS3Creds (uuid r)
return False giveup "No S3 credentials configured"
Just geturl -> fileRetriever $ \f k p -> us -> unlessM (downloadUrl k p us f) $
unlessM (downloadUrl k p [geturl $ bucketObject info k] f) $
giveup "failed to download content" giveup "failed to download content"
retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex () retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
@ -290,19 +290,21 @@ remove info h k = do
S3.DeleteObject (T.pack $ bucketObject info k) (bucket info) S3.DeleteObject (T.pack $ bucketObject info k) (bucket info)
return $ either (const False) (const True) res return $ either (const False) (const True) res
checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent checkKey :: Remote -> RemoteConfig -> S3Info -> Maybe S3Handle -> CheckPresent
checkKey r info Nothing k = case getpublicurl info of checkKey r _ info (Just h) k = do
Nothing -> do
needS3Creds (uuid r)
giveup "No S3 credentials configured"
Just geturl -> do
showChecking r
withUrlOptions $ liftIO .
checkBoth (geturl $ bucketObject info k) (keySize k)
checkKey r info (Just h) k = do
showChecking r showChecking r
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k) loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
checkKeyHelper info h loc checkKeyHelper info h loc
checkKey r c info Nothing k =
getPublicWebUrls info c k >>= \case
[] -> do
needS3Creds (uuid r)
giveup "No S3 credentials configured"
us -> do
showChecking r
let check u = withUrlOptions $
liftIO . checkBoth u (keySize k)
anyM check us
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
checkKeyHelper info h loc = do checkKeyHelper info h loc = do
@ -413,7 +415,7 @@ renameExportS3 u _ Nothing _ _ _ = do
genBucket :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex () genBucket :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex ()
genBucket c gc u = do genBucket c gc u = do
showAction "checking bucket" showAction "checking bucket"
info <- extractS3Info c info <- extractS3Info c u
withS3Handle c gc u $ \h -> withS3Handle c gc u $ \h ->
go info h =<< checkUUIDFile c u info h go info h =<< checkUUIDFile c u info h
where where
@ -579,10 +581,11 @@ data S3Info = S3Info
, versioning :: Bool , versioning :: Bool
, public :: Bool , public :: Bool
, getpublicurl :: Maybe (BucketObject -> URLString) , getpublicurl :: Maybe (BucketObject -> URLString)
, getpublicversionedurls :: Maybe (Key -> Annex [URLString])
} }
extractS3Info :: RemoteConfig -> Annex S3Info extractS3Info :: RemoteConfig -> UUID -> Annex S3Info
extractS3Info c = do extractS3Info c u = do
b <- maybe b <- maybe
(giveup "S3 bucket not configured") (giveup "S3 bucket not configured")
(return . T.pack) (return . T.pack)
@ -597,21 +600,32 @@ extractS3Info c = do
, isIA = configIA c , isIA = configIA c
, versioning = boolcfg "versioning" , versioning = boolcfg "versioning"
, public = boolcfg "public" , public = boolcfg "public"
, getpublicurl = case M.lookup "publicurl" c of , getpublicurl = case publicurl of
Just u -> Just $ \p -> genericPublicUrl p u Just url -> Just (genericPublicUrl url)
Nothing -> case M.lookup "host" c of Nothing -> case host of
Just h Just h
| h == AWS.s3DefaultHost -> | h == AWS.s3DefaultHost ->
Just (awsPublicUrl info) Just (awsPublicUrl info)
| isIAHost h -> | isIAHost h ->
Just (iaPublicUrl info) Just (iaPublicUrl info)
_ -> Nothing _ -> Nothing
, getpublicversionedurls = if versioning info
then case publicurl of
Just url -> Just $
getS3VersionIDPublicUrls (const $ genericPublicUrl url) info u
Nothing -> case host of
Just h | h == AWS.s3DefaultHost -> Just $
getS3VersionIDPublicUrls awsPublicUrl info u
_ -> Nothing
else Nothing
} }
return info return info
where where
boolcfg k = case M.lookup k c of boolcfg k = case M.lookup k c of
Just "yes" -> True Just "yes" -> True
_ -> False _ -> False
publicurl = M.lookup "publicurl" c
host = M.lookup "host" c
putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject
putObject info file rbody = (S3.putObject (bucket info) file rbody) putObject info file rbody = (S3.putObject (bucket info) file rbody)
@ -687,15 +701,15 @@ iaItemUrl :: BucketName -> URLString
iaItemUrl b = "http://archive.org/details/" ++ b iaItemUrl b = "http://archive.org/details/" ++ b
iaPublicUrl :: S3Info -> BucketObject -> URLString iaPublicUrl :: S3Info -> BucketObject -> URLString
iaPublicUrl info p = genericPublicUrl p $ iaPublicUrl info = genericPublicUrl $
"http://archive.org/download/" ++ T.unpack (bucket info) ++ "/" "http://archive.org/download/" ++ T.unpack (bucket info) ++ "/"
awsPublicUrl :: S3Info -> BucketObject -> URLString awsPublicUrl :: S3Info -> BucketObject -> URLString
awsPublicUrl info p = genericPublicUrl p $ awsPublicUrl info = genericPublicUrl $
"https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/" "https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/"
genericPublicUrl :: BucketObject -> URLString -> URLString genericPublicUrl :: URLString -> BucketObject -> URLString
genericPublicUrl p baseurl = baseurl ++ p genericPublicUrl baseurl p = baseurl ++ p
genCredentials :: CredPair -> IO AWS.Credentials genCredentials :: CredPair -> IO AWS.Credentials
genCredentials (keyid, secret) = AWS.Credentials genCredentials (keyid, secret) = AWS.Credentials
@ -730,6 +744,7 @@ s3Info c info = catMaybes
else Nothing else Nothing
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c)) , Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
, Just ("public", if public info then "yes" else "no") , Just ("public", if public info then "yes" else "no")
, Just ("versioning", if versioning info then "yes" else "no")
] ]
where where
s3c = s3Configuration c s3c = s3Configuration c
@ -738,12 +753,13 @@ s3Info c info = catMaybes
#endif #endif
showstorageclass sc = show sc showstorageclass sc = show sc
getWebUrls :: S3Info -> RemoteConfig -> Key -> Annex [URLString] getPublicWebUrls :: S3Info -> RemoteConfig -> Key -> Annex [URLString]
getWebUrls info c k getPublicWebUrls info c k
| exportTree c = return [] | not (public info) = return []
| otherwise = case (public info, getpublicurl info) of | exportTree c = maybe (return []) (\a -> a k) (getpublicversionedurls info)
(True, Just geturl) -> return [geturl $ bucketObject info k] | otherwise = case getpublicurl info of
_ -> return [] Just geturl -> return [geturl $ bucketObject info k]
Nothing -> return []
data S3VersionID = S3VersionID S3.Object String data S3VersionID = S3VersionID S3.Object String
deriving (Show) deriving (Show)
@ -762,12 +778,15 @@ mkS3VersionID' o (Just s)
| otherwise = Nothing | otherwise = Nothing
mkS3VersionID' _ Nothing = Nothing mkS3VersionID' _ Nothing = Nothing
-- Format for storage in per-remote metadata.
--
-- A S3 version ID is "url ready" so does not contain '#' and so we'll use -- A S3 version ID is "url ready" so does not contain '#' and so we'll use
-- that to separate it from the object id. (Could use a space, but spaces -- that to separate it from the object id. (Could use a space, but spaces
-- in metadata values lead to an inefficient encoding.) -- in metadata values lead to an inefficient encoding.)
formatS3VersionID :: S3VersionID -> String formatS3VersionID :: S3VersionID -> String
formatS3VersionID (S3VersionID o v) = v ++ '#' : T.unpack o formatS3VersionID (S3VersionID o v) = v ++ '#' : T.unpack o
-- Parse from value stored in per-remote metadata.
parseS3VersionID :: String -> Maybe S3VersionID parseS3VersionID :: String -> Maybe S3VersionID
parseS3VersionID s = parseS3VersionID s =
let (v, o) = separate (== '#') s let (v, o) = separate (== '#') s
@ -803,3 +822,14 @@ eitherS3VersionID info u k fallback
-- a bucket with different version IDs; only use one of them. -- a bucket with different version IDs; only use one of them.
(v:_) -> Right v (v:_) -> Right v
| otherwise = return (Left fallback) | otherwise = return (Left fallback)
s3VersionIDPublicUrl :: (S3Info -> BucketObject -> URLString) -> S3Info -> S3VersionID -> URLString
s3VersionIDPublicUrl mk info (S3VersionID obj vid) = mk info $ concat
[ T.unpack obj
, "?versionId="
, vid -- version ID is "url ready" so no escaping needed
]
getS3VersionIDPublicUrls :: (S3Info -> BucketObject -> URLString) -> S3Info -> UUID -> Key -> Annex [URLString]
getS3VersionIDPublicUrls mk info u k =
map (s3VersionIDPublicUrl mk info) <$> getS3VersionID u k

View file

@ -77,8 +77,8 @@ the S3 remote.
* `versioning` - Setting this to "yes" along with "exporttree=yes", * `versioning` - Setting this to "yes" along with "exporttree=yes",
and [manually enabling versioning for the S3 bucket in the AWS console](https://docs.aws.amazon.com/AmazonS3/latest/user-guide/enable-versioning.html) and [manually enabling versioning for the S3 bucket in the AWS console](https://docs.aws.amazon.com/AmazonS3/latest/user-guide/enable-versioning.html)
allows git-annex to access old versions of files exported to the allows git-annex to access old versions of files that were
special remote with [[git-annex export|git-annex-export]]. exported to the special remote by [[git-annex export|git-annex-export]].
Note that git-annex needs to remember S3 version IDs for files Note that git-annex needs to remember S3 version IDs for files
sent to a remote configured this way, which will make the git-annex sent to a remote configured this way, which will make the git-annex

View file

@ -5,3 +5,5 @@ does not yet support that, nor does whereis show the urls.
Should not be super hard to add, but it involves converting `getpublicurl` Should not be super hard to add, but it involves converting `getpublicurl`
into an Annex action and distinguishing between different uses of it, into an Annex action and distinguishing between different uses of it,
some of which work with this and some don't. --[[Joey]] some of which work with this and some don't. --[[Joey]]
> [[done]] --[[Joey]]