support public versioned S3 access
Makes git annex whereis display the versionId urls. And, when a s3 remote is enabled without creds, git-annex will use the versionId urls to access its contents. This commit was sponsored by Fernando Jimenez on Patreon.
This commit is contained in:
parent
0630ef166b
commit
b7daf2685f
3 changed files with 73 additions and 41 deletions
106
Remote/S3.hs
106
Remote/S3.hs
|
@ -72,14 +72,14 @@ remote = RemoteType
|
||||||
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
|
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
|
||||||
gen r u c gc = do
|
gen r u c gc = do
|
||||||
cst <- remoteCost gc expensiveRemoteCost
|
cst <- remoteCost gc expensiveRemoteCost
|
||||||
info <- extractS3Info c
|
info <- extractS3Info c u
|
||||||
return $ new cst info
|
return $ new cst info
|
||||||
where
|
where
|
||||||
new cst info = Just $ specialRemote c
|
new cst info = Just $ specialRemote c
|
||||||
(prepareS3Handle this $ store this info)
|
(prepareS3Handle this $ store this info)
|
||||||
(prepareS3HandleMaybe this $ retrieve this info)
|
(prepareS3HandleMaybe this $ retrieve this c info)
|
||||||
(prepareS3Handle this $ remove info)
|
(prepareS3Handle this $ remove info)
|
||||||
(prepareS3HandleMaybe this $ checkKey this info)
|
(prepareS3HandleMaybe this $ checkKey this c info)
|
||||||
this
|
this
|
||||||
where
|
where
|
||||||
this = Remote
|
this = Remote
|
||||||
|
@ -106,7 +106,7 @@ gen r u c gc = do
|
||||||
, removeExportDirectory = Nothing
|
, removeExportDirectory = Nothing
|
||||||
, renameExport = renameExportS3 u info mh
|
, renameExport = renameExportS3 u info mh
|
||||||
}
|
}
|
||||||
, whereisKey = Just (getWebUrls info c)
|
, whereisKey = Just (getPublicWebUrls info c)
|
||||||
, remoteFsck = Nothing
|
, remoteFsck = Nothing
|
||||||
, repairRepo = Nothing
|
, repairRepo = Nothing
|
||||||
, config = c
|
, config = c
|
||||||
|
@ -173,7 +173,7 @@ s3Setup' ss u mcreds c gc
|
||||||
M.union c' $
|
M.union c' $
|
||||||
-- special constraints on key names
|
-- special constraints on key names
|
||||||
M.insert "mungekeys" "ia" defaults
|
M.insert "mungekeys" "ia" defaults
|
||||||
info <- extractS3Info archiveconfig
|
info <- extractS3Info archiveconfig u
|
||||||
withS3Handle archiveconfig gc u $
|
withS3Handle archiveconfig gc u $
|
||||||
writeUUIDFile archiveconfig u info
|
writeUUIDFile archiveconfig u info
|
||||||
use archiveconfig
|
use archiveconfig
|
||||||
|
@ -257,16 +257,16 @@ storeHelper info h f object p = case partSize info of
|
||||||
{- Implemented as a fileRetriever, that uses conduit to stream the chunks
|
{- Implemented as a fileRetriever, that uses conduit to stream the chunks
|
||||||
- out to the file. Would be better to implement a byteRetriever, but
|
- out to the file. Would be better to implement a byteRetriever, but
|
||||||
- that is difficult. -}
|
- that is difficult. -}
|
||||||
retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
|
retrieve :: Remote -> RemoteConfig -> S3Info -> Maybe S3Handle -> Retriever
|
||||||
retrieve r info (Just h) = fileRetriever $ \f k p -> do
|
retrieve r _ info (Just h) = fileRetriever $ \f k p -> do
|
||||||
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
||||||
retrieveHelper info h loc f p
|
retrieveHelper info h loc f p
|
||||||
retrieve r info Nothing = case getpublicurl info of
|
retrieve r c info Nothing = fileRetriever $ \f k p ->
|
||||||
Nothing -> \_ _ _ -> do
|
getPublicWebUrls info c k >>= \case
|
||||||
|
[] -> do
|
||||||
needS3Creds (uuid r)
|
needS3Creds (uuid r)
|
||||||
return False
|
giveup "No S3 credentials configured"
|
||||||
Just geturl -> fileRetriever $ \f k p ->
|
us -> unlessM (downloadUrl k p us f) $
|
||||||
unlessM (downloadUrl k p [geturl $ bucketObject info k] f) $
|
|
||||||
giveup "failed to download content"
|
giveup "failed to download content"
|
||||||
|
|
||||||
retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
|
retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
|
||||||
|
@ -290,19 +290,21 @@ remove info h k = do
|
||||||
S3.DeleteObject (T.pack $ bucketObject info k) (bucket info)
|
S3.DeleteObject (T.pack $ bucketObject info k) (bucket info)
|
||||||
return $ either (const False) (const True) res
|
return $ either (const False) (const True) res
|
||||||
|
|
||||||
checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent
|
checkKey :: Remote -> RemoteConfig -> S3Info -> Maybe S3Handle -> CheckPresent
|
||||||
checkKey r info Nothing k = case getpublicurl info of
|
checkKey r _ info (Just h) k = do
|
||||||
Nothing -> do
|
|
||||||
needS3Creds (uuid r)
|
|
||||||
giveup "No S3 credentials configured"
|
|
||||||
Just geturl -> do
|
|
||||||
showChecking r
|
|
||||||
withUrlOptions $ liftIO .
|
|
||||||
checkBoth (geturl $ bucketObject info k) (keySize k)
|
|
||||||
checkKey r info (Just h) k = do
|
|
||||||
showChecking r
|
showChecking r
|
||||||
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
loc <- eitherS3VersionID info (uuid r) k (T.pack $ bucketObject info k)
|
||||||
checkKeyHelper info h loc
|
checkKeyHelper info h loc
|
||||||
|
checkKey r c info Nothing k =
|
||||||
|
getPublicWebUrls info c k >>= \case
|
||||||
|
[] -> do
|
||||||
|
needS3Creds (uuid r)
|
||||||
|
giveup "No S3 credentials configured"
|
||||||
|
us -> do
|
||||||
|
showChecking r
|
||||||
|
let check u = withUrlOptions $
|
||||||
|
liftIO . checkBoth u (keySize k)
|
||||||
|
anyM check us
|
||||||
|
|
||||||
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
|
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
|
||||||
checkKeyHelper info h loc = do
|
checkKeyHelper info h loc = do
|
||||||
|
@ -413,7 +415,7 @@ renameExportS3 u _ Nothing _ _ _ = do
|
||||||
genBucket :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex ()
|
genBucket :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex ()
|
||||||
genBucket c gc u = do
|
genBucket c gc u = do
|
||||||
showAction "checking bucket"
|
showAction "checking bucket"
|
||||||
info <- extractS3Info c
|
info <- extractS3Info c u
|
||||||
withS3Handle c gc u $ \h ->
|
withS3Handle c gc u $ \h ->
|
||||||
go info h =<< checkUUIDFile c u info h
|
go info h =<< checkUUIDFile c u info h
|
||||||
where
|
where
|
||||||
|
@ -579,10 +581,11 @@ data S3Info = S3Info
|
||||||
, versioning :: Bool
|
, versioning :: Bool
|
||||||
, public :: Bool
|
, public :: Bool
|
||||||
, getpublicurl :: Maybe (BucketObject -> URLString)
|
, getpublicurl :: Maybe (BucketObject -> URLString)
|
||||||
|
, getpublicversionedurls :: Maybe (Key -> Annex [URLString])
|
||||||
}
|
}
|
||||||
|
|
||||||
extractS3Info :: RemoteConfig -> Annex S3Info
|
extractS3Info :: RemoteConfig -> UUID -> Annex S3Info
|
||||||
extractS3Info c = do
|
extractS3Info c u = do
|
||||||
b <- maybe
|
b <- maybe
|
||||||
(giveup "S3 bucket not configured")
|
(giveup "S3 bucket not configured")
|
||||||
(return . T.pack)
|
(return . T.pack)
|
||||||
|
@ -597,21 +600,32 @@ extractS3Info c = do
|
||||||
, isIA = configIA c
|
, isIA = configIA c
|
||||||
, versioning = boolcfg "versioning"
|
, versioning = boolcfg "versioning"
|
||||||
, public = boolcfg "public"
|
, public = boolcfg "public"
|
||||||
, getpublicurl = case M.lookup "publicurl" c of
|
, getpublicurl = case publicurl of
|
||||||
Just u -> Just $ \p -> genericPublicUrl p u
|
Just url -> Just (genericPublicUrl url)
|
||||||
Nothing -> case M.lookup "host" c of
|
Nothing -> case host of
|
||||||
Just h
|
Just h
|
||||||
| h == AWS.s3DefaultHost ->
|
| h == AWS.s3DefaultHost ->
|
||||||
Just (awsPublicUrl info)
|
Just (awsPublicUrl info)
|
||||||
| isIAHost h ->
|
| isIAHost h ->
|
||||||
Just (iaPublicUrl info)
|
Just (iaPublicUrl info)
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
, getpublicversionedurls = if versioning info
|
||||||
|
then case publicurl of
|
||||||
|
Just url -> Just $
|
||||||
|
getS3VersionIDPublicUrls (const $ genericPublicUrl url) info u
|
||||||
|
Nothing -> case host of
|
||||||
|
Just h | h == AWS.s3DefaultHost -> Just $
|
||||||
|
getS3VersionIDPublicUrls awsPublicUrl info u
|
||||||
|
_ -> Nothing
|
||||||
|
else Nothing
|
||||||
}
|
}
|
||||||
return info
|
return info
|
||||||
where
|
where
|
||||||
boolcfg k = case M.lookup k c of
|
boolcfg k = case M.lookup k c of
|
||||||
Just "yes" -> True
|
Just "yes" -> True
|
||||||
_ -> False
|
_ -> False
|
||||||
|
publicurl = M.lookup "publicurl" c
|
||||||
|
host = M.lookup "host" c
|
||||||
|
|
||||||
putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject
|
putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject
|
||||||
putObject info file rbody = (S3.putObject (bucket info) file rbody)
|
putObject info file rbody = (S3.putObject (bucket info) file rbody)
|
||||||
|
@ -687,15 +701,15 @@ iaItemUrl :: BucketName -> URLString
|
||||||
iaItemUrl b = "http://archive.org/details/" ++ b
|
iaItemUrl b = "http://archive.org/details/" ++ b
|
||||||
|
|
||||||
iaPublicUrl :: S3Info -> BucketObject -> URLString
|
iaPublicUrl :: S3Info -> BucketObject -> URLString
|
||||||
iaPublicUrl info p = genericPublicUrl p $
|
iaPublicUrl info = genericPublicUrl $
|
||||||
"http://archive.org/download/" ++ T.unpack (bucket info) ++ "/"
|
"http://archive.org/download/" ++ T.unpack (bucket info) ++ "/"
|
||||||
|
|
||||||
awsPublicUrl :: S3Info -> BucketObject -> URLString
|
awsPublicUrl :: S3Info -> BucketObject -> URLString
|
||||||
awsPublicUrl info p = genericPublicUrl p $
|
awsPublicUrl info = genericPublicUrl $
|
||||||
"https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/"
|
"https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/"
|
||||||
|
|
||||||
genericPublicUrl :: BucketObject -> URLString -> URLString
|
genericPublicUrl :: URLString -> BucketObject -> URLString
|
||||||
genericPublicUrl p baseurl = baseurl ++ p
|
genericPublicUrl baseurl p = baseurl ++ p
|
||||||
|
|
||||||
genCredentials :: CredPair -> IO AWS.Credentials
|
genCredentials :: CredPair -> IO AWS.Credentials
|
||||||
genCredentials (keyid, secret) = AWS.Credentials
|
genCredentials (keyid, secret) = AWS.Credentials
|
||||||
|
@ -730,6 +744,7 @@ s3Info c info = catMaybes
|
||||||
else Nothing
|
else Nothing
|
||||||
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
|
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
|
||||||
, Just ("public", if public info then "yes" else "no")
|
, Just ("public", if public info then "yes" else "no")
|
||||||
|
, Just ("versioning", if versioning info then "yes" else "no")
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
s3c = s3Configuration c
|
s3c = s3Configuration c
|
||||||
|
@ -738,12 +753,13 @@ s3Info c info = catMaybes
|
||||||
#endif
|
#endif
|
||||||
showstorageclass sc = show sc
|
showstorageclass sc = show sc
|
||||||
|
|
||||||
getWebUrls :: S3Info -> RemoteConfig -> Key -> Annex [URLString]
|
getPublicWebUrls :: S3Info -> RemoteConfig -> Key -> Annex [URLString]
|
||||||
getWebUrls info c k
|
getPublicWebUrls info c k
|
||||||
| exportTree c = return []
|
| not (public info) = return []
|
||||||
| otherwise = case (public info, getpublicurl info) of
|
| exportTree c = maybe (return []) (\a -> a k) (getpublicversionedurls info)
|
||||||
(True, Just geturl) -> return [geturl $ bucketObject info k]
|
| otherwise = case getpublicurl info of
|
||||||
_ -> return []
|
Just geturl -> return [geturl $ bucketObject info k]
|
||||||
|
Nothing -> return []
|
||||||
|
|
||||||
data S3VersionID = S3VersionID S3.Object String
|
data S3VersionID = S3VersionID S3.Object String
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
@ -762,12 +778,15 @@ mkS3VersionID' o (Just s)
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
mkS3VersionID' _ Nothing = Nothing
|
mkS3VersionID' _ Nothing = Nothing
|
||||||
|
|
||||||
|
-- Format for storage in per-remote metadata.
|
||||||
|
--
|
||||||
-- A S3 version ID is "url ready" so does not contain '#' and so we'll use
|
-- A S3 version ID is "url ready" so does not contain '#' and so we'll use
|
||||||
-- that to separate it from the object id. (Could use a space, but spaces
|
-- that to separate it from the object id. (Could use a space, but spaces
|
||||||
-- in metadata values lead to an inefficient encoding.)
|
-- in metadata values lead to an inefficient encoding.)
|
||||||
formatS3VersionID :: S3VersionID -> String
|
formatS3VersionID :: S3VersionID -> String
|
||||||
formatS3VersionID (S3VersionID o v) = v ++ '#' : T.unpack o
|
formatS3VersionID (S3VersionID o v) = v ++ '#' : T.unpack o
|
||||||
|
|
||||||
|
-- Parse from value stored in per-remote metadata.
|
||||||
parseS3VersionID :: String -> Maybe S3VersionID
|
parseS3VersionID :: String -> Maybe S3VersionID
|
||||||
parseS3VersionID s =
|
parseS3VersionID s =
|
||||||
let (v, o) = separate (== '#') s
|
let (v, o) = separate (== '#') s
|
||||||
|
@ -803,3 +822,14 @@ eitherS3VersionID info u k fallback
|
||||||
-- a bucket with different version IDs; only use one of them.
|
-- a bucket with different version IDs; only use one of them.
|
||||||
(v:_) -> Right v
|
(v:_) -> Right v
|
||||||
| otherwise = return (Left fallback)
|
| otherwise = return (Left fallback)
|
||||||
|
|
||||||
|
s3VersionIDPublicUrl :: (S3Info -> BucketObject -> URLString) -> S3Info -> S3VersionID -> URLString
|
||||||
|
s3VersionIDPublicUrl mk info (S3VersionID obj vid) = mk info $ concat
|
||||||
|
[ T.unpack obj
|
||||||
|
, "?versionId="
|
||||||
|
, vid -- version ID is "url ready" so no escaping needed
|
||||||
|
]
|
||||||
|
|
||||||
|
getS3VersionIDPublicUrls :: (S3Info -> BucketObject -> URLString) -> S3Info -> UUID -> Key -> Annex [URLString]
|
||||||
|
getS3VersionIDPublicUrls mk info u k =
|
||||||
|
map (s3VersionIDPublicUrl mk info) <$> getS3VersionID u k
|
||||||
|
|
|
@ -77,8 +77,8 @@ the S3 remote.
|
||||||
|
|
||||||
* `versioning` - Setting this to "yes" along with "exporttree=yes",
|
* `versioning` - Setting this to "yes" along with "exporttree=yes",
|
||||||
and [manually enabling versioning for the S3 bucket in the AWS console](https://docs.aws.amazon.com/AmazonS3/latest/user-guide/enable-versioning.html)
|
and [manually enabling versioning for the S3 bucket in the AWS console](https://docs.aws.amazon.com/AmazonS3/latest/user-guide/enable-versioning.html)
|
||||||
allows git-annex to access old versions of files exported to the
|
allows git-annex to access old versions of files that were
|
||||||
special remote with [[git-annex export|git-annex-export]].
|
exported to the special remote by [[git-annex export|git-annex-export]].
|
||||||
|
|
||||||
Note that git-annex needs to remember S3 version IDs for files
|
Note that git-annex needs to remember S3 version IDs for files
|
||||||
sent to a remote configured this way, which will make the git-annex
|
sent to a remote configured this way, which will make the git-annex
|
||||||
|
|
|
@ -5,3 +5,5 @@ does not yet support that, nor does whereis show the urls.
|
||||||
Should not be super hard to add, but it involves converting `getpublicurl`
|
Should not be super hard to add, but it involves converting `getpublicurl`
|
||||||
into an Annex action and distinguishing between different uses of it,
|
into an Annex action and distinguishing between different uses of it,
|
||||||
some of which work with this and some don't. --[[Joey]]
|
some of which work with this and some don't. --[[Joey]]
|
||||||
|
|
||||||
|
> [[done]] --[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue