S3: Support credential-less download from remotes configured with public=yes exporttree=yes.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2018-07-31 16:29:11 -04:00
parent 903b10e2b2
commit 2884637cab
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 73 additions and 41 deletions

View file

@ -1,5 +1,7 @@
git-annex (6.20180720) UNRELEASED; urgency=medium git-annex (6.20180720) UNRELEASED; urgency=medium
* S3: Support credential-less download from remotes configured
with public=yes exporttree=yes.
* Fix reversion in display of http 404 errors. * Fix reversion in display of http 404 errors.
-- Joey Hess <id@joeyh.name> Tue, 31 Jul 2018 12:14:11 -0400 -- Joey Hess <id@joeyh.name> Tue, 31 Jul 2018 12:14:11 -0400

View file

@ -46,7 +46,7 @@ import Creds
import Annex.UUID import Annex.UUID
import Logs.Web import Logs.Web
import Utility.Metered import Utility.Metered
import qualified Utility.Url as Url import qualified Annex.Url as Url
import Utility.DataUnits import Utility.DataUnits
import Utility.FileSystemEncoding import Utility.FileSystemEncoding
import Annex.Content import Annex.Content
@ -54,6 +54,7 @@ import Annex.Url (withUrlOptions)
import Utility.Url (checkBoth, UrlOptions(..)) import Utility.Url (checkBoth, UrlOptions(..))
type BucketName = String type BucketName = String
type BucketObject = String
remote :: RemoteType remote :: RemoteType
remote = RemoteType remote = RemoteType
@ -91,15 +92,15 @@ gen r u c gc = do
, lockContent = Nothing , lockContent = Nothing
, checkPresent = checkPresentDummy , checkPresent = checkPresentDummy
, checkPresentCheap = False , checkPresentCheap = False
, exportActions = withS3Handle c gc u $ \h -> , exportActions = withS3HandleMaybe c gc u $ \mh ->
return $ ExportActions return $ ExportActions
{ storeExport = storeExportS3 info h { storeExport = storeExportS3 u info mh
, retrieveExport = retrieveExportS3 info h , retrieveExport = retrieveExportS3 u info mh
, removeExport = removeExportS3 info h , removeExport = removeExportS3 u info mh
, checkPresentExport = checkPresentExportS3 info h , checkPresentExport = checkPresentExportS3 u info mh
-- S3 does not have directories. -- S3 does not have directories.
, removeExportDirectory = Nothing , removeExportDirectory = Nothing
, renameExport = renameExportS3 info h , renameExport = renameExportS3 u info mh
} }
, whereisKey = Just (getWebUrls info c) , whereisKey = Just (getWebUrls info c)
, remoteFsck = Nothing , remoteFsck = Nothing
@ -188,7 +189,7 @@ store _r info h = fileStorer $ \k f p -> do
storeHelper info h f (T.pack $ bucketObject info k) p storeHelper info h f (T.pack $ bucketObject info k) p
-- Store public URL to item in Internet Archive. -- Store public URL to item in Internet Archive.
when (isIA info && not (isChunkKey k)) $ when (isIA info && not (isChunkKey k)) $
setUrlPresent webUUID k (iaPublicKeyUrl info k) setUrlPresent webUUID k (iaPublicUrl info (bucketObject info k))
return True return True
storeHelper :: S3Info -> S3Handle -> FilePath -> S3.Object -> MeterUpdate -> Annex () storeHelper :: S3Info -> S3Handle -> FilePath -> S3.Object -> MeterUpdate -> Annex ()
@ -254,10 +255,10 @@ retrieve _ info (Just h) = fileRetriever $ \f k p ->
retrieveHelper info h (T.pack $ bucketObject info k) f p retrieveHelper info h (T.pack $ bucketObject info k) f p
retrieve r info Nothing = case getpublicurl info of retrieve r info Nothing = case getpublicurl info of
Nothing -> \_ _ _ -> do Nothing -> \_ _ _ -> do
warnMissingCredPairFor "S3" (AWS.creds $ uuid r) needS3Creds (uuid r)
return False return False
Just geturl -> fileRetriever $ \f k p -> Just geturl -> fileRetriever $ \f k p ->
unlessM (downloadUrl k p [geturl k] f) $ unlessM (downloadUrl k p [geturl $ bucketObject info k] f) $
giveup "failed to download content" giveup "failed to download content"
retrieveHelper :: S3Info -> S3Handle -> S3.Object -> FilePath -> MeterUpdate -> Annex () retrieveHelper :: S3Info -> S3Handle -> S3.Object -> FilePath -> MeterUpdate -> Annex ()
@ -281,11 +282,12 @@ remove info h k = do
checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent
checkKey r info Nothing k = case getpublicurl info of checkKey r info Nothing k = case getpublicurl info of
Nothing -> do Nothing -> do
warnMissingCredPairFor "S3" (AWS.creds $ uuid r) needS3Creds (uuid r)
giveup "No S3 credentials configured" giveup "No S3 credentials configured"
Just geturl -> do Just geturl -> do
showChecking r showChecking r
withUrlOptions $ liftIO . checkBoth (geturl k) (keySize k) withUrlOptions $ liftIO .
checkBoth (geturl $ bucketObject info k) (keySize k)
checkKey r info (Just h) k = do checkKey r info (Just h) k = do
showChecking r showChecking r
checkKeyHelper info h (T.pack $ bucketObject info k) checkKeyHelper info h (T.pack $ bucketObject info k)
@ -316,38 +318,58 @@ checkKeyHelper info h object = do
| otherwise = Nothing | otherwise = Nothing
#endif #endif
storeExportS3 :: S3Info -> S3Handle -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool storeExportS3 :: UUID -> S3Info -> Maybe S3Handle -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
storeExportS3 info h f _k loc p = storeExportS3 _u info (Just h) f _k loc p =
catchNonAsync go (\e -> warning (show e) >> return False) catchNonAsync go (\e -> warning (show e) >> return False)
where where
go = do go = do
storeHelper info h f (T.pack $ bucketExportLocation info loc) p storeHelper info h f (T.pack $ bucketExportLocation info loc) p
return True return True
storeExportS3 u _ Nothing _ _ _ _ = do
needS3Creds u
return False
retrieveExportS3 :: S3Info -> S3Handle -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Bool retrieveExportS3 :: UUID -> S3Info -> Maybe S3Handle -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Bool
retrieveExportS3 info h _k loc f p = retrieveExportS3 u info mh _k loc f p =
catchNonAsync go (\e -> warning (show e) >> return False) catchNonAsync go (\e -> warning (show e) >> return False)
where where
go = do go = case mh of
retrieveHelper info h (T.pack $ bucketExportLocation info loc) f p Just h -> do
retrieveHelper info h (T.pack exporturl) f p
return True return True
Nothing -> case getpublicurl info of
Nothing -> do
needS3Creds u
return False
Just geturl -> Url.withUrlOptions $
liftIO . Url.download p (geturl exporturl) f
exporturl = bucketExportLocation info loc
removeExportS3 :: S3Info -> S3Handle -> Key -> ExportLocation -> Annex Bool removeExportS3 :: UUID -> S3Info -> Maybe S3Handle -> Key -> ExportLocation -> Annex Bool
removeExportS3 info h _k loc = removeExportS3 _u info (Just h) _k loc =
catchNonAsync go (\e -> warning (show e) >> return False) catchNonAsync go (\e -> warning (show e) >> return False)
where where
go = do go = do
res <- tryNonAsync $ sendS3Handle h $ res <- tryNonAsync $ sendS3Handle h $
S3.DeleteObject (T.pack $ bucketExportLocation info loc) (bucket info) S3.DeleteObject (T.pack $ bucketExportLocation info loc) (bucket info)
return $ either (const False) (const True) res return $ either (const False) (const True) res
removeExportS3 u _ Nothing _ _ = do
needS3Creds u
return False
checkPresentExportS3 :: S3Info -> S3Handle -> Key -> ExportLocation -> Annex Bool checkPresentExportS3 :: UUID -> S3Info -> Maybe S3Handle -> Key -> ExportLocation -> Annex Bool
checkPresentExportS3 info h _k loc = checkPresentExportS3 _u info (Just h) _k loc =
checkKeyHelper info h (T.pack $ bucketExportLocation info loc) checkKeyHelper info h (T.pack $ bucketExportLocation info loc)
checkPresentExportS3 u info Nothing k loc = case getpublicurl info of
Nothing -> do
needS3Creds u
giveup "No S3 credentials configured"
Just geturl -> withUrlOptions $ liftIO .
checkBoth (geturl $ bucketExportLocation info loc) (keySize k)
-- S3 has no move primitive; copy and delete. -- S3 has no move primitive; copy and delete.
renameExportS3 :: S3Info -> S3Handle -> Key -> ExportLocation -> ExportLocation -> Annex Bool renameExportS3 :: UUID -> S3Info -> Maybe S3Handle -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportS3 info h _k src dest = catchNonAsync go (\_ -> return False) renameExportS3 _u info (Just h) _k src dest = catchNonAsync go (\_ -> return False)
where where
go = do go = do
let co = S3.copyObject (bucket info) dstobject let co = S3.copyObject (bucket info) dstobject
@ -359,6 +381,9 @@ renameExportS3 info h _k src dest = catchNonAsync go (\_ -> return False)
return True return True
srcobject = T.pack $ bucketExportLocation info src srcobject = T.pack $ bucketExportLocation info src
dstobject = T.pack $ bucketExportLocation info dest dstobject = T.pack $ bucketExportLocation info dest
renameExportS3 u _ Nothing _ _ _ = do
needS3Creds u
return False
{- Generate the bucket if it does not already exist, including creating the {- Generate the bucket if it does not already exist, including creating the
- UUID file within the bucket. - UUID file within the bucket.
@ -477,7 +502,7 @@ withS3Handle :: RemoteConfig -> RemoteGitConfig -> UUID -> (S3Handle -> Annex a)
withS3Handle c gc u a = withS3HandleMaybe c gc u $ \mh -> case mh of withS3Handle c gc u a = withS3HandleMaybe c gc u $ \mh -> case mh of
Just h -> a h Just h -> a h
Nothing -> do Nothing -> do
warnMissingCredPairFor "S3" (AWS.creds u) needS3Creds u
giveup "No S3 credentials configured" giveup "No S3 credentials configured"
withS3HandleMaybe :: RemoteConfig -> RemoteGitConfig -> UUID -> (Maybe S3Handle -> Annex a) -> Annex a withS3HandleMaybe :: RemoteConfig -> RemoteGitConfig -> UUID -> (Maybe S3Handle -> Annex a) -> Annex a
@ -496,6 +521,9 @@ withS3HandleMaybe c gc u a = do
where where
s3cfg = s3Configuration c s3cfg = s3Configuration c
needS3Creds :: UUID -> Annex ()
needS3Creds u = warnMissingCredPairFor "S3" (AWS.creds u)
s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery
s3Configuration c = cfg s3Configuration c = cfg
{ S3.s3Port = port { S3.s3Port = port
@ -525,13 +553,13 @@ s3Configuration c = cfg
data S3Info = S3Info data S3Info = S3Info
{ bucket :: S3.Bucket { bucket :: S3.Bucket
, storageClass :: S3.StorageClass , storageClass :: S3.StorageClass
, bucketObject :: Key -> String , bucketObject :: Key -> BucketObject
, bucketExportLocation :: ExportLocation -> String , bucketExportLocation :: ExportLocation -> BucketObject
, metaHeaders :: [(T.Text, T.Text)] , metaHeaders :: [(T.Text, T.Text)]
, partSize :: Maybe Integer , partSize :: Maybe Integer
, isIA :: Bool , isIA :: Bool
, public :: Bool , public :: Bool
, getpublicurl :: Maybe (Key -> URLString) , getpublicurl :: Maybe (BucketObject -> URLString)
} }
extractS3Info :: RemoteConfig -> Annex S3Info extractS3Info :: RemoteConfig -> Annex S3Info
@ -552,13 +580,13 @@ extractS3Info c = do
Just "yes" -> True Just "yes" -> True
_ -> False _ -> False
, getpublicurl = case M.lookup "publicurl" c of , getpublicurl = case M.lookup "publicurl" c of
Just u -> Just $ genericPublicKeyUrl info u Just u -> Just $ \p -> genericPublicUrl p u
Nothing -> case M.lookup "host" c of Nothing -> case M.lookup "host" c of
Just h Just h
| h == AWS.s3DefaultHost -> | h == AWS.s3DefaultHost ->
Just $ awsPublicKeyUrl info Just (awsPublicUrl info)
| isIAHost h -> | isIAHost h ->
Just $ iaPublicKeyUrl info Just (iaPublicUrl info)
_ -> Nothing _ -> Nothing
} }
return info return info
@ -601,14 +629,14 @@ getMetaHeaders = map munge . filter ismetaheader . M.assocs
getFilePrefix :: RemoteConfig -> String getFilePrefix :: RemoteConfig -> String
getFilePrefix = M.findWithDefault "" "fileprefix" getFilePrefix = M.findWithDefault "" "fileprefix"
getBucketObject :: RemoteConfig -> Key -> FilePath getBucketObject :: RemoteConfig -> Key -> BucketObject
getBucketObject c = munge . key2file getBucketObject c = munge . key2file
where where
munge s = case M.lookup "mungekeys" c of munge s = case M.lookup "mungekeys" c of
Just "ia" -> iaMunge $ getFilePrefix c ++ s Just "ia" -> iaMunge $ getFilePrefix c ++ s
_ -> getFilePrefix c ++ s _ -> getFilePrefix c ++ s
getBucketExportLocation :: RemoteConfig -> ExportLocation -> FilePath getBucketExportLocation :: RemoteConfig -> ExportLocation -> BucketObject
getBucketExportLocation c loc = getFilePrefix c ++ fromExportLocation loc getBucketExportLocation c loc = getFilePrefix c ++ fromExportLocation loc
{- Internet Archive documentation limits filenames to a subset of ascii. {- Internet Archive documentation limits filenames to a subset of ascii.
@ -636,16 +664,16 @@ isIAHost h = ".archive.org" `isSuffixOf` map toLower h
iaItemUrl :: BucketName -> URLString iaItemUrl :: BucketName -> URLString
iaItemUrl b = "http://archive.org/details/" ++ b iaItemUrl b = "http://archive.org/details/" ++ b
iaPublicKeyUrl :: S3Info -> Key -> URLString iaPublicUrl :: S3Info -> BucketObject -> URLString
iaPublicKeyUrl info = genericPublicKeyUrl info $ iaPublicUrl info p = genericPublicUrl p $
"http://archive.org/download/" ++ T.unpack (bucket info) ++ "/" "http://archive.org/download/" ++ T.unpack (bucket info) ++ "/"
awsPublicKeyUrl :: S3Info -> Key -> URLString awsPublicUrl :: S3Info -> BucketObject -> URLString
awsPublicKeyUrl info = genericPublicKeyUrl info $ awsPublicUrl info p = genericPublicUrl p $
"https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/" "https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/"
genericPublicKeyUrl :: S3Info -> URLString -> Key -> URLString genericPublicUrl :: BucketObject -> URLString -> URLString
genericPublicKeyUrl info baseurl k = baseurl ++ bucketObject info k genericPublicUrl p baseurl = baseurl ++ p
genCredentials :: CredPair -> IO AWS.Credentials genCredentials :: CredPair -> IO AWS.Credentials
genCredentials (keyid, secret) = AWS.Credentials genCredentials (keyid, secret) = AWS.Credentials
@ -690,6 +718,6 @@ getWebUrls :: S3Info -> RemoteConfig -> Key -> Annex [URLString]
getWebUrls info c k getWebUrls info c k
| exportTree c = return [] | exportTree c = return []
| otherwise = case (public info, getpublicurl info) of | otherwise = case (public info, getpublicurl info) of
(True, Just geturl) -> return [geturl k] (True, Just geturl) -> return [geturl $ bucketObject info k]
_ -> return [] _ -> return []

View file

@ -1,2 +1,4 @@
S3 remotes using exporttree=yes cannot currently be accessed w/o creds. S3 remotes using exporttree=yes cannot currently be accessed w/o creds.
This is possible to support with some work. --[[Joey]] This is possible to support with some work. --[[Joey]]
> [[done]] --[[Joey]]