S3: Publically accessible buckets can be used without creds.

This commit is contained in:
Joey Hess 2015-06-05 16:23:35 -04:00
parent 4acd28bf21
commit 5f0f063a7a
8 changed files with 115 additions and 61 deletions

View file

@ -9,8 +9,9 @@ module Creds (
module Types.Creds, module Types.Creds,
CredPairStorage(..), CredPairStorage(..),
setRemoteCredPair, setRemoteCredPair,
getRemoteCredPairFor,
getRemoteCredPair, getRemoteCredPair,
getRemoteCredPairFor,
warnMissingCredPairFor,
getEnvCredPair, getEnvCredPair,
writeCacheCreds, writeCacheCreds,
readCacheCreds, readCacheCreds,
@ -74,18 +75,6 @@ setRemoteCredPair _ c storage (Just creds)
{- Gets a remote's credpair, from the environment if set, otherwise {- Gets a remote's credpair, from the environment if set, otherwise
- from the cache in gitAnnexCredsDir, or failing that, from the - from the cache in gitAnnexCredsDir, or failing that, from the
- value in RemoteConfig. -} - value in RemoteConfig. -}
getRemoteCredPairFor :: String -> RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
getRemoteCredPairFor this c storage = maybe missing (return . Just) =<< getRemoteCredPair c storage
where
(loginvar, passwordvar) = credPairEnvironment storage
missing = do
warning $ unwords
[ "Set both", loginvar
, "and", passwordvar
, "to use", this
]
return Nothing
getRemoteCredPair :: RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair) getRemoteCredPair :: RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
where where
@ -122,6 +111,23 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
return $ Just credpair return $ Just credpair
_ -> error "bad creds" _ -> error "bad creds"
getRemoteCredPairFor :: String -> RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
getRemoteCredPairFor this c storage = go =<< getRemoteCredPair c storage
where
go Nothing = do
warnMissingCredPairFor this storage
return Nothing
go (Just credpair) = return $ Just credpair
warnMissingCredPairFor :: String -> CredPairStorage -> Annex ()
warnMissingCredPairFor this storage = warning $ unwords
[ "Set both", loginvar
, "and", passwordvar
, "to use", this
]
where
(loginvar, passwordvar) = credPairEnvironment storage
{- Gets a CredPair from the environment. -} {- Gets a CredPair from the environment. -}
getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair) getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair)
getEnvCredPair storage = liftM2 (,) getEnvCredPair storage = liftM2 (,)

View file

@ -45,6 +45,9 @@ import Annex.UUID
import Logs.Web import Logs.Web
import Utility.Metered import Utility.Metered
import Utility.DataUnits import Utility.DataUnits
import Annex.Content
import Annex.Url (withUrlOptions)
import Utility.Url (checkBoth)
type BucketName = String type BucketName = String
@ -64,7 +67,7 @@ gen r u c gc = do
where where
new cst info = Just $ specialRemote c new cst info = Just $ specialRemote c
(prepareS3Handle this $ store this info) (prepareS3Handle this $ store this info)
(prepareS3HandleMaybe this $ retrieve info) (prepareS3HandleMaybe this $ retrieve this info)
(prepareS3Handle this $ remove info) (prepareS3Handle this $ remove info)
(prepareS3HandleMaybe this $ checkKey this info) (prepareS3HandleMaybe this $ checkKey this info)
this this
@ -90,7 +93,7 @@ gen r u c gc = do
, availability = GloballyAvailable , availability = GloballyAvailable
, remotetype = remote , remotetype = remote
, mkUnavailable = gen r u (M.insert "host" "!dne!" c) gc , mkUnavailable = gen r u (M.insert "host" "!dne!" c) gc
, getInfo = includeCredsInfo c (AWS.creds u) (s3Info c) , getInfo = includeCredsInfo c (AWS.creds u) (s3Info c info)
, claimUrl = Nothing , claimUrl = Nothing
, checkUrl = Nothing , checkUrl = Nothing
} }
@ -158,7 +161,7 @@ prepareS3HandleMaybe r = resourcePrepare $ const $
withS3HandleMaybe (config r) (uuid r) withS3HandleMaybe (config r) (uuid r)
store :: Remote -> S3Info -> S3Handle -> Storer store :: Remote -> S3Info -> S3Handle -> Storer
store r info h = fileStorer $ \k f p -> do store _r info h = fileStorer $ \k f p -> do
case partSize info of case partSize info of
Just partsz | partsz > 0 -> do Just partsz | partsz > 0 -> do
fsz <- liftIO $ getFileSize f fsz <- liftIO $ getFileSize f
@ -168,15 +171,15 @@ store r info h = fileStorer $ \k f p -> do
_ -> singlepartupload k f p _ -> singlepartupload k f p
-- Store public URL to item in Internet Archive. -- Store public URL to item in Internet Archive.
when (isIA info && not (isChunkKey k)) $ when (isIA info && not (isChunkKey k)) $
setUrlPresent webUUID k (iaKeyUrl r k) setUrlPresent webUUID k (iaPublicKeyUrl info k)
return True return True
where where
singlepartupload k f p = do singlepartupload k f p = do
rbody <- liftIO $ httpBodyStorer f p rbody <- liftIO $ httpBodyStorer f p
void $ sendS3Handle h $ putObject info (bucketObject info k) rbody void $ sendS3Handle h $ putObject info (T.pack $ bucketObject info k) rbody
multipartupload fsz partsz k f p = do multipartupload fsz partsz k f p = do
#if MIN_VERSION_aws(0,10,6) #if MIN_VERSION_aws(0,10,6)
let object = bucketObject info k let object = T.pack (bucketObject info k)
let startreq = (S3.postInitiateMultipartUpload (bucket info) object) let startreq = (S3.postInitiateMultipartUpload (bucket info) object)
{ S3.imuStorageClass = Just (storageClass info) { S3.imuStorageClass = Just (storageClass info)
@ -222,10 +225,10 @@ store r info h = fileStorer $ \k f p -> do
{- Implemented as a fileRetriever, that uses conduit to stream the chunks {- Implemented as a fileRetriever, that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but - out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -} - that is difficult. -}
retrieve :: S3Info -> Maybe S3Handle -> Retriever retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
retrieve info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do retrieve _ info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
(fr, fh) <- allocate (openFile f WriteMode) hClose (fr, fh) <- allocate (openFile f WriteMode) hClose
let req = S3.getObject (bucket info) (bucketObject info k) let req = S3.getObject (bucket info) (T.pack $ bucketObject info k)
S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed
release fr release fr
@ -240,7 +243,13 @@ retrieve info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
void $ meterupdate sofar' void $ meterupdate sofar'
S.hPut fh bs S.hPut fh bs
sinkprogressfile fh meterupdate sofar' sinkprogressfile fh meterupdate sofar'
retrieve _info Nothing = error "TODO" retrieve r info Nothing = case getpublicurl info of
Nothing -> \_ _ _ -> do
warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
return False
Just geturl -> fileRetriever $ \f k _p ->
unlessM (downloadUrl [geturl k] f) $
error "failed to download content"
retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False retrieveCheap _ _ _ = return False
@ -255,7 +264,7 @@ remove info h k
return False return False
| otherwise = do | otherwise = do
res <- tryNonAsync $ sendS3Handle h $ res <- tryNonAsync $ sendS3Handle h $
S3.DeleteObject (bucketObject info k) (bucket info) S3.DeleteObject (T.pack $ bucketObject info k) (bucket info)
return $ either (const False) (const True) res return $ either (const False) (const True) res
checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent
@ -271,7 +280,7 @@ checkKey r info (Just h) k = do
#endif #endif
where where
go = sendS3Handle h $ go = sendS3Handle h $
S3.headObject (bucket info) (bucketObject info k) S3.headObject (bucket info) (T.pack $ bucketObject info k)
#if ! MIN_VERSION_aws(0,10,0) #if ! MIN_VERSION_aws(0,10,0)
{- Catch exception headObject returns when an object is not present {- Catch exception headObject returns when an object is not present
@ -285,7 +294,14 @@ checkKey r info (Just h) k = do
| AWS.headerErrorMessage e == "ETag missing" = Just () | AWS.headerErrorMessage e == "ETag missing" = Just ()
| otherwise = Nothing | otherwise = Nothing
#endif #endif
checkKey _r _info Nothing _k = error "TODO"
checkKey r info Nothing k = case getpublicurl info of
Nothing -> do
warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
error "No S3 credentials configured"
Just geturl -> do
showAction $ "checking " ++ name r
withUrlOptions $ checkBoth (geturl k) (keySize k)
{- Generate the bucket if it does not already exist, including creating the {- Generate the bucket if it does not already exist, including creating the
- UUID file within the bucket. - UUID file within the bucket.
@ -385,11 +401,13 @@ withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a
withS3Handle c u a = do withS3Handle c u a = do
withS3HandleMaybe c u $ \mh -> case mh of withS3HandleMaybe c u $ \mh -> case mh of
Just h -> a h Just h -> a h
Nothing -> error "No S3 credentials configured" Nothing -> do
warnMissingCredPairFor "S3" (AWS.creds u)
error "No S3 credentials configured"
withS3HandleMaybe :: RemoteConfig -> UUID -> (Maybe S3Handle -> Annex a) -> Annex a withS3HandleMaybe :: RemoteConfig -> UUID -> (Maybe S3Handle -> Annex a) -> Annex a
withS3HandleMaybe c u a = do withS3HandleMaybe c u a = do
mcreds <- getRemoteCredPairFor "S3" c (AWS.creds u) mcreds <- getRemoteCredPair c (AWS.creds u)
case mcreds of case mcreds of
Just creds -> do Just creds -> do
awscreds <- liftIO $ genCredentials creds awscreds <- liftIO $ genCredentials creds
@ -427,11 +445,12 @@ tryS3 a = (Right <$> a) `catch` (pure . Left)
data S3Info = S3Info data S3Info = S3Info
{ bucket :: S3.Bucket { bucket :: S3.Bucket
, storageClass :: S3.StorageClass , storageClass :: S3.StorageClass
, bucketObject :: Key -> T.Text , bucketObject :: Key -> String
, metaHeaders :: [(T.Text, T.Text)] , metaHeaders :: [(T.Text, T.Text)]
, partSize :: Maybe Integer , partSize :: Maybe Integer
, isIA :: Bool , isIA :: Bool
, acl :: Maybe S3.CannedAcl , public :: Bool
, getpublicurl :: Maybe (Key -> URLString)
} }
extractS3Info :: RemoteConfig -> Annex S3Info extractS3Info :: RemoteConfig -> Annex S3Info
@ -440,17 +459,27 @@ extractS3Info c = do
(error "S3 bucket not configured") (error "S3 bucket not configured")
(return . T.pack) (return . T.pack)
(getBucketName c) (getBucketName c)
return $ S3Info let info = S3Info
{ bucket = b { bucket = b
, storageClass = getStorageClass c , storageClass = getStorageClass c
, bucketObject = T.pack . getBucketObject c , bucketObject = getBucketObject c
, metaHeaders = getMetaHeaders c , metaHeaders = getMetaHeaders c
, partSize = getPartSize c , partSize = getPartSize c
, isIA = configIA c , isIA = configIA c
, acl = case M.lookup "public" c of , public = case M.lookup "public" c of
Just "yes" -> Just S3.AclPublicRead Just "yes" -> True
_ -> Nothing _ -> False
, getpublicurl = case M.lookup "publicurl" c of
Just u -> Just $ genericPublicKeyUrl info u
Nothing -> case M.lookup "host" c of
Just h
| h == AWS.s3DefaultHost ->
Just $ awsPublicKeyUrl info
| isIAHost h ->
Just $ iaPublicKeyUrl info
_ -> Nothing
} }
return info
putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject
putObject info file rbody = (S3.putObject (bucket info) file rbody) putObject info file rbody = (S3.putObject (bucket info) file rbody)
@ -460,6 +489,11 @@ putObject info file rbody = (S3.putObject (bucket info) file rbody)
, S3.poAcl = acl info , S3.poAcl = acl info
} }
acl :: S3Info -> Maybe S3.CannedAcl
acl info
| public info = Just S3.AclPublicRead
| otherwise = Nothing
getBucketName :: RemoteConfig -> Maybe BucketName getBucketName :: RemoteConfig -> Maybe BucketName
getBucketName = map toLower <$$> M.lookup "bucket" getBucketName = map toLower <$$> M.lookup "bucket"
@ -514,10 +548,16 @@ isIAHost h = ".archive.org" `isSuffixOf` map toLower h
iaItemUrl :: BucketName -> URLString iaItemUrl :: BucketName -> URLString
iaItemUrl b = "http://archive.org/details/" ++ b iaItemUrl b = "http://archive.org/details/" ++ b
iaKeyUrl :: Remote -> Key -> URLString iaPublicKeyUrl :: S3Info -> Key -> URLString
iaKeyUrl r k = "http://archive.org/download/" ++ b ++ "/" ++ getBucketObject (config r) k iaPublicKeyUrl info = genericPublicKeyUrl info $
where "http://archive.org/download/" ++ T.unpack (bucket info) ++ "/"
b = fromMaybe "" $ getBucketName $ config r
awsPublicKeyUrl :: S3Info -> Key -> URLString
awsPublicKeyUrl info = genericPublicKeyUrl info $
"https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/"
genericPublicKeyUrl :: S3Info -> URLString -> Key -> URLString
genericPublicKeyUrl info baseurl k = baseurl ++ bucketObject info k
genCredentials :: CredPair -> IO AWS.Credentials genCredentials :: CredPair -> IO AWS.Credentials
genCredentials (keyid, secret) = AWS.Credentials genCredentials (keyid, secret) = AWS.Credentials
@ -539,8 +579,8 @@ debugMapper level t = forward "S3" (T.unpack t)
AWS.Warning -> warningM AWS.Warning -> warningM
AWS.Error -> errorM AWS.Error -> errorM
s3Info :: RemoteConfig -> [(String, String)] s3Info :: RemoteConfig -> S3Info -> [(String, String)]
s3Info c = catMaybes s3Info c info = catMaybes
[ Just ("bucket", fromMaybe "unknown" (getBucketName c)) [ Just ("bucket", fromMaybe "unknown" (getBucketName c))
, Just ("endpoint", w82s (S.unpack (S3.s3Endpoint s3c))) , Just ("endpoint", w82s (S.unpack (S3.s3Endpoint s3c)))
, Just ("port", show (S3.s3Port s3c)) , Just ("port", show (S3.s3Port s3c))
@ -549,6 +589,7 @@ s3Info c = catMaybes
then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c) then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c)
else Nothing else Nothing
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c)) , Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
, Just ("public", if public info then "yes" else "no")
] ]
where where
s3c = s3Configuration c s3c = s3Configuration c

1
debian/changelog vendored
View file

@ -14,6 +14,7 @@ git-annex (5.20150529) UNRELEASED; urgency=medium
it from the import location. it from the import location.
* S3: Special remotes can be configured with public=yes to allow * S3: Special remotes can be configured with public=yes to allow
the public to access the bucket's content. the public to access the bucket's content.
* S3: Publically accessible buckets can be used without creds.
-- Joey Hess <id@joeyh.name> Sat, 30 May 2015 02:07:18 -0400 -- Joey Hess <id@joeyh.name> Sat, 30 May 2015 02:07:18 -0400

View file

@ -50,8 +50,12 @@ the S3 remote.
* `public` - Set to "yes" to allow public read access to files sent * `public` - Set to "yes" to allow public read access to files sent
to the S3 remote. This is accomplished by setting an ACL when each to the S3 remote. This is accomplished by setting an ACL when each
file is uploaded to the remote. So, it can be changed but changes file is uploaded to the remote. So, changes to this setting will
will only affect subseqent uploads. only affect subseqent uploads.
* `publicurl` - Configure the URL that is used to download files
from the bucket when they are available publically.
(This is automatically configured for Amazon S3 and the Internet Archive.)
* `partsize` - Amazon S3 only accepts uploads up to a certian file size, * `partsize` - Amazon S3 only accepts uploads up to a certian file size,
and storing larger files requires a multipart upload process. and storing larger files requires a multipart upload process.

View file

@ -22,16 +22,17 @@ Next, create the S3 remote, and describe it.
The configuration for the S3 remote is stored in git. So to make another The configuration for the S3 remote is stored in git. So to make another
repository use the same S3 remote is easy: repository use the same S3 remote is easy:
# cd /media/usb/annex # export AWS_ACCESS_KEY_ID="08TJMT99S3511WOZEP91"
# export AWS_SECRET_ACCESS_KEY="s3kr1t"
# git pull laptop # git pull laptop
# git annex enableremote cloud # git annex enableremote cloud
enableremote cloud (gpg) (checking bucket) ok enableremote cloud (gpg) (checking bucket) ok
Now the remote can be used like any other remote. Notice that to enable an existing S3 remote, you have to provide the Amazon
AWS credentials because they were not stored in the repository. (It is
possible to configure git-annex to do that, but not the default.)
# git annex copy my_cool_big_file --to cloud See [[public_Amazon_S3_remote]] for how to set up a Amazon S3 remote that
copy my_cool_big_file (gpg) (checking cloud...) (to cloud...) ok can be used by the public, without them needing AWS credentials.
# git annex move video/hackity_hack_and_kaxxt.mov --to cloud
move video/hackity_hack_and_kaxxt.mov (checking cloud...) (to cloud...) ok
See [[special_remotes/S3]] for details. See [[special_remotes/S3]] for details about configuring S3 remotes.

View file

@ -9,3 +9,5 @@ Besides, you never know if and when the file really is available on s3, so runni
How hard would it be to fix that in the s3 remote? How hard would it be to fix that in the s3 remote?
Thanks! --[[anarcat]] Thanks! --[[anarcat]]
> [[done]] --[[Joey]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2015-06-05T20:17:38Z"
content="""
The remote can indeed fallback when there are no creds.
Also, git-annex can set an ACL on files it uploads, if the remote is
configured with public=yes, so no manual ACL setting will be needed.
"""]]

View file

@ -1,11 +0,0 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2015-06-05T17:28:52Z"
content="""
Based on
<http://docs.aws.amazon.com/AmazonS3/latest/dev/WebsiteEndpoints.html>
and my testing, S3 does not default to allowing public access to buckets. So,
this seems like something that it makes sense for the user to
manually configure when setting up a s3 remote.
"""]]