S3: Publicly accessible buckets can be used without creds.

This commit is contained in:
Joey Hess 2015-06-05 16:23:35 -04:00
parent 4acd28bf21
commit 5f0f063a7a
8 changed files with 115 additions and 61 deletions


@ -9,8 +9,9 @@ module Creds (
module Types.Creds,
CredPairStorage(..),
setRemoteCredPair,
getRemoteCredPairFor,
getRemoteCredPair,
getRemoteCredPairFor,
warnMissingCredPairFor,
getEnvCredPair,
writeCacheCreds,
readCacheCreds,
@ -74,18 +75,6 @@ setRemoteCredPair _ c storage (Just creds)
{- Gets a remote's credpair, from the environment if set, otherwise
- from the cache in gitAnnexCredsDir, or failing that, from the
- value in RemoteConfig. -}
getRemoteCredPairFor :: String -> RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
getRemoteCredPairFor this c storage = maybe missing (return . Just) =<< getRemoteCredPair c storage
where
(loginvar, passwordvar) = credPairEnvironment storage
missing = do
warning $ unwords
[ "Set both", loginvar
, "and", passwordvar
, "to use", this
]
return Nothing
getRemoteCredPair :: RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
where
@ -122,6 +111,23 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
return $ Just credpair
_ -> error "bad creds"
getRemoteCredPairFor :: String -> RemoteConfig -> CredPairStorage -> Annex (Maybe CredPair)
getRemoteCredPairFor this c storage = go =<< getRemoteCredPair c storage
where
go Nothing = do
warnMissingCredPairFor this storage
return Nothing
go (Just credpair) = return $ Just credpair
warnMissingCredPairFor :: String -> CredPairStorage -> Annex ()
warnMissingCredPairFor this storage = warning $ unwords
[ "Set both", loginvar
, "and", passwordvar
, "to use", this
]
where
(loginvar, passwordvar) = credPairEnvironment storage
{- Gets a CredPair from the environment. -}
getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair)
getEnvCredPair storage = liftM2 (,)
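The point of this refactoring is that getRemoteCredPair no longer warns by itself, so a caller that can operate anonymously may probe for creds silently and invoke warnMissingCredPairFor only when an operation actually requires them. A minimal sketch of the intended calling pattern (useCreds, anonymousPath, and publiclyUsable are placeholders, not functions in this codebase; the real pattern appears in the Remote/S3 changes below):

	-- Sketch only: try creds, fall back to anonymous access, and warn
	-- just when credentials turn out to be required.
	mcreds <- getRemoteCredPair c (AWS.creds u)
	case mcreds of
		Just creds -> useCreds creds
		Nothing
			| publiclyUsable -> anonymousPath
			| otherwise -> do
				warnMissingCredPairFor "S3" (AWS.creds u)
				error "no credentials configured"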


@ -45,6 +45,9 @@ import Annex.UUID
import Logs.Web
import Utility.Metered
import Utility.DataUnits
import Annex.Content
import Annex.Url (withUrlOptions)
import Utility.Url (checkBoth)
type BucketName = String
@ -64,7 +67,7 @@ gen r u c gc = do
where
new cst info = Just $ specialRemote c
(prepareS3Handle this $ store this info)
(prepareS3HandleMaybe this $ retrieve info)
(prepareS3HandleMaybe this $ retrieve this info)
(prepareS3Handle this $ remove info)
(prepareS3HandleMaybe this $ checkKey this info)
this
@ -90,7 +93,7 @@ gen r u c gc = do
, availability = GloballyAvailable
, remotetype = remote
, mkUnavailable = gen r u (M.insert "host" "!dne!" c) gc
, getInfo = includeCredsInfo c (AWS.creds u) (s3Info c)
, getInfo = includeCredsInfo c (AWS.creds u) (s3Info c info)
, claimUrl = Nothing
, checkUrl = Nothing
}
@ -158,7 +161,7 @@ prepareS3HandleMaybe r = resourcePrepare $ const $
withS3HandleMaybe (config r) (uuid r)
store :: Remote -> S3Info -> S3Handle -> Storer
store r info h = fileStorer $ \k f p -> do
store _r info h = fileStorer $ \k f p -> do
case partSize info of
Just partsz | partsz > 0 -> do
fsz <- liftIO $ getFileSize f
@ -168,15 +171,15 @@ store r info h = fileStorer $ \k f p -> do
_ -> singlepartupload k f p
-- Store public URL to item in Internet Archive.
when (isIA info && not (isChunkKey k)) $
setUrlPresent webUUID k (iaKeyUrl r k)
setUrlPresent webUUID k (iaPublicKeyUrl info k)
return True
where
singlepartupload k f p = do
rbody <- liftIO $ httpBodyStorer f p
void $ sendS3Handle h $ putObject info (bucketObject info k) rbody
void $ sendS3Handle h $ putObject info (T.pack $ bucketObject info k) rbody
multipartupload fsz partsz k f p = do
#if MIN_VERSION_aws(0,10,6)
let object = bucketObject info k
let object = T.pack (bucketObject info k)
let startreq = (S3.postInitiateMultipartUpload (bucket info) object)
{ S3.imuStorageClass = Just (storageClass info)
@ -222,10 +225,10 @@ store r info h = fileStorer $ \k f p -> do
{- Implemented as a fileRetriever that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -}
retrieve :: S3Info -> Maybe S3Handle -> Retriever
retrieve info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
retrieve :: Remote -> S3Info -> Maybe S3Handle -> Retriever
retrieve _ info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
(fr, fh) <- allocate (openFile f WriteMode) hClose
let req = S3.getObject (bucket info) (bucketObject info k)
let req = S3.getObject (bucket info) (T.pack $ bucketObject info k)
S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed
release fr
@ -240,7 +243,13 @@ retrieve info (Just h) = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
void $ meterupdate sofar'
S.hPut fh bs
sinkprogressfile fh meterupdate sofar'
retrieve _info Nothing = error "TODO"
retrieve r info Nothing = case getpublicurl info of
Nothing -> \_ _ _ -> do
warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
return False
Just geturl -> fileRetriever $ \f k _p ->
unlessM (downloadUrl [geturl k] f) $
error "failed to download content"
retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
@ -255,7 +264,7 @@ remove info h k
return False
| otherwise = do
res <- tryNonAsync $ sendS3Handle h $
S3.DeleteObject (bucketObject info k) (bucket info)
S3.DeleteObject (T.pack $ bucketObject info k) (bucket info)
return $ either (const False) (const True) res
checkKey :: Remote -> S3Info -> Maybe S3Handle -> CheckPresent
@ -271,7 +280,7 @@ checkKey r info (Just h) k = do
#endif
where
go = sendS3Handle h $
S3.headObject (bucket info) (bucketObject info k)
S3.headObject (bucket info) (T.pack $ bucketObject info k)
#if ! MIN_VERSION_aws(0,10,0)
{- Catch exception headObject returns when an object is not present
@ -285,7 +294,14 @@ checkKey r info (Just h) k = do
| AWS.headerErrorMessage e == "ETag missing" = Just ()
| otherwise = Nothing
#endif
checkKey _r _info Nothing _k = error "TODO"
checkKey r info Nothing k = case getpublicurl info of
Nothing -> do
warnMissingCredPairFor "S3" (AWS.creds $ uuid r)
error "No S3 credentials configured"
Just geturl -> do
showAction $ "checking " ++ name r
withUrlOptions $ checkBoth (geturl k) (keySize k)
{- Generate the bucket if it does not already exist, including creating the
- UUID file within the bucket.
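In the credential-less checkKey branch above, presence is verified over plain HTTP: checkBoth appears to check both that the public URL exists and that the size the server reports matches the key's recorded size (keySize k), when one is known. The same call, restated as a standalone helper with a hypothetical name:

	checkKeyPublic :: (Key -> URLString) -> Key -> Annex Bool
	checkKeyPublic geturl k = withUrlOptions $ checkBoth (geturl k) (keySize k)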
@ -385,11 +401,13 @@ withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a
withS3Handle c u a = do
withS3HandleMaybe c u $ \mh -> case mh of
Just h -> a h
Nothing -> error "No S3 credentials configured"
Nothing -> do
warnMissingCredPairFor "S3" (AWS.creds u)
error "No S3 credentials configured"
withS3HandleMaybe :: RemoteConfig -> UUID -> (Maybe S3Handle -> Annex a) -> Annex a
withS3HandleMaybe c u a = do
mcreds <- getRemoteCredPairFor "S3" c (AWS.creds u)
mcreds <- getRemoteCredPair c (AWS.creds u)
case mcreds of
Just creds -> do
awscreds <- liftIO $ genCredentials creds
@ -427,11 +445,12 @@ tryS3 a = (Right <$> a) `catch` (pure . Left)
data S3Info = S3Info
{ bucket :: S3.Bucket
, storageClass :: S3.StorageClass
, bucketObject :: Key -> T.Text
, bucketObject :: Key -> String
, metaHeaders :: [(T.Text, T.Text)]
, partSize :: Maybe Integer
, isIA :: Bool
, acl :: Maybe S3.CannedAcl
, public :: Bool
, getpublicurl :: Maybe (Key -> URLString)
}
extractS3Info :: RemoteConfig -> Annex S3Info
@ -440,17 +459,27 @@ extractS3Info c = do
(error "S3 bucket not configured")
(return . T.pack)
(getBucketName c)
return $ S3Info
let info = S3Info
{ bucket = b
, storageClass = getStorageClass c
, bucketObject = T.pack . getBucketObject c
, bucketObject = getBucketObject c
, metaHeaders = getMetaHeaders c
, partSize = getPartSize c
, isIA = configIA c
, acl = case M.lookup "public" c of
Just "yes" -> Just S3.AclPublicRead
_ -> Nothing
, public = case M.lookup "public" c of
Just "yes" -> True
_ -> False
, getpublicurl = case M.lookup "publicurl" c of
Just u -> Just $ genericPublicKeyUrl info u
Nothing -> case M.lookup "host" c of
Just h
| h == AWS.s3DefaultHost ->
Just $ awsPublicKeyUrl info
| isIAHost h ->
Just $ iaPublicKeyUrl info
_ -> Nothing
}
return info
putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject
putObject info file rbody = (S3.putObject (bucket info) file rbody)
@ -460,6 +489,11 @@ putObject info file rbody = (S3.putObject (bucket info) file rbody)
, S3.poAcl = acl info
}
acl :: S3Info -> Maybe S3.CannedAcl
acl info
| public info = Just S3.AclPublicRead
| otherwise = Nothing
getBucketName :: RemoteConfig -> Maybe BucketName
getBucketName = map toLower <$$> M.lookup "bucket"
@ -514,10 +548,16 @@ isIAHost h = ".archive.org" `isSuffixOf` map toLower h
iaItemUrl :: BucketName -> URLString
iaItemUrl b = "http://archive.org/details/" ++ b
iaKeyUrl :: Remote -> Key -> URLString
iaKeyUrl r k = "http://archive.org/download/" ++ b ++ "/" ++ getBucketObject (config r) k
where
b = fromMaybe "" $ getBucketName $ config r
iaPublicKeyUrl :: S3Info -> Key -> URLString
iaPublicKeyUrl info = genericPublicKeyUrl info $
"http://archive.org/download/" ++ T.unpack (bucket info) ++ "/"
awsPublicKeyUrl :: S3Info -> Key -> URLString
awsPublicKeyUrl info = genericPublicKeyUrl info $
"https://" ++ T.unpack (bucket info) ++ ".s3.amazonaws.com/"
genericPublicKeyUrl :: S3Info -> URLString -> Key -> URLString
genericPublicKeyUrl info baseurl k = baseurl ++ bucketObject info k
genCredentials :: CredPair -> IO AWS.Credentials
genCredentials (keyid, secret) = AWS.Credentials
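To illustrate how the URL helpers above compose (every value below is made up):

	-- Suppose, hypothetically:
	--   bucket info         == "mybucket"
	--   bucketObject info k == "SHA256E-s42--deadbeef"
	-- Then:
	--   awsPublicKeyUrl info k
	--     == "https://mybucket.s3.amazonaws.com/SHA256E-s42--deadbeef"
	--   iaPublicKeyUrl info k
	--     == "http://archive.org/download/mybucket/SHA256E-s42--deadbeef"
	--   genericPublicKeyUrl info "https://example.com/mirror/" k
	--     == "https://example.com/mirror/SHA256E-s42--deadbeef"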
@ -539,8 +579,8 @@ debugMapper level t = forward "S3" (T.unpack t)
AWS.Warning -> warningM
AWS.Error -> errorM
s3Info :: RemoteConfig -> [(String, String)]
s3Info c = catMaybes
s3Info :: RemoteConfig -> S3Info -> [(String, String)]
s3Info c info = catMaybes
[ Just ("bucket", fromMaybe "unknown" (getBucketName c))
, Just ("endpoint", w82s (S.unpack (S3.s3Endpoint s3c)))
, Just ("port", show (S3.s3Port s3c))
@ -549,6 +589,7 @@ s3Info c = catMaybes
then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c)
else Nothing
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
, Just ("public", if public info then "yes" else "no")
]
where
s3c = s3Configuration c

debian/changelog

@ -14,6 +14,7 @@ git-annex (5.20150529) UNRELEASED; urgency=medium
it from the import location.
* S3: Special remotes can be configured with public=yes to allow
the public to access the bucket's content.
* S3: Publicly accessible buckets can be used without creds.
-- Joey Hess <id@joeyh.name> Sat, 30 May 2015 02:07:18 -0400


@ -50,8 +50,12 @@ the S3 remote.
* `public` - Set to "yes" to allow public read access to files sent
to the S3 remote. This is accomplished by setting an ACL when each
file is uploaded to the remote. So, it can be changed but changes
will only affect subsequent uploads.
file is uploaded to the remote. So, changes to this setting will
only affect subsequent uploads.
* `publicurl` - Configure the URL that is used to download files
from the bucket when they are publicly accessible; see the setup
sketch below for an example.
(This is automatically configured for Amazon S3 and the Internet Archive.)
* `partsize` - Amazon S3 only accepts uploads up to a certain file size,
and storing larger files requires a multipart upload process.
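A possible setup sketch combining these options (the remote name, bucket name, and URL are illustrative, not taken from this page):

	# git annex initremote cloud type=S3 encryption=none public=yes bucket=mybucket
	# git annex copy my_cool_big_file --to cloud

On a non-Amazon S3 server, `publicurl` would be supplied by hand, for example publicurl=https://example.com/mybucket/.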


@ -22,16 +22,17 @@ Next, create the S3 remote, and describe it.
The configuration for the S3 remote is stored in git, so it is easy to make
another repository use the same S3 remote:
# cd /media/usb/annex
# export AWS_ACCESS_KEY_ID="08TJMT99S3511WOZEP91"
# export AWS_SECRET_ACCESS_KEY="s3kr1t"
# git pull laptop
# git annex enableremote cloud
enableremote cloud (gpg) (checking bucket) ok
Now the remote can be used like any other remote.
Notice that to enable an existing S3 remote, you have to provide the AWS
credentials, because they are not stored in the repository. (It is possible
to configure git-annex to store them there, but that is not the default.)
# git annex copy my_cool_big_file --to cloud
copy my_cool_big_file (gpg) (checking cloud...) (to cloud...) ok
# git annex move video/hackity_hack_and_kaxxt.mov --to cloud
move video/hackity_hack_and_kaxxt.mov (checking cloud...) (to cloud...) ok
See [[public_Amazon_S3_remote]] for how to set up an Amazon S3 remote that
can be used by the public, without them needing AWS credentials.
See [[special_remotes/S3]] for details.
See [[special_remotes/S3]] for details about configuring S3 remotes.
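If the remote was configured with public=yes, the enableremote step above should also work without exporting any AWS credentials, and `git annex get` should then download via the bucket's public URL. A hedged sketch, commands only, reusing the names from the example above:

	# git annex enableremote cloud
	# git annex get my_cool_big_file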


@ -9,3 +9,5 @@ Besides, you never know if and when the file really is available on s3, so runni
How hard would it be to fix that in the s3 remote?
Thanks! --[[anarcat]]
> [[done]] --[[Joey]]


@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2015-06-05T20:17:38Z"
content="""
The remote can indeed fall back when there are no creds.
Also, git-annex can set an ACL on files it uploads, if the remote is
configured with public=yes, so no manual ACL setting will be needed.
"""]]


@ -1,11 +0,0 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2015-06-05T17:28:52Z"
content="""
Based on
<http://docs.aws.amazon.com/AmazonS3/latest/dev/WebsiteEndpoints.html>
and my testing, S3 does not default to allowing public access to buckets. So,
this seems like something the user should configure manually when setting
up an S3 remote.
"""]]