WIP converting S3 special remote from hS3 to aws library

Currently, initremote works, but not the other operations. They should be
fairly easy to add from this base.
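
As a rough illustration of what "fairly easy to add" means here, the sketch below (untested, not code from this commit, and using only names that appear in the diff further down; retrieveSketch is a hypothetical name) shows how retrieval could be layered on sendS3Handle, mirroring what checkUUIDFile already does:

    -- Hypothetical helper: fetch an object into memory via the new S3Handle,
    -- following the same pattern as checkUUIDFile in Remote/S3.hs.
    retrieveSketch :: S3Handle -> T.Text -> T.Text -> Annex (Either S3.S3Error L.ByteString)
    retrieveSketch h bucket object = do
        v <- sendS3Handle h (S3.getObject bucket object)
        case v of
            Left e -> return (Left e)
            Right r -> liftIO $ runResourceT $ do
                S3.GetObjectMemoryResponse _meta rsp <- AWS.loadToMemory r
                return (Right (responseBody rsp))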

Also, https://github.com/aristidb/aws/issues/119 blocks internet archive
support.

Note that since http-conduit is used, this also adds https support to S3.
Although git-annex encrypts everything anyway, https may not be especially
useful. It is not enabled by default, because existing S3 special remotes
have port=80 in their config. Setting port=443 will enable it.
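
For reference, the protocol selection this enables boils down to the following rule (a standalone restatement of the s3Configuration logic in Remote/S3.hs below; protocolForPort is a hypothetical name, not in the commit):

    import qualified Aws as AWS

    -- Only port 443 selects https; anything else stays on plain http.
    protocolForPort :: Int -> AWS.Protocol
    protocolForPort 443 = AWS.HTTPS
    protocolForPort _   = AWS.HTTP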

This commit was sponsored by Daniel Brockman.
Joey Hess 2014-08-08 18:54:04 -04:00
parent 7712e70885
commit 6fcca2f13e
5 changed files with 165 additions and 64 deletions

Remote/Helper/AWS.hs

@@ -1,6 +1,6 @@
{- Amazon Web Services common infrastructure.
-
- Copyright 2011,2012 Joey Hess <joey@kitenet.net>
- Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -12,8 +12,14 @@ module Remote.Helper.AWS where
import Common.Annex
import Creds
import qualified Aws
import qualified Aws.S3 as S3
import qualified Data.Map as M
import qualified Data.ByteString as B
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import Data.Text (Text)
import Data.IORef
creds :: UUID -> CredPairStorage
creds u = CredPairStorage
@@ -22,6 +28,13 @@ creds u = CredPairStorage
, credPairRemoteKey = Just "s3creds"
}
genCredentials :: CredPair -> IO Aws.Credentials
genCredentials (keyid, secret) = Aws.Credentials
<$> pure (encodeUtf8 (T.pack keyid))
<*> pure (encodeUtf8 (T.pack secret))
<*> newIORef []
<*> pure Nothing
data Service = S3 | Glacier
deriving (Eq)
@@ -33,9 +46,10 @@ regionMap = M.fromList . regionInfo
defaultRegion :: Service -> Region
defaultRegion = snd . Prelude.head . regionInfo
{- S3 and Glacier use different names for some regions. Ie, "us-east-1"
- cannot be used with S3, while "US" cannot be used with Glacier. Dunno why.
- Also, Glacier is not yet available in all regions. -}
data ServiceRegion = BothRegion Region | S3Region Region | GlacierRegion Region
{- The "US" and "EU" names are used as location constraints when creating an
- S3 bucket. -}
regionInfo :: Service -> [(Text, Region)]
regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $
filter (matchingService . snd) $
@@ -60,4 +74,14 @@ regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $
matchingService (S3Region _) = service == S3
matchingService (GlacierRegion _) = service == Glacier
data ServiceRegion = BothRegion Region | S3Region Region | GlacierRegion Region
s3HostName :: Region -> B.ByteString
s3HostName "US" = "s3.amazonaws.com"
s3HostName "EU" = "s3-eu-west-1.amazonaws.com"
s3HostName r = encodeUtf8 $ T.concat ["s3-", r, ".amazonaws.com"]
s3DefaultHost :: String
s3DefaultHost = "s3.amazonaws.com"
mkLocationConstraint :: Region -> S3.LocationConstraint
mkLocationConstraint "US" = S3.locationUsClassic
mkLocationConstraint r = r
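
To see how the new helpers above fit together, here is a sketch (not part of this commit; mkAwsConfig is a hypothetical name, and the module's existing imports are assumed) of building an aws Configuration from a CredPair, which is essentially what withS3Handle in Remote/S3.hs does:

    -- Hypothetical helper: turn a stored CredPair into an aws Configuration,
    -- mirroring withS3Handle in Remote/S3.hs.
    mkAwsConfig :: CredPair -> IO Aws.Configuration
    mkAwsConfig credpair = do
        awscreds <- genCredentials credpair
        return $ Aws.Configuration Aws.Timestamp awscreds (Aws.defaultLog Aws.Error)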

Remote/S3.hs

@@ -1,15 +1,19 @@
{- S3 remotes
-
- Copyright 2011-2013 Joey Hess <joey@kitenet.net>
- Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE TypeFamilies #-}
module Remote.S3 (remote, iaHost, isIA, isIAHost, iaItemUrl) where
import qualified Aws as AWS
import qualified Aws.Core as AWS
import qualified Aws.S3 as S3
import Network.AWS.AWSConnection
import Network.AWS.S3Object hiding (getStorageClass)
import Network.AWS.S3Bucket hiding (size)
import Network.AWS.AWSResult
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
@@ -17,6 +21,11 @@ import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import Data.Char
import Network.Socket (HostName)
import Network.HTTP.Conduit (Manager, newManager, closeManager)
import Network.HTTP.Client (defaultManagerSettings, managerResponseTimeout, responseStatus, responseBody, RequestBody(..))
import Network.HTTP.Types
import Control.Monad.Trans.Resource
import Control.Monad.Catch
import Common.Annex
import Types.Remote
@@ -86,8 +95,8 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
defaults = M.fromList
[ ("datacenter", T.unpack $ AWS.defaultRegion AWS.S3)
, ("storageclass", "STANDARD")
, ("host", defaultAmazonS3Host)
, ("port", show defaultAmazonS3Port)
, ("host", AWS.s3DefaultHost)
, ("port", "80")
, ("bucket", defbucket)
]
@@ -119,7 +128,8 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
M.insert "mungekeys" "ia" $
-- bucket created only when files are uploaded
M.insert "x-amz-auto-make-bucket" "1" defaults
writeUUIDFile archiveconfig u
withS3Handle archiveconfig u $
writeUUIDFile archiveconfig u
use archiveconfig
prepareStore :: Remote -> Preparer Storer
@@ -135,6 +145,8 @@ prepareStore r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
store :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> IO (AWSResult ())
store (conn, bucket) r k p file = do
error "TODO"
{-
size <- (fromIntegral . fileSize <$> getFileStatus file) :: IO Integer
withMeteredFile file p $ \content -> do
-- size is provided to S3 so the whole content
@@ -145,12 +157,16 @@ store (conn, bucket) r k p file = do
content
sendObject conn $
setStorageClass (getStorageClass $ config r) object
-}
prepareRetrieve :: Remote -> Preparer Retriever
prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
error "TODO"
{-
byteRetriever $ \k sink ->
liftIO (getObject conn $ bucketKey r bucket k)
>>= either s3Error (sink . obj_data)
-}
retrieveCheap :: Key -> FilePath -> Annex Bool
retrieveCheap _ _ = return False
@@ -172,11 +188,14 @@ remove' r k = s3Action r False $ \(conn, bucket) ->
checkKey :: Remote -> CheckPresent
checkKey r k = s3Action r noconn $ \(conn, bucket) -> do
showAction $ "checking " ++ name r
{-
res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k
case res of
Right _ -> return True
Left (AWSError _ _) -> return False
Left e -> s3Error e
-}
error "TODO"
where
noconn = error "S3 not configured"
@@ -185,9 +204,6 @@ s3Warning e = do
warning $ prettyReqError e
return False
s3Error :: ReqError -> a
s3Error e = error $ prettyReqError e
s3Bool :: AWSResult () -> Annex Bool
s3Bool (Right _) = return True
s3Bool (Left e) = s3Warning e
@@ -229,76 +245,76 @@ iaMunge = (>>= munge)
{- Generate the bucket if it does not already exist, including creating the
- UUID file within the bucket.
-
- To check if the bucket exists, ask for its location. However, some ACLs
- can allow read/write to buckets, but not querying location, so first
- check if the UUID file already exists and we can skip doing anything.
- Some ACLs can allow read/write to buckets, but not querying them,
- so first check if the UUID file already exists and we can skip doing
- anything.
-}
genBucket :: RemoteConfig -> UUID -> Annex ()
genBucket c u = do
conn <- s3ConnectionRequired c u
showAction "checking bucket"
unlessM ((== Right True) <$> checkUUIDFile c u conn) $ do
loc <- liftIO $ getBucketLocation conn bucket
case loc of
Right _ -> writeUUIDFile c u
Left err@(NetworkError _) -> s3Error err
Left (AWSError _ _) -> do
showAction $ "creating bucket in " ++ datacenter
res <- liftIO $ createBucketIn conn bucket datacenter
case res of
Right _ -> writeUUIDFile c u
Left err -> s3Error err
withS3Handle c u $ \h ->
go h =<< checkUUIDFile c u h
where
bucket = fromJust $ getBucket c
go _ (Right True) = noop
go h _ = do
v <- sendS3Handle h (S3.getBucket bucket)
case v of
Right _ -> noop
Left _ -> do
showAction $ "creating bucket in " ++ datacenter
void $ mustSucceed $ sendS3Handle h $
S3.PutBucket bucket Nothing $
AWS.mkLocationConstraint $
T.pack datacenter
writeUUIDFile c u h
bucket = T.pack $ fromJust $ getBucket c
datacenter = fromJust $ M.lookup "datacenter" c
{- Writes the UUID to an annex-uuid file within the bucket.
-
- If the file already exists in the bucket, it must match.
-
- Note that IA items do not get created by createBucketIn.
- Rather, they are created the first time a file is stored in them.
- So this also takes care of that.
- Note that IA buckets can only be created by having a file
- stored in them. So this also takes care of that.
-}
writeUUIDFile :: RemoteConfig -> UUID -> Annex ()
writeUUIDFile c u = do
conn <- s3ConnectionRequired c u
v <- checkUUIDFile c u conn
writeUUIDFile :: RemoteConfig -> UUID -> S3Handle -> Annex ()
writeUUIDFile c u h = do
v <- checkUUIDFile c u h
case v of
Left e -> error e
Right True -> return ()
Right False -> do
let object = setStorageClass (getStorageClass c) (mkobject uuidb)
either s3Error return =<< liftIO (sendObject conn object)
Left e -> throwM e
Right True -> noop
Right False -> void $ mustSucceed $ sendS3Handle h mkobject
where
file = uuidFile c
file = T.pack $ uuidFile c
uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u]
bucket = fromJust $ getBucket c
bucket = T.pack $ fromJust $ getBucket c
mkobject = S3Object bucket file "" (getXheaders c)
-- TODO: add headers from getXheaders
-- (See https://github.com/aristidb/aws/issues/119)
mkobject = (S3.putObject bucket file $ RequestBodyLBS uuidb)
{ S3.poStorageClass = Just (getStorageClass c) }
{- Checks if the UUID file exists in the bucket and has the specified UUID already. -}
checkUUIDFile :: RemoteConfig -> UUID -> AWSConnection -> Annex (Either String Bool)
checkUUIDFile c u conn = check <$> liftIO (tryNonAsync $ getObject conn $ mkobject L.empty)
{- Checks if the UUID file exists in the bucket
- and has the specified UUID already. -}
checkUUIDFile :: RemoteConfig -> UUID -> S3Handle -> Annex (Either SomeException Bool)
checkUUIDFile c u h = tryNonAsync $ check <$> get
where
check (Right (Right o))
| obj_data o == uuidb = Right True
| otherwise = Left $ "This bucket is already in use by a different S3 special remote, with UUID: " ++ show (obj_data o)
check _ = Right False
get = liftIO
. runResourceT
. either (pure . Left) (Right <$$> AWS.loadToMemory)
=<< sendS3Handle h (S3.getObject bucket file)
check (Right (S3.GetObjectMemoryResponse _meta rsp)) =
responseStatus rsp == ok200 && responseBody rsp == uuidb
check (Left _S3Error) = False
bucket = T.pack $ fromJust $ getBucket c
file = T.pack $ uuidFile c
uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u]
bucket = fromJust $ getBucket c
file = uuidFile c
mkobject = S3Object bucket file "" (getXheaders c)
uuidFile :: RemoteConfig -> FilePath
uuidFile c = filePrefix c ++ "annex-uuid"
s3ConnectionRequired :: RemoteConfig -> UUID -> Annex AWSConnection
s3ConnectionRequired c u =
maybe (error "Cannot connect to S3") return =<< s3Connection c u
s3Connection :: RemoteConfig -> UUID -> Annex (Maybe AWSConnection)
s3Connection c u = go =<< getRemoteCredPairFor "S3" c (AWS.creds u)
where
@@ -311,13 +327,69 @@ s3Connection c u = go =<< getRemoteCredPairFor "S3" c (AWS.creds u)
[(p, _)] -> p
_ -> error $ "bad S3 port value: " ++ s
data S3Handle = S3Handle Manager AWS.Configuration (S3.S3Configuration AWS.NormalQuery)
{- Sends a request to S3 and gets back the response.
-
- Note that pureAws's use of ResourceT is bypassed here;
- the response should be processed while the S3Handle is still open,
- eg within a call to withS3Handle.
-}
sendS3Handle
:: (AWS.Transaction req res, AWS.ServiceConfiguration req ~ S3.S3Configuration)
=> S3Handle
-> req
-> Annex (Either S3.S3Error res)
sendS3Handle (S3Handle manager awscfg s3cfg) req = safely $ liftIO $
runResourceT $ AWS.pureAws awscfg s3cfg manager req
where
safely a = (Right <$> a) `catch` (pure . Left)
mustSucceed :: Annex (Either S3.S3Error res) -> Annex res
mustSucceed a = a >>= either s3Error return
s3Error :: S3.S3Error -> a
s3Error (S3.S3Error { S3.s3ErrorMessage = m }) = error $ "S3 error: " ++ T.unpack m
withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a
withS3Handle c u a = do
creds <- getRemoteCredPairFor "S3" c (AWS.creds u)
awscreds <- liftIO $ AWS.genCredentials $ fromMaybe nocreds creds
let awscfg = AWS.Configuration AWS.Timestamp awscreds (AWS.defaultLog AWS.Error)
bracketIO (newManager httpcfg) closeManager $ \mgr ->
a $ S3Handle mgr awscfg s3cfg
where
s3cfg = s3Configuration c
httpcfg = defaultManagerSettings
{ managerResponseTimeout = Nothing }
nocreds = error "Cannot use S3 without credentials configured"
s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery
s3Configuration c = (S3.s3 proto endpoint False) { S3.s3Port = port }
where
proto
| port == 443 = AWS.HTTPS
| otherwise = AWS.HTTP
host = fromJust $ M.lookup "host" c
datacenter = fromJust $ M.lookup "datacenter" c
-- When the default S3 host is configured, connect directly to
-- the S3 endpoint for the configured datacenter.
-- When another host is configured, it's used as-is.
endpoint
| host == AWS.s3DefaultHost = AWS.s3HostName $ T.pack datacenter
| otherwise = T.encodeUtf8 $ T.pack host
port = let s = fromJust $ M.lookup "port" c in
case reads s of
[(p, _)] -> p
_ -> error $ "bad S3 port value: " ++ s
getBucket :: RemoteConfig -> Maybe Bucket
getBucket = M.lookup "bucket"
getStorageClass :: RemoteConfig -> StorageClass
getStorageClass :: RemoteConfig -> S3.StorageClass
getStorageClass c = case fromJust $ M.lookup "storageclass" c of
"REDUCED_REDUNDANCY" -> REDUCED_REDUNDANCY
_ -> STANDARD
"REDUCED_REDUNDANCY" -> S3.ReducedRedundancy
_ -> S3.Standard
getXheaders :: RemoteConfig -> [(String, String)]
getXheaders = filter isxheader . M.assocs
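
Taken together, the intended calling convention for the new handle is: open it with withS3Handle, send requests with sendS3Handle, and consume each response while the handle is still open. A minimal sketch (untested, not in this commit; bucketExists is a hypothetical name):

    -- Hypothetical: check whether a bucket is reachable with the configured creds.
    bucketExists :: RemoteConfig -> UUID -> T.Text -> Annex Bool
    bucketExists c u b = withS3Handle c u $ \h -> do
        v <- sendS3Handle h (S3.getBucket b)
        return (either (const False) (const True) v)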

debian/changelog

@@ -21,6 +21,10 @@ git-annex (5.20140718) UNRELEASED; urgency=medium
* WebDAV: Avoid buffering whole file in memory when uploading and
downloading.
* WebDAV: Dropped support for DAV before 1.0.
* S3: Switched to using the haskell aws library.
* S3: Now supports https. To enable this, configure an S3 special remote to
use port=443. However, with encrypted special remotes, this does not
add any security.
* testremote: New command to test uploads/downloads to a remote.
* Dropping an object from a bup special remote now deletes the git branch
for the object, although of course the object's content cannot be deleted

doc/special_remotes/S3.mdwn

@@ -33,7 +33,8 @@ the S3 remote.
embedcreds without gpg encryption.
* `datacenter` - Defaults to "US". Other values include "EU",
"us-west-1", and "ap-southeast-1".
"us-west-1", "us-west-2", "ap-southeast-1", "ap-southeast-2", and
"sa-east-1".
* `storageclass` - Default is "STANDARD". If you have configured git-annex
to preserve multiple [[copies]], consider setting this to "REDUCED_REDUNDANCY"

git-annex.cabal

@@ -137,7 +137,7 @@ Executable git-annex
CPP-Options: -DWITH_CRYPTOHASH
if flag(S3)
Build-Depends: hS3
Build-Depends: hS3, http-conduit, http-client, resourcet, http-types, aws
CPP-Options: -DWITH_S3
if flag(WebDAV)