WIP converting S3 special remote from hS3 to aws library

Currently, initremote works, but not the other operations. They should be
fairly easy to add from this base.

Also, https://github.com/aristidb/aws/issues/119 blocks internet archive
support.

Note that since http-conduit is used, this also adds https support to S3.
Since git-annex encrypts everything anyway, that may not be especially
useful. https is not enabled by default, because existing S3 special remotes
have port=80 in their config; setting port=443 will enable it.
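
For example, a possible way to switch an S3 special remote over to https
(the remote name "mys3" is illustrative only, not part of this commit):

    # create a new S3 special remote that talks to AWS over https
    git annex initremote mys3 type=S3 encryption=shared port=443

    # or reconfigure an existing S3 special remote to use https
    git annex enableremote mys3 port=443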

This commit was sponsored by Daniel Brockman.
Joey Hess 2014-08-08 18:54:04 -04:00
parent 7712e70885
commit 6fcca2f13e
5 changed files with 165 additions and 64 deletions


@@ -1,6 +1,6 @@
 {- Amazon Web Services common infrastructure.
  -
- - Copyright 2011,2012 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
@@ -12,8 +12,14 @@ module Remote.Helper.AWS where
 import Common.Annex
 import Creds
+import qualified Aws
+import qualified Aws.S3 as S3
 import qualified Data.Map as M
+import qualified Data.ByteString as B
+import qualified Data.Text as T
+import Data.Text.Encoding (encodeUtf8)
 import Data.Text (Text)
+import Data.IORef
 
 creds :: UUID -> CredPairStorage
 creds u = CredPairStorage
@@ -22,6 +28,13 @@ creds u = CredPairStorage
 	, credPairRemoteKey = Just "s3creds"
 	}
 
+genCredentials :: CredPair -> IO Aws.Credentials
+genCredentials (keyid, secret) = Aws.Credentials
+	<$> pure (encodeUtf8 (T.pack keyid))
+	<*> pure (encodeUtf8 (T.pack secret))
+	<*> newIORef []
+	<*> pure Nothing
+
 data Service = S3 | Glacier
 	deriving (Eq)
@@ -33,9 +46,10 @@ regionMap = M.fromList . regionInfo
 defaultRegion :: Service -> Region
 defaultRegion = snd . Prelude.head . regionInfo
 
-{- S3 and Glacier use different names for some regions. Ie, "us-east-1"
- - cannot be used with S3, while "US" cannot be used with Glacier. Dunno why.
- - Also, Glacier is not yet available in all regions. -}
+data ServiceRegion = BothRegion Region | S3Region Region | GlacierRegion Region
+
+{- The "US" and "EU" names are used as location constraints when creating a
+ - S3 bucket. -}
 regionInfo :: Service -> [(Text, Region)]
 regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $
 	filter (matchingService . snd) $
@@ -60,4 +74,14 @@ regionInfo service = map (\(t, r) -> (t, fromServiceRegion r)) $
 	matchingService (S3Region _) = service == S3
 	matchingService (GlacierRegion _) = service == Glacier
 
-data ServiceRegion = BothRegion Region | S3Region Region | GlacierRegion Region
+s3HostName :: Region -> B.ByteString
+s3HostName "US" = "s3.amazonaws.com"
+s3HostName "EU" = "s3-eu-west-1.amazonaws.com"
+s3HostName r = encodeUtf8 $ T.concat ["s3-", r, ".amazonaws.com"]
+
+s3DefaultHost :: String
+s3DefaultHost = "s3.amazonaws.com"
+
+mkLocationConstraint :: Region -> S3.LocationConstraint
+mkLocationConstraint "US" = S3.locationUsClassic
+mkLocationConstraint r = r


@@ -1,15 +1,19 @@
 {- S3 remotes
  -
- - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
+ - Copyright 2011-2014 Joey Hess <joey@kitenet.net>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
 
+{-# LANGUAGE TypeFamilies #-}
+
 module Remote.S3 (remote, iaHost, isIA, isIAHost, iaItemUrl) where
 
+import qualified Aws as AWS
+import qualified Aws.Core as AWS
+import qualified Aws.S3 as S3
 import Network.AWS.AWSConnection
 import Network.AWS.S3Object hiding (getStorageClass)
-import Network.AWS.S3Bucket hiding (size)
 import Network.AWS.AWSResult
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as T
@@ -17,6 +21,11 @@ import qualified Data.ByteString.Lazy as L
 import qualified Data.Map as M
 import Data.Char
 import Network.Socket (HostName)
+import Network.HTTP.Conduit (Manager, newManager, closeManager)
+import Network.HTTP.Client (defaultManagerSettings, managerResponseTimeout, responseStatus, responseBody, RequestBody(..))
+import Network.HTTP.Types
+import Control.Monad.Trans.Resource
+import Control.Monad.Catch
 
 import Common.Annex
 import Types.Remote
@@ -86,8 +95,8 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
 	defaults = M.fromList
 		[ ("datacenter", T.unpack $ AWS.defaultRegion AWS.S3)
 		, ("storageclass", "STANDARD")
-		, ("host", defaultAmazonS3Host)
-		, ("port", show defaultAmazonS3Port)
+		, ("host", AWS.s3DefaultHost)
+		, ("port", "80")
 		, ("bucket", defbucket)
 		]
@@ -119,6 +128,7 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
 			M.insert "mungekeys" "ia" $
 			-- bucket created only when files are uploaded
 			M.insert "x-amz-auto-make-bucket" "1" defaults
+		withS3Handle archiveconfig u $
 			writeUUIDFile archiveconfig u
 		use archiveconfig
@@ -135,6 +145,8 @@ prepareStore r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
 store :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> IO (AWSResult ())
 store (conn, bucket) r k p file = do
+	error "TODO"
+	{-
 	size <- (fromIntegral . fileSize <$> getFileStatus file) :: IO Integer
 	withMeteredFile file p $ \content -> do
 		-- size is provided to S3 so the whole content
@@ -145,12 +157,16 @@ store (conn, bucket) r k p file = do
 			content
 		sendObject conn $
 			setStorageClass (getStorageClass $ config r) object
+	-}
 
 prepareRetrieve :: Remote -> Preparer Retriever
 prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
+	error "TODO"
+	{-
 	byteRetriever $ \k sink ->
 		liftIO (getObject conn $ bucketKey r bucket k)
 			>>= either s3Error (sink . obj_data)
+	-}
 
 retrieveCheap :: Key -> FilePath -> Annex Bool
 retrieveCheap _ _ = return False
@@ -172,11 +188,14 @@ remove' r k = s3Action r False $ \(conn, bucket) ->
 checkKey :: Remote -> CheckPresent
 checkKey r k = s3Action r noconn $ \(conn, bucket) -> do
 	showAction $ "checking " ++ name r
+	{-
 	res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k
 	case res of
 		Right _ -> return True
 		Left (AWSError _ _) -> return False
 		Left e -> s3Error e
+	-}
+	error "TODO"
   where
 	noconn = error "S3 not configured"
@@ -185,9 +204,6 @@ s3Warning e = do
 	warning $ prettyReqError e
 	return False
 
-s3Error :: ReqError -> a
-s3Error e = error $ prettyReqError e
-
 s3Bool :: AWSResult () -> Annex Bool
 s3Bool (Right _) = return True
 s3Bool (Left e) = s3Warning e
@@ -229,76 +245,76 @@ iaMunge = (>>= munge)
 {- Generate the bucket if it does not already exist, including creating the
  - UUID file within the bucket.
  -
- - To check if the bucket exists, ask for its location. However, some ACLs
- - can allow read/write to buckets, but not querying location, so first
- - check if the UUID file already exists and we can skip doing anything.
+ - Some ACLs can allow read/write to buckets, but not querying them,
+ - so first check if the UUID file already exists and we can skip doing
+ - anything.
  -}
 genBucket :: RemoteConfig -> UUID -> Annex ()
 genBucket c u = do
-	conn <- s3ConnectionRequired c u
 	showAction "checking bucket"
-	unlessM ((== Right True) <$> checkUUIDFile c u conn) $ do
-		loc <- liftIO $ getBucketLocation conn bucket
-		case loc of
-			Right _ -> writeUUIDFile c u
-			Left err@(NetworkError _) -> s3Error err
-			Left (AWSError _ _) -> do
-				showAction $ "creating bucket in " ++ datacenter
-				res <- liftIO $ createBucketIn conn bucket datacenter
-				case res of
-					Right _ -> writeUUIDFile c u
-					Left err -> s3Error err
+	withS3Handle c u $ \h ->
+		go h =<< checkUUIDFile c u h
   where
-	bucket = fromJust $ getBucket c
+	go _ (Right True) = noop
+	go h _ = do
+		v <- sendS3Handle h (S3.getBucket bucket)
+		case v of
+			Right _ -> noop
+			Left _ -> do
+				showAction $ "creating bucket in " ++ datacenter
+				void $ mustSucceed $ sendS3Handle h $
+					S3.PutBucket bucket Nothing $
+						AWS.mkLocationConstraint $
+							T.pack datacenter
+				writeUUIDFile c u h
+	bucket = T.pack $ fromJust $ getBucket c
 	datacenter = fromJust $ M.lookup "datacenter" c
 
 {- Writes the UUID to an annex-uuid file within the bucket.
  -
  - If the file already exists in the bucket, it must match.
  -
- - Note that IA items do not get created by createBucketIn.
- - Rather, they are created the first time a file is stored in them.
- - So this also takes care of that.
+ - Note that IA buckets can only created by having a file
+ - stored in them. So this also takes care of that.
  -}
-writeUUIDFile :: RemoteConfig -> UUID -> Annex ()
-writeUUIDFile c u = do
-	conn <- s3ConnectionRequired c u
-	v <- checkUUIDFile c u conn
+writeUUIDFile :: RemoteConfig -> UUID -> S3Handle -> Annex ()
+writeUUIDFile c u h = do
+	v <- checkUUIDFile c u h
 	case v of
-		Left e -> error e
-		Right True -> return ()
-		Right False -> do
-			let object = setStorageClass (getStorageClass c) (mkobject uuidb)
-			either s3Error return =<< liftIO (sendObject conn object)
+		Left e -> throwM e
+		Right True -> noop
+		Right False -> void $ mustSucceed $ sendS3Handle h mkobject
   where
-	file = uuidFile c
+	file = T.pack $ uuidFile c
 	uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u]
-	bucket = fromJust $ getBucket c
-	mkobject = S3Object bucket file "" (getXheaders c)
+	bucket = T.pack $ fromJust $ getBucket c
+	-- TODO: add headers from getXheaders
+	-- (See https://github.com/aristidb/aws/issues/119)
+	mkobject = (S3.putObject bucket file $ RequestBodyLBS uuidb)
+		{ S3.poStorageClass = Just (getStorageClass c) }
 
-{- Checks if the UUID file exists in the bucket and has the specified UUID already. -}
-checkUUIDFile :: RemoteConfig -> UUID -> AWSConnection -> Annex (Either String Bool)
-checkUUIDFile c u conn = check <$> liftIO (tryNonAsync $ getObject conn $ mkobject L.empty)
+{- Checks if the UUID file exists in the bucket
+ - and has the specified UUID already. -}
+checkUUIDFile :: RemoteConfig -> UUID -> S3Handle -> Annex (Either SomeException Bool)
+checkUUIDFile c u h = tryNonAsync $ check <$> get
   where
-	check (Right (Right o))
-		| obj_data o == uuidb = Right True
-		| otherwise = Left $ "This bucket is already in use by a different S3 special remote, with UUID: " ++ show (obj_data o)
-	check _ = Right False
+	get = liftIO
+		. runResourceT
+		. either (pure . Left) (Right <$$> AWS.loadToMemory)
+		=<< sendS3Handle h (S3.getObject bucket file)
+	check (Right (S3.GetObjectMemoryResponse _meta rsp)) =
+		responseStatus rsp == ok200 && responseBody rsp == uuidb
+	check (Left _S3Error) = False
+	bucket = T.pack $ fromJust $ getBucket c
+	file = T.pack $ uuidFile c
 	uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u]
-	bucket = fromJust $ getBucket c
-	file = uuidFile c
-	mkobject = S3Object bucket file "" (getXheaders c)
 
 uuidFile :: RemoteConfig -> FilePath
 uuidFile c = filePrefix c ++ "annex-uuid"
 
-s3ConnectionRequired :: RemoteConfig -> UUID -> Annex AWSConnection
-s3ConnectionRequired c u =
-	maybe (error "Cannot connect to S3") return =<< s3Connection c u
-
 s3Connection :: RemoteConfig -> UUID -> Annex (Maybe AWSConnection)
 s3Connection c u = go =<< getRemoteCredPairFor "S3" c (AWS.creds u)
   where
@@ -311,13 +327,69 @@ s3Connection c u = go =<< getRemoteCredPairFor "S3" c (AWS.creds u)
 		[(p, _)] -> p
 		_ -> error $ "bad S3 port value: " ++ s
 
+data S3Handle = S3Handle Manager AWS.Configuration (S3.S3Configuration AWS.NormalQuery)
+
+{- Sends a request to S3 and gets back the response.
+ -
+ - Note that pureAws's use of ResourceT is bypassed here;
+ - the response should be processed while the S3Handle is still open,
+ - eg within a call to withS3Handle.
+ -}
+sendS3Handle
+	:: (AWS.Transaction req res, AWS.ServiceConfiguration req ~ S3.S3Configuration)
+	=> S3Handle
+	-> req
+	-> Annex (Either S3.S3Error res)
+sendS3Handle (S3Handle manager awscfg s3cfg) req = safely $ liftIO $
+	runResourceT $ AWS.pureAws awscfg s3cfg manager req
+  where
+	safely a = (Right <$> a) `catch` (pure . Left)
+
+mustSucceed :: Annex (Either S3.S3Error res) -> Annex res
+mustSucceed a = a >>= either s3Error return
+
+s3Error :: S3.S3Error -> a
+s3Error (S3.S3Error { S3.s3ErrorMessage = m }) = error $ "S3 error: " ++ T.unpack m
+
+withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a
+withS3Handle c u a = do
+	creds <- getRemoteCredPairFor "S3" c (AWS.creds u)
+	awscreds <- liftIO $ AWS.genCredentials $ fromMaybe nocreds creds
+	let awscfg = AWS.Configuration AWS.Timestamp awscreds (AWS.defaultLog AWS.Error)
+	bracketIO (newManager httpcfg) closeManager $ \mgr ->
+		a $ S3Handle mgr awscfg s3cfg
+  where
+	s3cfg = s3Configuration c
+	httpcfg = defaultManagerSettings
+		{ managerResponseTimeout = Nothing }
+	nocreds = error "Cannot use S3 without credentials configured"
+
+s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery
+s3Configuration c = (S3.s3 proto endpoint False) { S3.s3Port = port }
+  where
+	proto
+		| port == 443 = AWS.HTTPS
+		| otherwise = AWS.HTTP
+	host = fromJust $ M.lookup "host" c
+	datacenter = fromJust $ M.lookup "datacenter" c
+	-- When the default S3 host is configured, connect directly to
+	-- the S3 endpoint for the configured datacenter.
+	-- When another host is configured, it's used as-is.
+	endpoint
+		| host == AWS.s3DefaultHost = AWS.s3HostName $ T.pack datacenter
+		| otherwise = T.encodeUtf8 $ T.pack host
+	port = let s = fromJust $ M.lookup "port" c in
+		case reads s of
+		[(p, _)] -> p
+		_ -> error $ "bad S3 port value: " ++ s
+
 getBucket :: RemoteConfig -> Maybe Bucket
 getBucket = M.lookup "bucket"
 
-getStorageClass :: RemoteConfig -> StorageClass
+getStorageClass :: RemoteConfig -> S3.StorageClass
 getStorageClass c = case fromJust $ M.lookup "storageclass" c of
-	"REDUCED_REDUNDANCY" -> REDUCED_REDUNDANCY
-	_ -> STANDARD
+	"REDUCED_REDUNDANCY" -> S3.ReducedRedundancy
+	_ -> S3.Standard
 
 getXheaders :: RemoteConfig -> [(String, String)]
 getXheaders = filter isxheader . M.assocs

debian/changelog

@@ -21,6 +21,10 @@ git-annex (5.20140718) UNRELEASED; urgency=medium
   * WebDAV: Avoid buffering whole file in memory when uploading and
     downloading.
   * WebDAV: Dropped support for DAV before 1.0.
+  * S3: Switched to using the haskell aws library.
+  * S3: Now supports https. To enable this, configure a S3 special remote to
+    use port=443. However, with encrypted special remotes, this does not
+    add any security.
   * testremote: New command to test uploads/downloads to a remote.
   * Dropping an object from a bup special remote now deletes the git branch
     for the object, although of course the object's content cannot be deleted


@@ -33,7 +33,8 @@ the S3 remote.
   embedcreds without gpg encryption.
 * `datacenter` - Defaults to "US". Other values include "EU",
-  "us-west-1", and "ap-southeast-1".
+  "us-west-1", "us-west-2", "ap-southeast-1", "ap-southeast-2", and
+  "sa-east-1".
 * `storageclass` - Default is "STANDARD". If you have configured git-annex
   to preserve multiple [[copies]], consider setting this to "REDUCED_REDUNDANCY"
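
(Usage note for the expanded datacenter list: a remote could be created in one
of the newly documented regions with something like the following; the remote
name and encryption setting here are illustrative only.)

    git annex initremote mys3 type=S3 encryption=shared datacenter=ap-southeast-2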


@@ -137,7 +137,7 @@ Executable git-annex
   CPP-Options: -DWITH_CRYPTOHASH
  if flag(S3)
-  Build-Depends: hS3
+  Build-Depends: hS3, http-conduit, http-client, resourcet, http-types, aws
   CPP-Options: -DWITH_S3
  if flag(WebDAV)