7225c2bfc0
In order to record a semi-useful filename associated with the key, this required plumbing the filename all the way through to the remotes' storeKey and retrieveKeyFile. Note that there is potential for deadlock here, narrowly avoided. Suppose the repos are A and B. A sends file foo to B, and at the same time, B gets file foo from A. So, A locks its upload transfer info file, and then locks B's download transfer info file. At the same time, B is taking the two locks in the opposite order. This is only not a deadlock because the lock code does not wait, and aborts. So one of A or B's transfers will be aborted and the other transfer will continue. Whew!
320 lines
9.4 KiB
Haskell
320 lines
9.4 KiB
Haskell
{- Amazon S3 remotes.
|
|
-
|
|
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
|
-
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
-}
|
|
|
|
module Remote.S3 (remote) where
|
|
|
|
import Network.AWS.AWSConnection
|
|
import Network.AWS.S3Object
|
|
import Network.AWS.S3Bucket hiding (size)
|
|
import Network.AWS.AWSResult
|
|
import qualified Data.ByteString.Lazy.Char8 as L
|
|
import qualified Data.Map as M
|
|
import Data.Char
|
|
import System.Environment
|
|
import System.Posix.Env (setEnv)
|
|
|
|
import Common.Annex
|
|
import Types.Remote
|
|
import Types.Key
|
|
import qualified Git
|
|
import Config
|
|
import Remote.Helper.Special
|
|
import Remote.Helper.Encryptable
|
|
import Crypto
|
|
import Annex.Content
|
|
import Utility.Base64
|
|
|
|
remote :: RemoteType
|
|
remote = RemoteType {
|
|
typename = "S3",
|
|
enumerate = findSpecialRemotes "s3",
|
|
generate = gen,
|
|
setup = s3Setup
|
|
}
|
|
|
|
gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex Remote
|
|
gen r u c = do
|
|
cst <- remoteCost r expensiveRemoteCost
|
|
return $ gen' r u c cst
|
|
gen' :: Git.Repo -> UUID -> Maybe RemoteConfig -> Int -> Remote
|
|
gen' r u c cst =
|
|
encryptableRemote c
|
|
(storeEncrypted this)
|
|
(retrieveEncrypted this)
|
|
this
|
|
where
|
|
this = Remote {
|
|
uuid = u,
|
|
cost = cst,
|
|
name = Git.repoDescribe r,
|
|
storeKey = store this,
|
|
retrieveKeyFile = retrieve this,
|
|
retrieveKeyFileCheap = retrieveCheap this,
|
|
removeKey = remove this,
|
|
hasKey = checkPresent this,
|
|
hasKeyCheap = False,
|
|
whereisKey = Nothing,
|
|
config = c,
|
|
repo = r,
|
|
remotetype = remote
|
|
}
|
|
|
|
s3Setup :: UUID -> RemoteConfig -> Annex RemoteConfig
|
|
s3Setup u c = handlehost $ M.lookup "host" c
|
|
where
|
|
remotename = fromJust (M.lookup "name" c)
|
|
defbucket = remotename ++ "-" ++ fromUUID u
|
|
defaults = M.fromList
|
|
[ ("datacenter", "US")
|
|
, ("storageclass", "STANDARD")
|
|
, ("host", defaultAmazonS3Host)
|
|
, ("port", show defaultAmazonS3Port)
|
|
, ("bucket", defbucket)
|
|
]
|
|
|
|
handlehost Nothing = defaulthost
|
|
handlehost (Just h)
|
|
| ".archive.org" `isSuffixOf` map toLower h = archiveorg
|
|
| otherwise = defaulthost
|
|
|
|
use fullconfig = do
|
|
gitConfigSpecialRemote u fullconfig "s3" "true"
|
|
s3SetCreds fullconfig
|
|
|
|
defaulthost = do
|
|
c' <- encryptionSetup c
|
|
let fullconfig = c' `M.union` defaults
|
|
genBucket fullconfig
|
|
use fullconfig
|
|
|
|
archiveorg = do
|
|
showNote "Internet Archive mode"
|
|
maybe (error "specify bucket=") (const noop) $
|
|
M.lookup "bucket" archiveconfig
|
|
use archiveconfig
|
|
where
|
|
archiveconfig =
|
|
-- hS3 does not pass through
|
|
-- x-archive-* headers
|
|
M.mapKeys (replace "x-archive-" "x-amz-") $
|
|
-- encryption does not make sense here
|
|
M.insert "encryption" "none" $
|
|
M.union c $
|
|
-- special constraints on key names
|
|
M.insert "mungekeys" "ia" $
|
|
-- bucket created only when files
|
|
-- are uploaded
|
|
M.insert "x-amz-auto-make-bucket" "1" $
|
|
-- no default bucket name; should
|
|
-- be human-readable
|
|
M.delete "bucket" defaults
|
|
|
|
store :: Remote -> Key -> AssociatedFile -> Annex Bool
|
|
store r k _f = s3Action r False $ \(conn, bucket) -> do
|
|
dest <- inRepo $ gitAnnexLocation k
|
|
res <- liftIO $ storeHelper (conn, bucket) r k dest
|
|
s3Bool res
|
|
|
|
storeEncrypted :: Remote -> (Cipher, Key) -> Key -> Annex Bool
|
|
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) ->
|
|
-- To get file size of the encrypted content, have to use a temp file.
|
|
-- (An alternative would be chunking to to a constant size.)
|
|
withTmp enck $ \tmp -> do
|
|
f <- inRepo $ gitAnnexLocation k
|
|
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
|
|
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
|
|
s3Bool res
|
|
|
|
storeHelper :: (AWSConnection, String) -> Remote -> Key -> FilePath -> IO (AWSResult ())
|
|
storeHelper (conn, bucket) r k file = do
|
|
content <- liftIO $ L.readFile file
|
|
-- size is provided to S3 so the whole content does not need to be
|
|
-- buffered to calculate it
|
|
size <- maybe getsize (return . fromIntegral) $ keySize k
|
|
let object = setStorageClass storageclass $
|
|
S3Object bucket (bucketFile r k) ""
|
|
(("Content-Length", show size) : xheaders) content
|
|
sendObject conn object
|
|
where
|
|
storageclass =
|
|
case fromJust $ M.lookup "storageclass" $ fromJust $ config r of
|
|
"REDUCED_REDUNDANCY" -> REDUCED_REDUNDANCY
|
|
_ -> STANDARD
|
|
getsize = fileSize <$> (liftIO $ getFileStatus file)
|
|
|
|
xheaders = filter isxheader $ M.assocs $ fromJust $ config r
|
|
isxheader (h, _) = "x-amz-" `isPrefixOf` h
|
|
|
|
retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> Annex Bool
|
|
retrieve r k _f d = s3Action r False $ \(conn, bucket) -> do
|
|
res <- liftIO $ getObject conn $ bucketKey r bucket k
|
|
case res of
|
|
Right o -> do
|
|
liftIO $ L.writeFile d $ obj_data o
|
|
return True
|
|
Left e -> s3Warning e
|
|
|
|
retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool
|
|
retrieveCheap _ _ _ = return False
|
|
|
|
retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
|
|
retrieveEncrypted r (cipher, enck) _ f = s3Action r False $ \(conn, bucket) -> do
|
|
res <- liftIO $ getObject conn $ bucketKey r bucket enck
|
|
case res of
|
|
Right o -> liftIO $
|
|
withDecryptedContent cipher (return $ obj_data o) $ \content -> do
|
|
L.writeFile f content
|
|
return True
|
|
Left e -> s3Warning e
|
|
|
|
remove :: Remote -> Key -> Annex Bool
|
|
remove r k = s3Action r False $ \(conn, bucket) -> do
|
|
res <- liftIO $ deleteObject conn $ bucketKey r bucket k
|
|
s3Bool res
|
|
|
|
checkPresent :: Remote -> Key -> Annex (Either String Bool)
|
|
checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do
|
|
showAction $ "checking " ++ name r
|
|
res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k
|
|
case res of
|
|
Right _ -> return $ Right True
|
|
Left (AWSError _ _) -> return $ Right False
|
|
Left e -> return $ Left (s3Error e)
|
|
where
|
|
noconn = Left $ error "S3 not configured"
|
|
|
|
s3Warning :: ReqError -> Annex Bool
|
|
s3Warning e = do
|
|
warning $ prettyReqError e
|
|
return False
|
|
|
|
s3Error :: ReqError -> a
|
|
s3Error e = error $ prettyReqError e
|
|
|
|
s3Bool :: AWSResult () -> Annex Bool
|
|
s3Bool (Right _) = return True
|
|
s3Bool (Left e) = s3Warning e
|
|
|
|
s3Action :: Remote -> a -> ((AWSConnection, String) -> Annex a) -> Annex a
|
|
s3Action r noconn action = do
|
|
when (isNothing $ config r) $
|
|
error $ "Missing configuration for special remote " ++ name r
|
|
let bucket = M.lookup "bucket" $ fromJust $ config r
|
|
conn <- s3Connection $ fromJust $ config r
|
|
case (bucket, conn) of
|
|
(Just b, Just c) -> action (c, b)
|
|
_ -> return noconn
|
|
|
|
bucketFile :: Remote -> Key -> FilePath
|
|
bucketFile r = munge . show
|
|
where
|
|
munge s = case M.lookup "mungekeys" $ fromJust $ config r of
|
|
Just "ia" -> iaMunge s
|
|
_ -> s
|
|
|
|
bucketKey :: Remote -> String -> Key -> S3Object
|
|
bucketKey r bucket k = S3Object bucket (bucketFile r k) "" [] L.empty
|
|
|
|
{- Internet Archive limits filenames to a subset of ascii,
|
|
- with no whitespace. Other characters are xml entity
|
|
- encoded. -}
|
|
iaMunge :: String -> String
|
|
iaMunge = (>>= munge)
|
|
where
|
|
munge c
|
|
| isAsciiUpper c || isAsciiLower c || isNumber c = [c]
|
|
| c `elem` "_-.\"" = [c]
|
|
| isSpace c = []
|
|
| otherwise = "&" ++ show (ord c) ++ ";"
|
|
|
|
genBucket :: RemoteConfig -> Annex ()
|
|
genBucket c = do
|
|
conn <- s3ConnectionRequired c
|
|
showAction "checking bucket"
|
|
loc <- liftIO $ getBucketLocation conn bucket
|
|
case loc of
|
|
Right _ -> noop
|
|
Left err@(NetworkError _) -> s3Error err
|
|
Left (AWSError _ _) -> do
|
|
showAction $ "creating bucket in " ++ datacenter
|
|
res <- liftIO $ createBucketIn conn bucket datacenter
|
|
case res of
|
|
Right _ -> noop
|
|
Left err -> s3Error err
|
|
where
|
|
bucket = fromJust $ M.lookup "bucket" c
|
|
datacenter = fromJust $ M.lookup "datacenter" c
|
|
|
|
s3ConnectionRequired :: RemoteConfig -> Annex AWSConnection
|
|
s3ConnectionRequired c =
|
|
maybe (error "Cannot connect to S3") return =<< s3Connection c
|
|
|
|
s3Connection :: RemoteConfig -> Annex (Maybe AWSConnection)
|
|
s3Connection c = do
|
|
creds <- s3GetCreds c
|
|
case creds of
|
|
Just (ak, sk) -> return $ Just $ AWSConnection host port ak sk
|
|
_ -> do
|
|
warning $ "Set both " ++ s3AccessKey ++ " and " ++ s3SecretKey ++ " to use S3"
|
|
return Nothing
|
|
where
|
|
host = fromJust $ M.lookup "host" c
|
|
port = let s = fromJust $ M.lookup "port" c in
|
|
case reads s of
|
|
[(p, _)] -> p
|
|
_ -> error $ "bad S3 port value: " ++ s
|
|
|
|
{- S3 creds come from the environment if set.
|
|
- Otherwise, might be stored encrypted in the remote's config. -}
|
|
s3GetCreds :: RemoteConfig -> Annex (Maybe (String, String))
|
|
s3GetCreds c = maybe fromconfig (return . Just) =<< liftIO getenv
|
|
where
|
|
getenv = liftM2 (,)
|
|
<$> get s3AccessKey
|
|
<*> get s3SecretKey
|
|
where
|
|
get = catchMaybeIO . getEnv
|
|
setenv (ak, sk) = do
|
|
setEnv s3AccessKey ak True
|
|
setEnv s3SecretKey sk True
|
|
fromconfig = do
|
|
mcipher <- remoteCipher c
|
|
case (M.lookup "s3creds" c, mcipher) of
|
|
(Just s3creds, Just cipher) ->
|
|
liftIO $ decrypt s3creds cipher
|
|
_ -> return Nothing
|
|
decrypt s3creds cipher = do
|
|
creds <- lines <$>
|
|
withDecryptedContent cipher
|
|
(return $ L.pack $ fromB64 s3creds)
|
|
(return . L.unpack)
|
|
case creds of
|
|
[ak, sk] -> do
|
|
setenv (ak, sk)
|
|
return $ Just (ak, sk)
|
|
_ -> do error "bad s3creds"
|
|
|
|
{- Stores S3 creds encrypted in the remote's config if possible. -}
|
|
s3SetCreds :: RemoteConfig -> Annex RemoteConfig
|
|
s3SetCreds c = do
|
|
creds <- s3GetCreds c
|
|
case creds of
|
|
Just (ak, sk) -> do
|
|
mcipher <- remoteCipher c
|
|
case mcipher of
|
|
Just cipher -> do
|
|
s <- liftIO $ withEncryptedContent cipher
|
|
(return $ L.pack $ unlines [ak, sk])
|
|
(return . L.unpack)
|
|
return $ M.insert "s3creds" (toB64 s) c
|
|
Nothing -> return c
|
|
_ -> return c
|
|
|
|
s3AccessKey :: String
|
|
s3AccessKey = "AWS_ACCESS_KEY_ID"
|
|
s3SecretKey :: String
|
|
s3SecretKey = "AWS_SECRET_ACCESS_KEY"
|