basic s3 remote start

But bucket name is not handled right; it needs to be globally unique.
This commit is contained in:
Joey Hess 2011-03-28 01:32:47 -04:00
parent 026c76914e
commit a7bd63eb01
3 changed files with 89 additions and 24 deletions

View file

@ -31,11 +31,11 @@ setConfig k value = do
getConfig :: Git.Repo -> ConfigKey -> String -> Annex String getConfig :: Git.Repo -> ConfigKey -> String -> Annex String
getConfig r key def = do getConfig r key def = do
g <- Annex.gitRepo g <- Annex.gitRepo
let def' = Git.configGet g global def let def' = Git.configGet g ("annex." ++ key) def
return $ Git.configGet g local def' return $ Git.configGet g (remoteConfig r key) def'
where
local = "remote." ++ fromMaybe "" (Git.repoRemoteName r) ++ ".annex-" ++ key remoteConfig :: Git.Repo -> ConfigKey -> String
global = "annex." ++ key remoteConfig r key = "remote." ++ fromMaybe "" (Git.repoRemoteName r) ++ ".annex-" ++ key
{- Calculates cost for a remote. {- Calculates cost for a remote.
- -

View file

@ -7,9 +7,18 @@
module Remote.S3 (generate) where module Remote.S3 (generate) where
import Network.AWS.AWSConnection
import Network.AWS.S3Object
import Network.AWS.S3Bucket
import Network.AWS.AWSResult
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.String.Utils import Data.String.Utils
import Control.Monad (filterM, liftM) import Control.Monad (filterM, liftM, when)
import Control.Monad.State (liftIO)
import System.Environment
import Data.Char
import Messages
import RemoteClass import RemoteClass
import Types import Types
@ -25,26 +34,13 @@ generate = do
todo <- filterM cachedUUID remotes todo <- filterM cachedUUID remotes
let ok = filter (`notElem` todo) remotes let ok = filter (`notElem` todo) remotes
let actions = map genRemote ok ++ let actions = map (\r -> genRemote r =<< getUUID r) ok ++
map (\r -> genRemote =<< tryS3ConfigRead r) todo map (\r -> genRemote r =<< getS3UUID r) todo
return (actions, map Git.repoDescribe todo) return (actions, map Git.repoDescribe todo)
where where
cachedUUID r = liftM null $ getUUID r cachedUUID r = liftM null $ getUUID r
genRemote :: Git.Repo -> Annex (Remote Annex)
genRemote r = do
return Remote {
uuid = error "TODO",
cost = error "TODO",
name = Git.repoDescribe r,
storeKey = error "TODO",
retrieveKeyFile = error "TODO",
removeKey = error "TODO",
hasKey = error "TODO",
hasKeyCheap = False
}
{- S3 remotes have a remote.<name>.annex-s3-bucket config setting. {- S3 remotes have a remote.<name>.annex-s3-bucket config setting.
- Git.Repo does not normally generate remotes for things that - Git.Repo does not normally generate remotes for things that
- have no configured url, so the Git.Repo objects have to be - have no configured url, so the Git.Repo objects have to be
@ -57,5 +53,73 @@ findS3Remotes r = map construct remotepairs
construct (k,_) = Git.repoRemoteNameSet Git.repoFromUnknown k construct (k,_) = Git.repoRemoteNameSet Git.repoFromUnknown k
s3remote k = startswith "remote." k && endswith ".annex-s3-bucket" k s3remote k = startswith "remote." k && endswith ".annex-s3-bucket" k
tryS3ConfigRead :: Git.Repo -> Annex Git.Repo genRemote :: Git.Repo -> UUID -> Annex (Remote Annex)
tryS3ConfigRead r = error "TODO" genRemote r u = do
c <- remoteCost r
return Remote {
uuid = u,
cost = c,
name = Git.repoDescribe r,
storeKey = error "TODO",
retrieveKeyFile = error "TODO",
removeKey = error "TODO",
hasKey = error "TODO",
hasKeyCheap = False
}
s3Connection :: Git.Repo -> Annex (Maybe AWSConnection)
s3Connection r = do
host <- getS3Config r "s3-host" (Just defaultAmazonS3Host)
port <- getS3Config r "s3-port" (Just $ show defaultAmazonS3Port)
accesskey <- getS3Config r "s3-access-key-id" Nothing
secretkey <- getS3Config r "s3-secret-access-key" Nothing
case reads port of
[(p, _)] -> return $ Just $ AWSConnection host p accesskey secretkey
_ -> error $ "bad S3 port value: " ++ port
withS3Connection :: Git.Repo -> Annex a -> ((AWSConnection, String) -> Annex a) -> Annex a
withS3Connection r def a = do
c <- s3Connection r
case c of
Nothing -> def
Just c' -> do
b <- getConfig r "s3-bucket" ""
a (c', b)
getS3Config :: Git.Repo -> String -> Maybe String-> Annex String
getS3Config r s def = do
e <- liftIO $ catch (liftM Just $ getEnv envvar) (const $ return def)
v <- case e of
Nothing -> getConfig r s ""
Just d -> getConfig r s d
when (null v) $ error $ "set " ++ envvar ++ " or " ++ remoteConfig r s
return v
where
envvar = "ANNEX_" ++ map (\c -> if c == '-' then '_' else toUpper c) s
{- The UUID of a S3 bucket is stored in a file "git-annex-uuid" in the
- bucket. Gets the UUID, or if there is none, sets a new UUID, possibly
- also creating the bucket. -}
getS3UUID :: Git.Repo -> Annex UUID
getS3UUID r = withS3Connection r disable $ \(c, b) -> do
res <- liftIO $
getObject c $ S3Object b uuidfile "" [] L.empty
case res of
Right o -> return $ L.unpack $ obj_data o
Left _ -> do
location <- getS3Config r "s3-datacenter" (Just "EU")
-- bucket may already exist, or not
_ <- liftIO $ createBucketIn c b location
u <- getUUID r
res' <- liftIO $ sendObject c $
S3Object b uuidfile "" [] $
L.pack u
case res' of
Right _ -> return u
Left e -> do
warning $ prettyReqError e
disable
where
uuidfile = "git-annex-uuid"
disable = return "" -- empty uuid will disable this remote

3
debian/changelog vendored
View file

@ -1,11 +1,12 @@
git-annex (0.20110326) UNRELEASED; urgency=low git-annex (0.20110326) UNRELEASED; urgency=low
* annex.diskreserve can be given in arbitrary units (ie "0.5 gigabytes") * Amazon is S3 now supported as a special type of remote.
* Generalized remotes handling, laying groundwork for remotes that are * Generalized remotes handling, laying groundwork for remotes that are
not regular git remotes. not regular git remotes.
* Provide a less expensive version of `git annex copy --to`, enabled * Provide a less expensive version of `git annex copy --to`, enabled
via --fast. This assumes that location tracking information is correct, via --fast. This assumes that location tracking information is correct,
rather than contacting the remote for every file. rather than contacting the remote for every file.
* annex.diskreserve can be given in arbitrary units (ie "0.5 gigabytes")
-- Joey Hess <joeyh@debian.org> Sat, 26 Mar 2011 14:36:16 -0400 -- Joey Hess <joeyh@debian.org> Sat, 26 Mar 2011 14:36:16 -0400