S3: support chunking
The assistant now defaults to a 1MiB chunk size for new S3 special remotes, which works around a couple of bugs:
http://git-annex.branchable.com/bugs/S3_memory_leaks/
http://git-annex.branchable.com/bugs/S3_upload_not_using_multipart/
parent c3750901d8
commit 32e4368377
6 changed files with 38 additions and 60 deletions
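The equivalent manual setup, taken from the documentation change at the end of this commit:

	# git annex initremote cloud type=S3 chunk=1MiB keyid=2512E3C7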
Assistant/WebApp/Configurators/AWS.hs

@@ -129,6 +129,7 @@ postAddS3R = awsConfigurator $ do
 				, ("type", "S3")
 				, ("datacenter", T.unpack $ datacenter input)
 				, ("storageclass", show $ storageClass input)
+				, ("chunk", "1MiB")
 				]
 		_ -> $(widgetFile "configurators/adds3")
 #else
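The webapp thus includes chunk=1MiB in the RemoteConfig it hands to initremote. As a rough sketch of how such a size string becomes a byte count, assuming git-annex's Utility.DataUnits API (readSize and dataUnits) behaves as remembered here:

	import Utility.DataUnits (readSize, dataUnits)

	-- Hypothetical helper: parse a user-supplied size such as "1MiB".
	-- readSize yields Nothing on unparseable input, so a bad chunk=
	-- value can be rejected when the remote is set up.
	chunkSizeBytes :: String -> Maybe Integer
	chunkSizeBytes = readSize dataUnits

	-- chunkSizeBytes "1MiB" == Just 1048576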
Remote/S3.hs (86 lines changed)
@@ -25,12 +25,10 @@ import qualified Git
 import Config
 import Config.Cost
 import Remote.Helper.Special
-import Remote.Helper.Encryptable
+import Remote.Helper.ChunkedEncryptable
 import qualified Remote.Helper.AWS as AWS
-import Crypto
 import Creds
 import Utility.Metered
-import Annex.Content
 import Annex.UUID
 import Logs.Web
 
@@ -47,17 +45,17 @@ remote = RemoteType {
 gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
 gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
   where
-	new cst = Just $ encryptableRemote c
-		(storeEncrypted this)
-		(retrieveEncrypted this)
+	new cst = Just $ chunkedEncryptableRemote c
+		(prepareStore this)
+		(prepareRetrieve this)
 		this
 	  where
 		this = Remote {
 			uuid = u,
 			cost = cst,
 			name = Git.repoDescribe r,
-			storeKey = store this,
-			retrieveKeyFile = retrieve this,
+			storeKey = storeKeyDummy,
+			retrieveKeyFile = retreiveKeyFileDummy,
 			retrieveKeyFileCheap = retrieveCheap this,
 			removeKey = remove this c,
 			hasKey = checkPresent this,
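storeKey and retrieveKeyFile become the storeKeyDummy and retreiveKeyFileDummy placeholders (that spelling is the helper's actual identifier), which chunkedEncryptableRemote replaces with implementations layering chunking and encryption over the plain Storer and Retriever defined below. At its core, chunking just splits content into fixed-size pieces; a standalone sketch of the idea, not the helper's actual code:

	import Data.Int (Int64)
	import qualified Data.ByteString.Lazy as L

	-- Split content into n-byte pieces; the last piece may be shorter.
	-- With a lazy ByteString roughly one chunk is resident at a time,
	-- which is how a 1MiB chunk size bounds the remote's memory use.
	chunksOf :: Int64 -> L.ByteString -> [L.ByteString]
	chunksOf n b
		| L.null b = []
		| otherwise = let (h, t) = L.splitAt n b in h : chunksOf n t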
@@ -123,67 +121,39 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
 	writeUUIDFile archiveconfig u
 	use archiveconfig
 
-store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
-store r k _f p = s3Action r False $ \(conn, bucket) ->
-	sendAnnex k (void $ remove' r k) $ \src -> do
-		ok <- s3Bool =<< storeHelper (conn, bucket) r k p src
+prepareStore :: Remote -> Preparer Storer
+prepareStore r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
+	fileStorer $ \k src p -> do
+		ok <- s3Bool =<< liftIO (store (conn, bucket) r k p src)
 
 		-- Store public URL to item in Internet Archive.
-		when (ok && isIA (config r)) $
+		when (ok && isIA (config r) && not (isChunkKey k)) $
 			setUrlPresent k (iaKeyUrl r k)
 
 		return ok
 
-storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
-storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) ->
-	-- To get file size of the encrypted content, have to use a temp file.
-	-- (An alternative would be chunking to a constant size.)
-	withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do
-		liftIO $ encrypt (getGpgEncParams r) cipher (feedFile src) $
-			readBytes $ L.writeFile tmp
-		s3Bool =<< storeHelper (conn, bucket) r enck p tmp
+store :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> IO (AWSResult ())
+store (conn, bucket) r k p file = do
+	size <- (fromIntegral . fileSize <$> getFileStatus file) :: IO Integer
+	withMeteredFile file p $ \content -> do
+		-- size is provided to S3 so the whole content
+		-- does not need to be buffered to calculate it
+		let object = S3Object
+			bucket (bucketFile r k) ""
+			(("Content-Length", show size) : getXheaders (config r))
+			content
+		sendObject conn $
+			setStorageClass (getStorageClass $ config r) object
 
-storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ())
-storeHelper (conn, bucket) r k p file = do
-	size <- maybe getsize (return . fromIntegral) $ keySize k
-	meteredBytes (Just p) size $ \meterupdate ->
-		liftIO $ withMeteredFile file meterupdate $ \content -> do
-			-- size is provided to S3 so the whole content
-			-- does not need to be buffered to calculate it
-			let object = S3Object
-				bucket (bucketFile r k) ""
-				(("Content-Length", show size) : getXheaders (config r))
-				content
-			sendObject conn $
-				setStorageClass (getStorageClass $ config r) object
-  where
-	getsize = liftIO $ fromIntegral . fileSize <$> getFileStatus file
-
-retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
-retrieve r k _f d p = s3Action r False $ \(conn, bucket) ->
-	metered (Just p) k $ \meterupdate -> do
-		res <- liftIO $ getObject conn $ bucketKey r bucket k
-		case res of
-			Right o -> do
-				liftIO $ meteredWriteFile meterupdate d $
-					obj_data o
-				return True
-			Left e -> s3Warning e
+prepareRetrieve :: Remote -> Preparer Retriever
+prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
+	byteRetriever $ \k ->
+		liftIO (getObject conn $ bucketKey r bucket k)
+			>>= either s3Error (return . obj_data)
 
 retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool
 retrieveCheap _ _ _ = return False
 
-retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
-retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) ->
-	metered (Just p) k $ \meterupdate -> do
-		res <- liftIO $ getObject conn $ bucketKey r bucket enck
-		case res of
-			Right o -> liftIO $ decrypt cipher (\h -> meteredWrite meterupdate h $ obj_data o) $
-				readBytes $ \content -> do
-					L.writeFile d content
-					return True
-			Left e -> s3Warning e
 
 {- Internet Archive doesn't easily allow removing content.
  - While it may remove the file, there are generally other files
  - derived from it that it does not remove. -}
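Note that prepareRetrieve still buffers each S3 object in memory via obj_data; what changes is that with chunking enabled an object is at most one chunk (1MiB by default from the assistant), which is what bounds memory use and works around the leak linked in the commit message.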
Types/Key.hs

@@ -15,6 +15,7 @@ module Types.Key (
 	file2key,
 	nonChunkKey,
 	chunkKeyOffset,
+	isChunkKey,
 
 	prop_idempotent_key_encode,
 	prop_idempotent_key_decode

@@ -62,6 +63,9 @@ chunkKeyOffset k = (*)
 	<$> keyChunkSize k
 	<*> (pred <$> keyChunkNum k)
 
+isChunkKey :: Key -> Bool
+isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k)
+
 fieldSep :: Char
 fieldSep = '-'
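A worked example of the two definitions above, with illustrative values:

	-- For a key k with:
	--   keyChunkSize k == Just 1048576   -- 1MiB chunks
	--   keyChunkNum k  == Just 3         -- third chunk, numbered from 1
	-- the definitions give:
	--   chunkKeyOffset k == (*) <$> Just 1048576 <*> (pred <$> Just 3)
	--                    == Just 2097152 -- chunk 3 starts 2MiB in
	--   isChunkKey k     == True
	-- An unchunked key has Nothing in both fields, so isChunkKey is
	-- False and chunkKeyOffset is Nothing.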
debian/changelog (2 lines changed, vendored)
@@ -1,7 +1,7 @@
 git-annex (5.20140718) UNRELEASED; urgency=medium
 
   * New chunk= option to chunk files stored in special remotes.
-    Currently supported by: directory, and all external special remotes.
+    Currently supported by: directory, S3, and all external special remotes.
   * Partially transferred files are automatically resumed when using
     chunked remotes!
   * The old chunksize= option is deprecated. Do not use for new remotes.
doc/special_remotes/S3.mdwn

@@ -18,6 +18,9 @@ the S3 remote.
 * `encryption` - One of "none", "hybrid", "shared", or "pubkey".
   See [[encryption]].
 
+* `chunk` - Enables [[chunking]] when storing large files.
+  `chunk=1MiB` is a good starting point for chunking.
+
 * `keyid` - Specifies the gpg key to use for [[encryption]].
 
 * `embedcreds` - Optional. Set to "yes" to embed the login credentials inside
doc/tips/using_Amazon_S3.mdwn

@@ -14,7 +14,7 @@ like "2512E3C7"
 
 Next, create the S3 remote, and describe it.
 
-	# git annex initremote cloud type=S3 keyid=2512E3C7
+	# git annex initremote cloud type=S3 chunk=1MiB keyid=2512E3C7
 	initremote cloud (encryption setup with gpg key C910D9222512E3C7) (checking bucket) (creating bucket in US) (gpg) ok
 	# git annex describe cloud "at Amazon's US datacenter"
 	describe cloud ok