S3: support chunking
The assistant defaults to 1MiB chunk size for new S3 special remotes. Which will work around a couple of bugs: http://git-annex.branchable.com/bugs/S3_memory_leaks/ http://git-annex.branchable.com/bugs/S3_upload_not_using_multipart/
This commit is contained in:
parent
c3750901d8
commit
32e4368377
6 changed files with 38 additions and 60 deletions
|
@ -129,6 +129,7 @@ postAddS3R = awsConfigurator $ do
|
|||
, ("type", "S3")
|
||||
, ("datacenter", T.unpack $ datacenter input)
|
||||
, ("storageclass", show $ storageClass input)
|
||||
, ("chunk", "1MiB")
|
||||
]
|
||||
_ -> $(widgetFile "configurators/adds3")
|
||||
#else
|
||||
|
|
70
Remote/S3.hs
70
Remote/S3.hs
|
@ -25,12 +25,10 @@ import qualified Git
|
|||
import Config
|
||||
import Config.Cost
|
||||
import Remote.Helper.Special
|
||||
import Remote.Helper.Encryptable
|
||||
import Remote.Helper.ChunkedEncryptable
|
||||
import qualified Remote.Helper.AWS as AWS
|
||||
import Crypto
|
||||
import Creds
|
||||
import Utility.Metered
|
||||
import Annex.Content
|
||||
import Annex.UUID
|
||||
import Logs.Web
|
||||
|
||||
|
@ -47,17 +45,17 @@ remote = RemoteType {
|
|||
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
|
||||
gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
|
||||
where
|
||||
new cst = Just $ encryptableRemote c
|
||||
(storeEncrypted this)
|
||||
(retrieveEncrypted this)
|
||||
new cst = Just $ chunkedEncryptableRemote c
|
||||
(prepareStore this)
|
||||
(prepareRetrieve this)
|
||||
this
|
||||
where
|
||||
this = Remote {
|
||||
uuid = u,
|
||||
cost = cst,
|
||||
name = Git.repoDescribe r,
|
||||
storeKey = store this,
|
||||
retrieveKeyFile = retrieve this,
|
||||
storeKey = storeKeyDummy,
|
||||
retrieveKeyFile = retreiveKeyFileDummy,
|
||||
retrieveKeyFileCheap = retrieveCheap this,
|
||||
removeKey = remove this c,
|
||||
hasKey = checkPresent this,
|
||||
|
@ -123,31 +121,21 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
|
|||
writeUUIDFile archiveconfig u
|
||||
use archiveconfig
|
||||
|
||||
store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
|
||||
store r k _f p = s3Action r False $ \(conn, bucket) ->
|
||||
sendAnnex k (void $ remove' r k) $ \src -> do
|
||||
ok <- s3Bool =<< storeHelper (conn, bucket) r k p src
|
||||
prepareStore :: Remote -> Preparer Storer
|
||||
prepareStore r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
|
||||
fileStorer $ \k src p -> do
|
||||
ok <- s3Bool =<< liftIO (store (conn, bucket) r k p src)
|
||||
|
||||
-- Store public URL to item in Internet Archive.
|
||||
when (ok && isIA (config r)) $
|
||||
when (ok && isIA (config r) && not (isChunkKey k)) $
|
||||
setUrlPresent k (iaKeyUrl r k)
|
||||
|
||||
return ok
|
||||
|
||||
storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
|
||||
storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) ->
|
||||
-- To get file size of the encrypted content, have to use a temp file.
|
||||
-- (An alternative would be chunking to to a constant size.)
|
||||
withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do
|
||||
liftIO $ encrypt (getGpgEncParams r) cipher (feedFile src) $
|
||||
readBytes $ L.writeFile tmp
|
||||
s3Bool =<< storeHelper (conn, bucket) r enck p tmp
|
||||
|
||||
storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ())
|
||||
storeHelper (conn, bucket) r k p file = do
|
||||
size <- maybe getsize (return . fromIntegral) $ keySize k
|
||||
meteredBytes (Just p) size $ \meterupdate ->
|
||||
liftIO $ withMeteredFile file meterupdate $ \content -> do
|
||||
store :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> IO (AWSResult ())
|
||||
store (conn, bucket) r k p file = do
|
||||
size <- (fromIntegral . fileSize <$> getFileStatus file) :: IO Integer
|
||||
withMeteredFile file p $ \content -> do
|
||||
-- size is provided to S3 so the whole content
|
||||
-- does not need to be buffered to calculate it
|
||||
let object = S3Object
|
||||
|
@ -156,34 +144,16 @@ storeHelper (conn, bucket) r k p file = do
|
|||
content
|
||||
sendObject conn $
|
||||
setStorageClass (getStorageClass $ config r) object
|
||||
where
|
||||
getsize = liftIO $ fromIntegral . fileSize <$> getFileStatus file
|
||||
|
||||
retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
|
||||
retrieve r k _f d p = s3Action r False $ \(conn, bucket) ->
|
||||
metered (Just p) k $ \meterupdate -> do
|
||||
res <- liftIO $ getObject conn $ bucketKey r bucket k
|
||||
case res of
|
||||
Right o -> do
|
||||
liftIO $ meteredWriteFile meterupdate d $
|
||||
obj_data o
|
||||
return True
|
||||
Left e -> s3Warning e
|
||||
prepareRetrieve :: Remote -> Preparer Retriever
|
||||
prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) ->
|
||||
byteRetriever $ \k ->
|
||||
liftIO (getObject conn $ bucketKey r bucket k)
|
||||
>>= either s3Error (return . obj_data)
|
||||
|
||||
retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool
|
||||
retrieveCheap _ _ _ = return False
|
||||
|
||||
retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
|
||||
retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) ->
|
||||
metered (Just p) k $ \meterupdate -> do
|
||||
res <- liftIO $ getObject conn $ bucketKey r bucket enck
|
||||
case res of
|
||||
Right o -> liftIO $ decrypt cipher (\h -> meteredWrite meterupdate h $ obj_data o) $
|
||||
readBytes $ \content -> do
|
||||
L.writeFile d content
|
||||
return True
|
||||
Left e -> s3Warning e
|
||||
|
||||
{- Internet Archive doesn't easily allow removing content.
|
||||
- While it may remove the file, there are generally other files
|
||||
- derived from it that it does not remove. -}
|
||||
|
|
|
@ -15,6 +15,7 @@ module Types.Key (
|
|||
file2key,
|
||||
nonChunkKey,
|
||||
chunkKeyOffset,
|
||||
isChunkKey,
|
||||
|
||||
prop_idempotent_key_encode,
|
||||
prop_idempotent_key_decode
|
||||
|
@ -62,6 +63,9 @@ chunkKeyOffset k = (*)
|
|||
<$> keyChunkSize k
|
||||
<*> (pred <$> keyChunkNum k)
|
||||
|
||||
isChunkKey :: Key -> Bool
|
||||
isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k)
|
||||
|
||||
fieldSep :: Char
|
||||
fieldSep = '-'
|
||||
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -1,7 +1,7 @@
|
|||
git-annex (5.20140718) UNRELEASED; urgency=medium
|
||||
|
||||
* New chunk= option to chunk files stored in special remotes.
|
||||
Currently supported by: directory, and all external special remotes.
|
||||
Currently supported by: directory, S3, and all external special remotes.
|
||||
* Partially transferred files are automatically resumed when using
|
||||
chunked remotes!
|
||||
* The old chunksize= option is deprecated. Do not use for new remotes.
|
||||
|
|
|
@ -18,6 +18,9 @@ the S3 remote.
|
|||
* `encryption` - One of "none", "hybrid", "shared", or "pubkey".
|
||||
See [[encryption]].
|
||||
|
||||
* `chunk` - Enables [[chunking]] when storing large files.
|
||||
`chunk=1MiB` is a good starting point for chunking.
|
||||
|
||||
* `keyid` - Specifies the gpg key to use for [[encryption]].
|
||||
|
||||
* `embedcreds` - Optional. Set to "yes" embed the login credentials inside
|
||||
|
|
|
@ -14,7 +14,7 @@ like "2512E3C7"
|
|||
|
||||
Next, create the S3 remote, and describe it.
|
||||
|
||||
# git annex initremote cloud type=S3 keyid=2512E3C7
|
||||
# git annex initremote cloud type=S3 chunk=1MiB keyid=2512E3C7
|
||||
initremote cloud (encryption setup with gpg key C910D9222512E3C7) (checking bucket) (creating bucket in US) (gpg) ok
|
||||
# git annex describe cloud "at Amazon's US datacenter"
|
||||
describe cloud ok
|
||||
|
|
Loading…
Reference in a new issue