fix S3 upload buffering problem
Provide file size to new version of hS3.
commit 6fcd3e1ef7 (parent d8329731c6)
4 changed files with 43 additions and 30 deletions
@@ -10,7 +10,7 @@ module Remote.S3 (remote) where
 import Control.Exception.Extensible (IOException)
 import Network.AWS.AWSConnection
 import Network.AWS.S3Object
-import Network.AWS.S3Bucket
+import Network.AWS.S3Bucket hiding (size)
 import Network.AWS.AWSResult
 import qualified Data.ByteString.Lazy.Char8 as L
 import qualified Data.Map as M
@@ -18,6 +18,8 @@ import Data.Maybe
 import Control.Monad (when)
 import Control.Monad.State (liftIO)
 import System.Environment
+import System.Posix.Files
+import System.Directory
 
 import RemoteClass
 import Types
@@ -30,6 +32,7 @@ import Config
 import Remote.Special
 import Remote.Encryptable
 import Crypto
+import Key
 
 remote :: RemoteType Annex
 remote = RemoteType {
@@ -100,21 +103,35 @@ s3Setup u c = do
 store :: Remote Annex -> Key -> Annex Bool
 store r k = s3Action r False $ \(conn, bucket) -> do
 	g <- Annex.gitRepo
-	content <- liftIO $ L.readFile $ gitAnnexLocation g k
-	res <- liftIO $ storeHelper (conn, bucket) r k content
+	res <- liftIO $ storeHelper (conn, bucket) r k $ gitAnnexLocation g k
 	s3Bool res
 
 storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
 storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
 	g <- Annex.gitRepo
 	let f = gitAnnexLocation g k
-	res <- liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> do
-		storeHelper (conn, bucket) r enck s
+	-- To get the file size of the encrypted content, have to use a temp file.
+	-- (An alternative would be chunking to a constant size.)
+	let tmp = gitAnnexTmpLocation g enck
+	liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
+	res <- liftIO $ storeHelper (conn, bucket) r enck tmp
+	tmp_exists <- liftIO $ doesFileExist tmp
+	when tmp_exists $ liftIO $ removeFile tmp
 	s3Bool res
 
-storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> L.ByteString -> IO (AWSResult ())
-storeHelper (conn, bucket) r k content = do
-	let object = setStorageClass storageclass $ bucketKey bucket k content
+storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ())
+storeHelper (conn, bucket) r k file = do
+	content <- liftIO $ L.readFile file
+	-- Size is provided to S3 so the whole content does not need to be
+	-- buffered to calculate it.
+	size <- case keySize k of
+		Just s -> return $ fromIntegral s
+		Nothing -> do
+			s <- liftIO $ getFileStatus file
+			return $ fileSize s
+	let object = setStorageClass storageclass $
+		S3Object bucket (show k) ""
+			[("Content-Length", show size)] content
 	sendObject conn object
 	where
 		storageclass =
@@ -124,7 +141,7 @@ storeHelper (conn, bucket) r k content = do
 
 retrieve :: Remote Annex -> Key -> FilePath -> Annex Bool
 retrieve r k f = s3Action r False $ \(conn, bucket) -> do
-	res <- liftIO $ getObject conn $ bucketKey bucket k L.empty
+	res <- liftIO $ getObject conn $ bucketKey bucket k
 	case res of
 		Right o -> do
 			liftIO $ L.writeFile f $ obj_data o
@@ -133,7 +150,7 @@ retrieve r k f = s3Action r False $ \(conn, bucket) -> do
 
 retrieveEncrypted :: Remote Annex -> (Cipher, Key) -> FilePath -> Annex Bool
 retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
-	res <- liftIO $ getObject conn $ bucketKey bucket enck L.empty
+	res <- liftIO $ getObject conn $ bucketKey bucket enck
 	case res of
 		Right o -> liftIO $
 			withDecryptedContent cipher (return $ obj_data o) $ \content -> do
@@ -143,13 +160,13 @@ retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
 
 remove :: Remote Annex -> Key -> Annex Bool
 remove r k = s3Action r False $ \(conn, bucket) -> do
-	res <- liftIO $ deleteObject conn $ bucketKey bucket k L.empty
+	res <- liftIO $ deleteObject conn $ bucketKey bucket k
 	s3Bool res
 
 checkPresent :: Remote Annex -> Key -> Annex (Either IOException Bool)
 checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do
 	showNote ("checking " ++ name r ++ "...")
-	res <- liftIO $ getObjectInfo conn $ bucketKey bucket k L.empty
+	res <- liftIO $ getObjectInfo conn $ bucketKey bucket k
 	case res of
 		Right _ -> return $ Right True
 		Left (AWSError _ _) -> return $ Right False
@@ -205,5 +222,5 @@ s3Action r noconn action = do
 		(Just b, Just c) -> action (c, b)
 		_ -> return noconn
 
-bucketKey :: String -> Key -> L.ByteString -> S3Object
-bucketKey bucket k content = S3Object bucket (show k) "" [] content
+bucketKey :: String -> Key -> S3Object
+bucketKey bucket k = S3Object bucket (show k) "" [] L.empty
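
The essence of the change: hS3 0.5.6 can be told the object size out of band, so the lazy file content no longer has to be forced into memory just to compute Content-Length. Below is a minimal standalone sketch of the idea, not git-annex code; `send` is a hypothetical stand-in for hS3's sendObject.

import qualified Data.ByteString.Lazy.Char8 as L
import System.Posix.Files (fileSize, getFileStatus)

-- Hypothetical stand-in for hS3's sendObject: consumes the content once.
send :: Integer -> L.ByteString -> IO ()
send size content = do
	putStrLn $ "Content-Length: " ++ show size
	L.putStr content

-- Buffered: L.length must walk the whole file, and because the same
-- content is consumed again by send, every chunk stays live in memory
-- until the upload finishes.
uploadBuffered :: FilePath -> IO ()
uploadBuffered file = do
	content <- L.readFile file
	send (fromIntegral $ L.length content) content

-- Streaming: the size comes from stat(2) (or, in git-annex, from the
-- key when it records one), so each chunk of content can be garbage
-- collected as soon as it has been written out.
uploadStreaming :: FilePath -> IO ()
uploadStreaming file = do
	s <- getFileStatus file
	content <- L.readFile file
	send (fromIntegral $ fileSize s) content

main :: IO ()
main = uploadStreaming "/etc/hostname"

Run against a large file, the streaming version stays in roughly constant space, which is exactly what the new storeHelper above achieves.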
debian/changelog
@@ -1,9 +1,12 @@
 git-annex (0.20110420) UNRELEASED; urgency=low
 
   * Update Debian build dependencies for ghc 7.
-  * Debian package is now built with S3 support. Thanks Joachim Breitner for
-    making this possible, also thanks Greg Heartsfield for working to improve
-    the hS3 library for git-annex.
+  * Debian package is now built with S3 support.
+    Thanks Joachim Breitner for making this possible.
+  * No longer needs to buffer entire files when sending them to S3.
+    (However, getting files from S3 still requires buffering.)
+    Thanks Greg Heartsfield for ongoing work to improve the hS3 library
+    for git-annex.
 
  -- Joey Hess <joeyh@debian.org>  Thu, 21 Apr 2011 02:00:00 -0400
 
debian/control
@@ -43,4 +43,3 @@ Description: manage files with git, without checking their contents into git
 versioned files, which is convenient for maintaining documents, Makefiles,
 etc that are associated with annexed files but that benefit from full
 revision control.
-
@@ -1,4 +1,4 @@
-S3 has two memory leaks.
+S3 has memory leaks
 
 ## with encryption
 
@@ -8,16 +8,10 @@ not yet for S3, in 5985acdfad8a6791f0b2fc54a1e116cee9c12479.
 
 ## always
 
-The other occurs independant of encryption use. Copying a 100 mb
-file to S3 causes an immediate sharp memory spike to 119 mb.
 Copying the file back from S3 causes a slow memory increase toward 119 mb.
-It's likely that this memory is used by the hS3 library, if it does not
-construct the message to Amazon lazily. (And it may not be possible to
-construct it lazily, if it includes checksum headers..)
 
-I have emailed the hS3 author about this. He wrote back quickly, seems
-only getting the size of the file is causing it to be buffered, and a quick
-fix should be forthcoming. Update: 0.5.6 has been released which will
-allow providing file size out of band to avoid buffering when uploading.
-Downloading will take further work in hS3.
---[[Joey]]
+The author of hS3 is aware of the problem, and working on it.
+
+## fixed
+
+memory leak while uploading content to S3
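
For encrypted uploads the size can come neither from the key nor from stat(2) on the original file, since the encrypted stream has a different length; hence the temp file spool in storeEncrypted above. A rough sketch of that pattern, with hypothetical `encrypt` and `upload` standing in for withEncryptedContent and storeHelper:

import Control.Monad (when)
import qualified Data.ByteString.Lazy.Char8 as L
import System.Directory (doesFileExist, removeFile)
import System.Posix.Files (fileSize, getFileStatus)

-- Hypothetical stand-in for withEncryptedContent; not real crypto.
encrypt :: L.ByteString -> L.ByteString
encrypt = L.map succ

-- Spool the encrypted stream to a temp file so its exact size is known
-- before the upload begins, then delete the spool file, checking that
-- it exists first, as the patch does.
uploadEncrypted :: (Integer -> FilePath -> IO Bool) -> FilePath -> FilePath -> IO Bool
uploadEncrypted upload tmp file = do
	L.readFile file >>= L.writeFile tmp . encrypt
	s <- getFileStatus tmp
	ok <- upload (fromIntegral $ fileSize s) tmp
	exists <- doesFileExist tmp
	when exists $ removeFile tmp
	return ok

main :: IO ()
main = do
	let fakeUpload size _ = do
		print size
		return True
	ok <- uploadEncrypted fakeUpload "/tmp/spool.tmp" "/etc/hostname"
	print ok

The cost is an extra write of the full encrypted content to disk; as the patch comment notes, chunking uploads to a constant size would be an alternative.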