fix S3 upload buffering problem
Provide file size to new version of hS3.
This commit is contained in:
parent
d8329731c6
commit
6fcd3e1ef7
4 changed files with 43 additions and 30 deletions
|
@ -10,7 +10,7 @@ module Remote.S3 (remote) where
|
||||||
import Control.Exception.Extensible (IOException)
|
import Control.Exception.Extensible (IOException)
|
||||||
import Network.AWS.AWSConnection
|
import Network.AWS.AWSConnection
|
||||||
import Network.AWS.S3Object
|
import Network.AWS.S3Object
|
||||||
import Network.AWS.S3Bucket
|
import Network.AWS.S3Bucket hiding (size)
|
||||||
import Network.AWS.AWSResult
|
import Network.AWS.AWSResult
|
||||||
import qualified Data.ByteString.Lazy.Char8 as L
|
import qualified Data.ByteString.Lazy.Char8 as L
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
@ -18,6 +18,8 @@ import Data.Maybe
|
||||||
import Control.Monad (when)
|
import Control.Monad (when)
|
||||||
import Control.Monad.State (liftIO)
|
import Control.Monad.State (liftIO)
|
||||||
import System.Environment
|
import System.Environment
|
||||||
|
import System.Posix.Files
|
||||||
|
import System.Directory
|
||||||
|
|
||||||
import RemoteClass
|
import RemoteClass
|
||||||
import Types
|
import Types
|
||||||
|
@ -30,6 +32,7 @@ import Config
|
||||||
import Remote.Special
|
import Remote.Special
|
||||||
import Remote.Encryptable
|
import Remote.Encryptable
|
||||||
import Crypto
|
import Crypto
|
||||||
|
import Key
|
||||||
|
|
||||||
remote :: RemoteType Annex
|
remote :: RemoteType Annex
|
||||||
remote = RemoteType {
|
remote = RemoteType {
|
||||||
|
@ -100,21 +103,35 @@ s3Setup u c = do
|
||||||
store :: Remote Annex -> Key -> Annex Bool
|
store :: Remote Annex -> Key -> Annex Bool
|
||||||
store r k = s3Action r False $ \(conn, bucket) -> do
|
store r k = s3Action r False $ \(conn, bucket) -> do
|
||||||
g <- Annex.gitRepo
|
g <- Annex.gitRepo
|
||||||
content <- liftIO $ L.readFile $ gitAnnexLocation g k
|
res <- liftIO $ storeHelper (conn, bucket) r k $ gitAnnexLocation g k
|
||||||
res <- liftIO $ storeHelper (conn, bucket) r k content
|
|
||||||
s3Bool res
|
s3Bool res
|
||||||
|
|
||||||
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
|
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
|
||||||
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
|
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
|
||||||
g <- Annex.gitRepo
|
g <- Annex.gitRepo
|
||||||
let f = gitAnnexLocation g k
|
let f = gitAnnexLocation g k
|
||||||
res <- liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> do
|
-- To get file size of the encrypted content, have to use a temp file.
|
||||||
storeHelper (conn, bucket) r enck s
|
-- (An alternative would be chunking to a constant size.)
|
||||||
|
let tmp = gitAnnexTmpLocation g enck
|
||||||
|
liftIO $ withEncryptedContent cipher (L.readFile f) $ \s -> L.writeFile tmp s
|
||||||
|
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
|
||||||
|
tmp_exists <- liftIO $ doesFileExist tmp
|
||||||
|
when tmp_exists $ liftIO $ removeFile tmp
|
||||||
s3Bool res
|
s3Bool res
|
||||||
|
|
||||||
storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> L.ByteString -> IO (AWSResult ())
|
storeHelper :: (AWSConnection, String) -> Remote Annex -> Key -> FilePath -> IO (AWSResult ())
|
||||||
storeHelper (conn, bucket) r k content = do
|
storeHelper (conn, bucket) r k file = do
|
||||||
let object = setStorageClass storageclass $ bucketKey bucket k content
|
content <- liftIO $ L.readFile file
|
||||||
|
-- size is provided to S3 so the whole content does not need to be
|
||||||
|
-- buffered to calculate it
|
||||||
|
size <- case keySize k of
|
||||||
|
Just s -> return $ fromIntegral s
|
||||||
|
Nothing -> do
|
||||||
|
s <- liftIO $ getFileStatus file
|
||||||
|
return $ fileSize s
|
||||||
|
let object = setStorageClass storageclass $
|
||||||
|
S3Object bucket (show k) ""
|
||||||
|
[("Content-Length",(show size))] content
|
||||||
sendObject conn object
|
sendObject conn object
|
||||||
where
|
where
|
||||||
storageclass =
|
storageclass =
|
||||||
|
@ -124,7 +141,7 @@ storeHelper (conn, bucket) r k content = do
|
||||||
|
|
||||||
retrieve :: Remote Annex -> Key -> FilePath -> Annex Bool
|
retrieve :: Remote Annex -> Key -> FilePath -> Annex Bool
|
||||||
retrieve r k f = s3Action r False $ \(conn, bucket) -> do
|
retrieve r k f = s3Action r False $ \(conn, bucket) -> do
|
||||||
res <- liftIO $ getObject conn $ bucketKey bucket k L.empty
|
res <- liftIO $ getObject conn $ bucketKey bucket k
|
||||||
case res of
|
case res of
|
||||||
Right o -> do
|
Right o -> do
|
||||||
liftIO $ L.writeFile f $ obj_data o
|
liftIO $ L.writeFile f $ obj_data o
|
||||||
|
@ -133,7 +150,7 @@ retrieve r k f = s3Action r False $ \(conn, bucket) -> do
|
||||||
|
|
||||||
retrieveEncrypted :: Remote Annex -> (Cipher, Key) -> FilePath -> Annex Bool
|
retrieveEncrypted :: Remote Annex -> (Cipher, Key) -> FilePath -> Annex Bool
|
||||||
retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
|
retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
|
||||||
res <- liftIO $ getObject conn $ bucketKey bucket enck L.empty
|
res <- liftIO $ getObject conn $ bucketKey bucket enck
|
||||||
case res of
|
case res of
|
||||||
Right o -> liftIO $
|
Right o -> liftIO $
|
||||||
withDecryptedContent cipher (return $ obj_data o) $ \content -> do
|
withDecryptedContent cipher (return $ obj_data o) $ \content -> do
|
||||||
|
@ -143,13 +160,13 @@ retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
|
||||||
|
|
||||||
remove :: Remote Annex -> Key -> Annex Bool
|
remove :: Remote Annex -> Key -> Annex Bool
|
||||||
remove r k = s3Action r False $ \(conn, bucket) -> do
|
remove r k = s3Action r False $ \(conn, bucket) -> do
|
||||||
res <- liftIO $ deleteObject conn $ bucketKey bucket k L.empty
|
res <- liftIO $ deleteObject conn $ bucketKey bucket k
|
||||||
s3Bool res
|
s3Bool res
|
||||||
|
|
||||||
checkPresent :: Remote Annex -> Key -> Annex (Either IOException Bool)
|
checkPresent :: Remote Annex -> Key -> Annex (Either IOException Bool)
|
||||||
checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do
|
checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do
|
||||||
showNote ("checking " ++ name r ++ "...")
|
showNote ("checking " ++ name r ++ "...")
|
||||||
res <- liftIO $ getObjectInfo conn $ bucketKey bucket k L.empty
|
res <- liftIO $ getObjectInfo conn $ bucketKey bucket k
|
||||||
case res of
|
case res of
|
||||||
Right _ -> return $ Right True
|
Right _ -> return $ Right True
|
||||||
Left (AWSError _ _) -> return $ Right False
|
Left (AWSError _ _) -> return $ Right False
|
||||||
|
@ -205,5 +222,5 @@ s3Action r noconn action = do
|
||||||
(Just b, Just c) -> action (c, b)
|
(Just b, Just c) -> action (c, b)
|
||||||
_ -> return noconn
|
_ -> return noconn
|
||||||
|
|
||||||
bucketKey :: String -> Key -> L.ByteString -> S3Object
|
bucketKey :: String -> Key -> S3Object
|
||||||
bucketKey bucket k content = S3Object bucket (show k) "" [] content
|
bucketKey bucket k = S3Object bucket (show k) "" [] L.empty
|
||||||
|
|
9
debian/changelog
vendored
9
debian/changelog
vendored
|
@ -1,9 +1,12 @@
|
||||||
git-annex (0.20110420) UNRELEASED; urgency=low
|
git-annex (0.20110420) UNRELEASED; urgency=low
|
||||||
|
|
||||||
* Update Debian build dependencies for ghc 7.
|
* Update Debian build dependencies for ghc 7.
|
||||||
* Debian package is now built with S3 support. Thanks Joachim Breitner for
|
* Debian package is now built with S3 support.
|
||||||
making this possible, also thanks Greg Heartsfield for working to improve
|
Thanks Joachim Breitner for making this possible.
|
||||||
the hS3 library for git-annex.
|
* No longer needs to buffer entire files when sending them to S3.
|
||||||
|
(However, getting files from S3 still requires buffering.)
|
||||||
|
Thanks Greg Heartsfield for ongoing work to improve the hS3 library
|
||||||
|
for git-annex.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Thu, 21 Apr 2011 02:00:00 -0400
|
-- Joey Hess <joeyh@debian.org> Thu, 21 Apr 2011 02:00:00 -0400
|
||||||
|
|
||||||
|
|
1
debian/control
vendored
1
debian/control
vendored
|
@ -43,4 +43,3 @@ Description: manage files with git, without checking their contents into git
|
||||||
versioned files, which is convenient for maintaining documents, Makefiles,
|
versioned files, which is convenient for maintaining documents, Makefiles,
|
||||||
etc that are associated with annexed files but that benefit from full
|
etc that are associated with annexed files but that benefit from full
|
||||||
revision control.
|
revision control.
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
S3 has two memory leaks.
|
S3 has memory leaks
|
||||||
|
|
||||||
## with encryption
|
## with encryption
|
||||||
|
|
||||||
|
@ -8,16 +8,10 @@ not yet for S3, in 5985acdfad8a6791f0b2fc54a1e116cee9c12479.
|
||||||
|
|
||||||
## always
|
## always
|
||||||
|
|
||||||
The other occurs independently of encryption use. Copying a 100 mb
|
|
||||||
file to S3 causes an immediate sharp memory spike to 119 mb.
|
|
||||||
Copying the file back from S3 causes a slow memory increase toward 119 mb.
|
Copying the file back from S3 causes a slow memory increase toward 119 mb.
|
||||||
It's likely that this memory is used by the hS3 library, if it does not
|
|
||||||
construct the message to Amazon lazily. (And it may not be possible to
|
|
||||||
construct it lazily, if it includes checksum headers..)
|
|
||||||
|
|
||||||
I have emailed the hS3 author about this. He wrote back quickly, seems
|
The author of hS3 is aware of the problem, and working on it.
|
||||||
only getting the size of the file is causing it to be buffered, and a quick
|
|
||||||
fix should be forthcoming. Update: 0.5.6 has been released which will
|
## fixed
|
||||||
allow providing file size out of band to avoid buffering when uploading.
|
|
||||||
Downloading will take further work in hS3.
|
memory leak while uploading content to S3
|
||||||
--[[Joey]]
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue