bup: Avoid memory leak when transferring encrypted data.

This was a most surprising leak. It occurred in the process that is forked
off to feed data to gpg. That process was passed a lazy ByteString of
input, and ghc seemed to not GC the ByteString as it was lazily read
and consumed, so memory slowly leaked as the file was read and passed
through gpg to bup.

To fix it, I simply changed the feeder to take an IO action that returns
the lazy bytestring, and fed the result directly to hPut.

AFAICS, this should change nothing WRT buffering. But somehow it makes
ghc's GC do the right thing. Probably I triggered some weakness in ghc's
GC (version 6.12.1).

(Note that S3 still has this leak, and others too. Fixing it will involve
another dance with the type system.)

Update: One theory I have is that this has something to do with
the forking of the feeder process. Perhaps, when the ByteString
is produced before the fork, ghc decides it need to hold a pointer
to the start of it, for some reason -- maybe it doesn't realize that
it is only used in the forked process.
This commit is contained in:
Joey Hess 2011-04-19 15:26:50 -04:00
parent b1274b6378
commit 5985acdfad
5 changed files with 18 additions and 21 deletions

View file

@ -153,24 +153,24 @@ encryptKey c k =
{- Runs an action, passing it a handle from which it can {- Runs an action, passing it a handle from which it can
- stream encrypted content. -} - stream encrypted content. -}
withEncryptedHandle :: Cipher -> L.ByteString -> (Handle -> IO a) -> IO a withEncryptedHandle :: Cipher -> (IO L.ByteString) -> (Handle -> IO a) -> IO a
withEncryptedHandle = gpgCipherHandle [Params "--symmetric --force-mdc"] withEncryptedHandle = gpgCipherHandle [Params "--symmetric --force-mdc"]
{- Runs an action, passing it a handle from which it can {- Runs an action, passing it a handle from which it can
- stream decrypted content. -} - stream decrypted content. -}
withDecryptedHandle :: Cipher -> L.ByteString -> (Handle -> IO a) -> IO a withDecryptedHandle :: Cipher -> (IO L.ByteString) -> (Handle -> IO a) -> IO a
withDecryptedHandle = gpgCipherHandle [Param "--decrypt"] withDecryptedHandle = gpgCipherHandle [Param "--decrypt"]
{- Streams encrypted content to an action. -} {- Streams encrypted content to an action. -}
withEncryptedContent :: Cipher -> L.ByteString -> (L.ByteString -> IO a) -> IO a withEncryptedContent :: Cipher -> (IO L.ByteString) -> (L.ByteString -> IO a) -> IO a
withEncryptedContent = pass withEncryptedHandle withEncryptedContent = pass withEncryptedHandle
{- Streams decrypted content to an action. -} {- Streams decrypted content to an action. -}
withDecryptedContent :: Cipher -> L.ByteString -> (L.ByteString -> IO a) -> IO a withDecryptedContent :: Cipher -> (IO L.ByteString) -> (L.ByteString -> IO a) -> IO a
withDecryptedContent = pass withDecryptedHandle withDecryptedContent = pass withDecryptedHandle
pass :: (Cipher -> L.ByteString -> (Handle -> IO a) -> IO a) pass :: (Cipher -> (IO L.ByteString) -> (Handle -> IO a) -> IO a)
-> Cipher -> L.ByteString -> (L.ByteString -> IO a) -> IO a -> Cipher -> (IO L.ByteString) -> (L.ByteString -> IO a) -> IO a
pass to c i a = to c i $ \h -> a =<< L.hGetContents h pass to c i a = to c i $ \h -> a =<< L.hGetContents h
gpgParams :: [CommandParam] -> IO [String] gpgParams :: [CommandParam] -> IO [String]
@ -203,8 +203,8 @@ gpgPipeStrict params input = do
- -
- Note that to avoid deadlock with the cleanup stage, - Note that to avoid deadlock with the cleanup stage,
- the action must fully consume gpg's input before returning. -} - the action must fully consume gpg's input before returning. -}
gpgCipherHandle :: [CommandParam] -> Cipher -> L.ByteString -> (Handle -> IO a) -> IO a gpgCipherHandle :: [CommandParam] -> Cipher -> (IO L.ByteString) -> (Handle -> IO a) -> IO a
gpgCipherHandle params c input a = do gpgCipherHandle params c a b = do
-- pipe the passphrase into gpg on a fd -- pipe the passphrase into gpg on a fd
(frompipe, topipe) <- createPipe (frompipe, topipe) <- createPipe
_ <- forkIO $ do _ <- forkIO $ do
@ -217,11 +217,11 @@ gpgCipherHandle params c input a = do
params' <- gpgParams $ passphrase ++ params params' <- gpgParams $ passphrase ++ params
(pid, fromh, toh) <- hPipeBoth "gpg" params' (pid, fromh, toh) <- hPipeBoth "gpg" params'
_ <- forkProcess $ do _ <- forkProcess $ do
L.hPut toh input L.hPut toh =<< a
hClose toh hClose toh
exitSuccess exitSuccess
hClose toh hClose toh
ret <- a fromh ret <- b fromh
-- cleanup -- cleanup
forceSuccess pid forceSuccess pid

View file

@ -126,8 +126,7 @@ storeEncrypted r buprepo (cipher, enck) k = do
let src = gitAnnexLocation g k let src = gitAnnexLocation g k
params <- bupSplitParams r buprepo enck (Param "-") params <- bupSplitParams r buprepo enck (Param "-")
liftIO $ catchBool $ do liftIO $ catchBool $ do
content <- L.readFile src withEncryptedHandle cipher (L.readFile src) $ \h -> do
withEncryptedHandle cipher content $ \h -> do
pipeBup params (Just h) Nothing pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> FilePath -> Annex Bool retrieve :: BupRepo -> Key -> FilePath -> Annex Bool
@ -142,8 +141,7 @@ retrieveEncrypted buprepo (cipher, enck) f = do
let params = bupParams "join" buprepo [Param $ show enck] let params = bupParams "join" buprepo [Param $ show enck]
liftIO $ catchBool $ do liftIO $ catchBool $ do
(pid, h) <- hPipeFrom "bup" $ toCommand params (pid, h) <- hPipeFrom "bup" $ toCommand params
content <- L.hGetContents h withDecryptedContent cipher (L.hGetContents h) $ L.writeFile f
withDecryptedContent cipher content $ L.writeFile f
forceSuccess pid forceSuccess pid
return True return True

View file

@ -92,8 +92,7 @@ storeEncrypted d (cipher, enck) k = do
liftIO $ catchBool $ storeHelper dest $ encrypt src dest liftIO $ catchBool $ storeHelper dest $ encrypt src dest
where where
encrypt src dest = do encrypt src dest = do
content <- L.readFile src withEncryptedContent cipher (L.readFile src) $ L.writeFile dest
withEncryptedContent cipher content $ L.writeFile dest
return True return True
storeHelper :: FilePath -> IO Bool -> IO Bool storeHelper :: FilePath -> IO Bool -> IO Bool
@ -113,8 +112,7 @@ retrieve d k f = liftIO $ copyFile (dirKey d k) f
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted d (cipher, enck) f = retrieveEncrypted d (cipher, enck) f =
liftIO $ catchBool $ do liftIO $ catchBool $ do
content <- L.readFile (dirKey d enck) withDecryptedContent cipher (L.readFile (dirKey d enck)) $ L.writeFile f
withDecryptedContent cipher content $ L.writeFile f
return True return True
remove :: FilePath -> Key -> Annex Bool remove :: FilePath -> Key -> Annex Bool

View file

@ -106,7 +106,7 @@ store r k = s3Action r False $ \(conn, bucket) -> do
storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool storeEncrypted :: Remote Annex -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do storeEncrypted r (cipher, enck) k = s3Action r False $ \(conn, bucket) -> do
content <- lazyKeyContent k content <- lazyKeyContent k
res <- liftIO $ withEncryptedContent cipher content $ \s -> do res <- liftIO $ withEncryptedContent cipher (return content) $ \s -> do
storeHelper (conn, bucket) r enck s storeHelper (conn, bucket) r enck s
s3Bool res s3Bool res
@ -139,7 +139,7 @@ retrieveEncrypted r (cipher, enck) f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey bucket enck L.empty res <- liftIO $ getObject conn $ bucketKey bucket enck L.empty
case res of case res of
Right o -> liftIO $ Right o -> liftIO $
withDecryptedContent cipher (obj_data o) $ \content -> do withDecryptedContent cipher (return $ obj_data o) $ \content -> do
L.writeFile f content L.writeFile f content
return True return True
Left e -> s3Warning e Left e -> s3Warning e

3
debian/changelog vendored
View file

@ -3,7 +3,8 @@ git-annex (0.20110418) UNRELEASED; urgency=low
* Don't run gpg in batch mode, so it can prompt for passphrase when * Don't run gpg in batch mode, so it can prompt for passphrase when
there is no agent. there is no agent.
* Add missing build dep on dataenc. * Add missing build dep on dataenc.
* Fix stalls in S3 when transferring encrypted data. * S3: Fix stalls when transferring encrypted data.
* bup: Avoid memory leak when transferring encrypted data.
-- Joey Hess <joeyh@debian.org> Sun, 17 Apr 2011 14:29:49 -0400 -- Joey Hess <joeyh@debian.org> Sun, 17 Apr 2011 14:29:49 -0400