git-annex/Remote/Directory.hs
Joey Hess 5985acdfad bup: Avoid memory leak when transferring encrypted data.
This was a most surprising leak. It occurred in the process that is forked
off to feed data to gpg. That process was passed a lazy ByteString of
input, and ghc seemed to not GC the ByteString as it was lazily read
and consumed, so memory slowly leaked as the file was read and passed
through gpg to bup.

To fix it, I simply changed the feeder to take an IO action that returns
the lazy bytestring, and fed the result directly to hPut.

AFAICS, this should change nothing WRT buffering. But somehow it makes
ghc's GC do the right thing. Probably I triggered some weakness in ghc's
GC (version 6.12.1).

(Note that S3 still has this leak, and others too. Fixing it will involve
another dance with the type system.)

Update: One theory I have is that this has something to do with
the forking of the feeder process. Perhaps, when the ByteString
is produced before the fork, ghc decides it need to hold a pointer
to the start of it, for some reason -- maybe it doesn't realize that
it is only used in the forked process.
2011-04-19 15:27:03 -04:00

129 lines
3.4 KiB
Haskell
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{- A "remote" that is just a filesystem directory.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Remote.Directory (remote) where
import qualified Data.ByteString.Lazy.Char8 as L
import IO
import Control.Exception.Extensible (IOException)
import qualified Data.Map as M
import Control.Monad (when)
import Control.Monad.State (liftIO)
import System.Directory hiding (copyFile)
import System.FilePath
import RemoteClass
import Types
import qualified GitRepo as Git
import qualified Annex
import UUID
import Locations
import CopyFile
import Config
import Content
import Utility
import Remote.Special
import Remote.Encryptable
import Crypto
remote :: RemoteType Annex
remote = RemoteType {
typename = "directory",
enumerate = findSpecialRemotes "directory",
generate = gen,
setup = directorySetup
}
gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
gen r u c = do
dir <- getConfig r "directory" (error "missing directory")
cst <- remoteCost r cheapRemoteCost
return $ encryptableRemote c
(storeEncrypted dir)
(retrieveEncrypted dir)
Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
storeKey = store dir,
retrieveKeyFile = retrieve dir,
removeKey = remove dir,
hasKey = checkPresent dir,
hasKeyCheap = True,
config = Nothing
}
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
directorySetup u c = do
-- verify configuration is sane
let dir = case M.lookup "directory" c of
Nothing -> error "Specify directory="
Just d -> d
e <- liftIO $ doesDirectoryExist dir
when (not e) $ error $ "Directory does not exist: " ++ dir
c' <- encryptionSetup c
-- The directory is stored in git config, not in this remote's
-- persistant state, so it can vary between hosts.
gitConfigSpecialRemote u c' "directory" dir
return $ M.delete "directory" c'
dirKey :: FilePath -> Key -> FilePath
dirKey d k = d </> hashDirMixed k </> f </> f
where
f = keyFile k
store :: FilePath -> Key -> Annex Bool
store d k = do
g <- Annex.gitRepo
let src = gitAnnexLocation g k
let dest = dirKey d k
liftIO $ catchBool $ storeHelper dest $ copyFile src dest
storeEncrypted :: FilePath -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted d (cipher, enck) k = do
g <- Annex.gitRepo
let src = gitAnnexLocation g k
let dest = dirKey d enck
liftIO $ catchBool $ storeHelper dest $ encrypt src dest
where
encrypt src dest = do
withEncryptedContent cipher (L.readFile src) $ L.writeFile dest
return True
storeHelper :: FilePath -> IO Bool -> IO Bool
storeHelper dest a = do
let dir = parentDir dest
createDirectoryIfMissing True dir
allowWrite dir
ok <- a
when ok $ do
preventWrite dest
preventWrite dir
return ok
retrieve :: FilePath -> Key -> FilePath -> Annex Bool
retrieve d k f = liftIO $ copyFile (dirKey d k) f
retrieveEncrypted :: FilePath -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted d (cipher, enck) f =
liftIO $ catchBool $ do
withDecryptedContent cipher (L.readFile (dirKey d enck)) $ L.writeFile f
return True
remove :: FilePath -> Key -> Annex Bool
remove d k = liftIO $ catchBool $ do
allowWrite dir
removeFile file
removeDirectory dir
return True
where
file = dirKey d k
dir = parentDir file
checkPresent :: FilePath -> Key -> Annex (Either IOException Bool)
checkPresent d k = liftIO $ try $ doesFileExist (dirKey d k)