git-annex/Remote/Bup.hs
Joey Hess 5985acdfad bup: Avoid memory leak when transferring encrypted data.
This was a most surprising leak. It occurred in the process that is forked
off to feed data to gpg. That process was passed a lazy ByteString of
input, and ghc seemed to not GC the ByteString as it was lazily read
and consumed, so memory slowly leaked as the file was read and passed
through gpg to bup.

To fix it, I simply changed the feeder to take an IO action that returns
the lazy bytestring, and fed the result directly to hPut.

AFAICS, this should change nothing WRT buffering. But somehow it makes
ghc's GC do the right thing. Probably I triggered some weakness in ghc's
GC (version 6.12.1).

(Note that S3 still has this leak, and others too. Fixing it will involve
another dance with the type system.)

Update: One theory I have is that this has something to do with
the forking of the feeder process. Perhaps, when the ByteString
is produced before the fork, ghc decides it need to hold a pointer
to the start of it, for some reason -- maybe it doesn't realize that
it is only used in the forked process.
2011-04-19 15:27:03 -04:00

237 lines
7.2 KiB
Haskell
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{- Using bup as a remote.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Remote.Bup (remote) where
import qualified Data.ByteString.Lazy.Char8 as L
import IO
import Control.Exception.Extensible (IOException)
import qualified Data.Map as M
import Control.Monad (unless, when)
import Control.Monad.State (liftIO)
import System.Process
import System.Exit
import System.FilePath
import Data.List.Utils
import System.Cmd.Utils
import RemoteClass
import Types
import qualified GitRepo as Git
import qualified Annex
import UUID
import Locations
import Config
import Utility
import Messages
import Ssh
import Remote.Special
import Remote.Encryptable
import Crypto
type BupRepo = String
remote :: RemoteType Annex
remote = RemoteType {
typename = "bup",
enumerate = findSpecialRemotes "buprepo",
generate = gen,
setup = bupSetup
}
gen :: Git.Repo -> UUID -> Maybe RemoteConfig -> Annex (Remote Annex)
gen r u c = do
buprepo <- getConfig r "buprepo" (error "missing buprepo")
cst <- remoteCost r (if bupLocal buprepo then semiCheapRemoteCost else expensiveRemoteCost)
bupr <- liftIO $ bup2GitRemote buprepo
(u', bupr') <- getBupUUID bupr u
return $ encryptableRemote c
(storeEncrypted r buprepo)
(retrieveEncrypted buprepo)
Remote {
uuid = u',
cost = cst,
name = Git.repoDescribe r,
storeKey = store r buprepo,
retrieveKeyFile = retrieve buprepo,
removeKey = remove,
hasKey = checkPresent r bupr',
hasKeyCheap = True,
config = c
}
bupSetup :: UUID -> RemoteConfig -> Annex RemoteConfig
bupSetup u c = do
-- verify configuration is sane
let buprepo = case M.lookup "buprepo" c of
Nothing -> error "Specify buprepo="
Just r -> r
c' <- encryptionSetup c
-- bup init will create the repository.
-- (If the repository already exists, bup init again appears safe.)
showNote "bup init"
ok <- bup "init" buprepo []
unless ok $ error "bup init failed"
storeBupUUID u buprepo
-- The buprepo is stored in git config, as well as this repo's
-- persistant state, so it can vary between hosts.
gitConfigSpecialRemote u c' "buprepo" buprepo
return c'
bupParams :: String -> BupRepo -> [CommandParam] -> [CommandParam]
bupParams command buprepo params =
(Param command) : [Param "-r", Param buprepo] ++ params
bup :: String -> BupRepo -> [CommandParam] -> Annex Bool
bup command buprepo params = do
showProgress -- make way for bup output
liftIO $ boolSystem "bup" $ bupParams command buprepo params
pipeBup :: [CommandParam] -> Maybe Handle -> Maybe Handle -> IO Bool
pipeBup params inh outh = do
p <- runProcess "bup" (toCommand params)
Nothing Nothing inh outh Nothing
ok <- waitForProcess p
case ok of
ExitSuccess -> return True
_ -> return False
bupSplitParams :: Git.Repo -> BupRepo -> Key -> CommandParam -> Annex [CommandParam]
bupSplitParams r buprepo k src = do
o <- getConfig r "bup-split-options" ""
let os = map Param $ words o
showProgress -- make way for bup output
return $ bupParams "split" buprepo
(os ++ [Param "-n", Param (show k), src])
store :: Git.Repo -> BupRepo -> Key -> Annex Bool
store r buprepo k = do
g <- Annex.gitRepo
let src = gitAnnexLocation g k
params <- bupSplitParams r buprepo k (File src)
liftIO $ boolSystem "bup" params
storeEncrypted :: Git.Repo -> BupRepo -> (Cipher, Key) -> Key -> Annex Bool
storeEncrypted r buprepo (cipher, enck) k = do
g <- Annex.gitRepo
let src = gitAnnexLocation g k
params <- bupSplitParams r buprepo enck (Param "-")
liftIO $ catchBool $ do
withEncryptedHandle cipher (L.readFile src) $ \h -> do
pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> FilePath -> Annex Bool
retrieve buprepo k f = do
let params = bupParams "join" buprepo [Param $ show k]
liftIO $ catchBool $ do
tofile <- openFile f WriteMode
pipeBup params Nothing (Just tofile)
retrieveEncrypted :: BupRepo -> (Cipher, Key) -> FilePath -> Annex Bool
retrieveEncrypted buprepo (cipher, enck) f = do
let params = bupParams "join" buprepo [Param $ show enck]
liftIO $ catchBool $ do
(pid, h) <- hPipeFrom "bup" $ toCommand params
withDecryptedContent cipher (L.hGetContents h) $ L.writeFile f
forceSuccess pid
return True
remove :: Key -> Annex Bool
remove _ = do
warning "content cannot be removed from bup remote"
return False
{- Bup does not provide a way to tell if a given dataset is present
- in a bup repository. One way it to check if the git repository has
- a branch matching the name (as created by bup split -n).
-}
checkPresent :: Git.Repo -> Git.Repo -> Key -> Annex (Either IOException Bool)
checkPresent r bupr k
| Git.repoIsUrl bupr = do
showNote ("checking " ++ Git.repoDescribe r ++ "...")
ok <- onBupRemote bupr boolSystem "git" params
return $ Right ok
| otherwise = liftIO $ try $ boolSystem "git" $ Git.gitCommandLine bupr params
where
params =
[ Params "show-ref --quiet --verify"
, Param $ "refs/heads/" ++ show k]
{- Store UUID in the annex.uuid setting of the bup repository. -}
storeBupUUID :: UUID -> BupRepo -> Annex ()
storeBupUUID u buprepo = do
r <- liftIO $ bup2GitRemote buprepo
if Git.repoIsUrl r
then do
showNote "storing uuid"
ok <- onBupRemote r boolSystem "git"
[Params $ "config annex.uuid " ++ u]
unless ok $ do error "ssh failed"
else liftIO $ do
r' <- Git.configRead r
let olduuid = Git.configGet r' "annex.uuid" ""
when (olduuid == "") $
Git.run r' "config" [Param "annex.uuid", Param u]
onBupRemote :: Git.Repo -> (FilePath -> [CommandParam] -> IO a) -> FilePath -> [CommandParam] -> Annex a
onBupRemote r a command params = do
let dir = shellEscape (Git.workTree r)
sshparams <- sshToRepo r [Param $
"cd " ++ dir ++ " && " ++ (unwords $ command : toCommand params)]
liftIO $ a "ssh" sshparams
{- Allow for bup repositories on removable media by checking
- local bup repositories to see if they are available, and getting their
- uuid (which may be different from the stored uuid for the bup remote).
-
- If a bup repository is not available, returns a dummy uuid of "".
- This will cause checkPresent to indicate nothing from the bup remote
- is known to be present.
-
- Also, returns a version of the repo with config read, if it is local.
-}
getBupUUID :: Git.Repo -> UUID -> Annex (UUID, Git.Repo)
getBupUUID r u
| Git.repoIsUrl r = return (u, r)
| otherwise = liftIO $ do
ret <- try $ Git.configRead r
case ret of
Right r' -> return (Git.configGet r' "annex.uuid" "", r')
Left _ -> return ("", r)
{- Converts a bup remote path spec into a Git.Repo. There are some
- differences in path representation between git and bup. -}
bup2GitRemote :: BupRepo -> IO Git.Repo
bup2GitRemote "" = do
-- bup -r "" operates on ~/.bup
h <- myHomeDir
Git.repoFromAbsPath $ h </> ".bup"
bup2GitRemote r
| bupLocal r =
if r !! 0 == '/'
then Git.repoFromAbsPath r
else error "please specify an absolute path"
| otherwise = Git.repoFromUrl $ "ssh://" ++ host ++ slash dir
where
bits = split ":" r
host = bits !! 0
dir = join ":" $ drop 1 bits
-- "host:~user/dir" is not supported specially by bup;
-- "host:dir" is relative to the home directory;
-- "host:" goes in ~/.bup
slash d
| d == "" = "/~/.bup"
| d !! 0 == '/' = d
| otherwise = "/~/" ++ d
bupLocal :: BupRepo -> Bool
bupLocal = notElem ':'