convert all git read/write functions to use ByteStrings

This yields a second or so speedup in unused, find, etc. Seems that even
when the ByteString is immediately split and then converted to Strings,
it's faster.

I may try to push ByteStrings out into more of git-annex gradually,
although I suspect most of the time-critical parts are already covered
now, and many of the rest rely on libraries that only support Strings.
This commit is contained in:
Joey Hess 2011-09-29 23:43:42 -04:00
parent 949ef94d5e
commit 7ff89ccfee
8 changed files with 49 additions and 94 deletions

View file

@ -1,62 +0,0 @@
{- module using Data.ByteString.Lazy.Char8 for git IO
-
- This can be imported instead of Git when more efficient ByteString IO
- is needed.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.ByteString (
module Git,
pipeRead,
pipeWrite,
pipeWriteRead,
pipeNullSplit
) where
import Control.Applicative
import System.Cmd.Utils
import System.IO
import qualified Data.ByteString.Lazy.Char8 as L
import Git hiding (pipeRead, pipeWrite, pipeWriteRead, pipeNullSplit)
import Utility.SafeCommand
{- Runs a git subcommand and returns its output, lazily.
-
- Note that this leaves the git process running, and so zombies will
- result unless reap is called.
-}
pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
pipeRead repo params = assertLocal repo $ do
(_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine repo params
hSetBinaryMode h True
L.hGetContents h
{- Runs a git subcommand, feeding it input.
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
pipeWrite repo params s = assertLocal repo $ do
(p, h) <- hPipeTo "git" (toCommand $ gitCommandLine repo params)
L.hPut h s
hClose h
return p
{- Runs a git subcommand, feeding it input, and returning its output.
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
pipeWriteRead repo params s = assertLocal repo $ do
(p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine repo params)
hSetBinaryMode from True
L.hPut to s
hClose to
c <- L.hGetContents from
return (p, c)
{- Reads null terminated output of a git command (as enabled by the -z
- parameter), and splits it. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [L.ByteString]
pipeNullSplit repo params = filter (not . L.null) . L.split '\0' <$>
pipeRead repo params

View file

@ -20,13 +20,11 @@ import Utility.SafeCommand
{- Scans for files that are checked into git at the specified locations. -}
inRepo :: Repo -> [FilePath] -> IO [FilePath]
inRepo repo l = pipeNullSplit repo $
Params "ls-files --cached -z --" : map File l
inRepo repo l = pipeNullSplit repo $ Params "ls-files --cached -z --" : map File l
{- Scans for files at the specified locations that are not checked into git. -}
notInRepo :: Repo -> Bool -> [FilePath] -> IO [FilePath]
notInRepo repo include_ignored l =
pipeNullSplit repo $
notInRepo repo include_ignored l = pipeNullSplit repo $
[Params "ls-files --others"] ++ exclude ++
[Params "-z --"] ++ map File l
where

View file

@ -16,7 +16,7 @@ import Control.Applicative
import System.Posix.Types
import qualified Data.ByteString.Lazy.Char8 as L
import Git.ByteString
import Git
import Utility.SafeCommand
type Treeish = String
@ -31,7 +31,7 @@ data TreeItem = TreeItem
{- Lists the contents of a Treeish -}
lsTree :: Repo -> Treeish -> IO [TreeItem]
lsTree repo t = map parseLsTree <$>
pipeNullSplit repo [Params "ls-tree --full-tree -z -r --", File t]
pipeNullSplitB repo [Params "ls-tree --full-tree -z -r --", File t]
{- Parses a line of ls-tree output.
- (The --long format is not currently supported.) -}

View file

@ -19,7 +19,6 @@ import Data.String.Utils
import qualified Data.ByteString.Lazy.Char8 as L
import Git
import qualified Git.ByteString as GitB
import Utility.SafeCommand
{- Performs a union merge between two branches, staging it in the index.
@ -44,7 +43,7 @@ merge _ _ = error "wrong number of branches to merge"
update_index :: Repo -> [String] -> IO ()
update_index g l = togit ["update-index", "-z", "--index-info"] (join "\0" l)
where
togit ps content = pipeWrite g (map Param ps) content
togit ps content = pipeWrite g (map Param ps) (L.pack content)
>>= forceSuccess
{- Generates a line suitable to be fed into update-index, to add
@ -83,7 +82,7 @@ calc_merge g differ = do
{- Injects some content into git, returning its hash. -}
hashObject :: Repo -> L.ByteString -> IO String
hashObject repo content = getSha subcmd $ do
(h, s) <- GitB.pipeWriteRead repo (map Param params) content
(h, s) <- pipeWriteRead repo (map Param params) content
L.length s `seq` do
forceSuccess h
reap -- XXX unsure why this is needed
@ -100,7 +99,7 @@ mergeFile g (info, file) = case filter (/= nullsha) [asha, bsha] of
[] -> return Nothing
(sha:[]) -> return $ Just $ update_index_line sha file
shas -> do
content <- GitB.pipeRead g $ map Param ("show":shas)
content <- pipeRead g $ map Param ("show":shas)
sha <- hashObject g $ unionmerge content
return $ Just $ update_index_line sha file
where