convert all git read/write functions to use ByteStrings

This yields a speedup of a second or so in unused, find, etc. It seems
that even when the ByteString is immediately split and then converted to
Strings, it's faster.

I may try to push ByteStrings out into more of git-annex gradually,
although I suspect most of the time-critical parts are already covered
now, and many of the rest rely on libraries that only support Strings.
This commit is contained in:
Joey Hess 2011-09-29 23:43:42 -04:00
parent 949ef94d5e
commit 7ff89ccfee
8 changed files with 49 additions and 94 deletions

View file

@ -30,6 +30,7 @@ import System.IO
import System.IO.Binary
import System.Posix.Process
import System.Exit
import qualified Data.ByteString.Lazy.Char8 as L
import Types.BranchState
import qualified Git
@ -181,7 +182,7 @@ siblingBranches :: Annex [String]
siblingBranches = do
g <- Annex.gitRepo
r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name]
return $ map (last . words) (lines r)
return $ map (last . words . L.unpack) (L.lines r)
{- Ensures that a given ref has been merged into the index. -}
updateRef :: GitRef -> Annex (Maybe String)
@ -196,7 +197,7 @@ updateRef ref
Param (name++".."++ref),
Params "--oneline -n1"
]
if null diffs
if L.null diffs
then return Nothing
else do
showSideAction $ "merging " ++ Git.refDescribe ref ++ " into " ++ name

View file

@ -16,6 +16,7 @@ import Data.Maybe
import System.FilePath
import System.Directory
import Data.List
import qualified Data.ByteString.Lazy.Char8 as L
import Command
import Types
@ -172,7 +173,7 @@ excludeReferenced l = do
refs = map last .
nubBy cmpheads .
filter ourbranches .
map words . lines
map words . lines . L.unpack
cmpheads a b = head a == head b
ourbranchend = '/' : Branch.name
ourbranches ws = not $ ourbranchend `isSuffixOf` last ws

54
Git.hs
View file

@ -44,6 +44,7 @@ module Git (
pipeWrite,
pipeWriteRead,
pipeNullSplit,
pipeNullSplitB,
attributes,
remotes,
remotesAdd,
@ -85,6 +86,7 @@ import Text.Printf
import Data.List (isInfixOf, isPrefixOf, isSuffixOf)
import System.Exit
import System.Posix.Env (setEnv, unsetEnv, getEnv)
import qualified Data.ByteString.Lazy.Char8 as L
import Utility
import Utility.Path
@ -379,22 +381,41 @@ run repo subcommand params = assertLocal repo $
- Note that this leaves the git process running, and so zombies will
- result unless reap is called.
-}
pipeRead :: Repo -> [CommandParam] -> IO String
pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
pipeRead repo params = assertLocal repo $ do
(_, s) <- pipeFrom "git" $ toCommand $ gitCommandLine repo params
return s
(_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine repo params
hSetBinaryMode h True
L.hGetContents h
{- Runs a git subcommand, feeding it input.
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
pipeWrite :: Repo -> [CommandParam] -> String -> IO PipeHandle
pipeWrite repo params s = assertLocal repo $
pipeTo "git" (toCommand $ gitCommandLine repo params) s
pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
pipeWrite repo params s = assertLocal repo $ do
(p, h) <- hPipeTo "git" (toCommand $ gitCommandLine repo params)
L.hPut h s
hClose h
return p
{- Runs a git subcommand, feeding it input, and returning its output.
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
pipeWriteRead :: Repo -> [CommandParam] -> String -> IO (PipeHandle, String)
pipeWriteRead repo params s = assertLocal repo $
pipeBoth "git" (toCommand $ gitCommandLine repo params) s
pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
pipeWriteRead repo params s = assertLocal repo $ do
(p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine repo params)
hSetBinaryMode from True
L.hPut to s
hClose to
c <- L.hGetContents from
return (p, c)
{- Runs a git command whose output is null terminated (as enabled by the
 - -z parameter), and returns the pieces of that output as Strings.
 -
 - The splitting itself is done by pipeNullSplitB; this only unpacks each
 - resulting ByteString. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [String]
pipeNullSplit repo params = do
    pieces <- pipeNullSplitB repo params
    return $ map L.unpack pieces
{- Variant of pipeNullSplit for when Strings are not needed: the output of
 - the git command is split on null characters, empty pieces are dropped,
 - and each remaining piece is left as a lazy ByteString. -}
pipeNullSplitB :: Repo -> [CommandParam] -> IO [L.ByteString]
pipeNullSplitB repo params = do
    output <- pipeRead repo params
    return $ filter (not . L.null) $ L.split '\0' output
{- Reaps any zombie git processes. -}
reap :: IO ()
@ -436,21 +457,18 @@ shaSize = 40
- with the specified parent refs. -}
commit :: Repo -> String -> String -> [String] -> IO ()
commit g message newref parentrefs = do
tree <- getSha "write-tree" $
tree <- getSha "write-tree" $ asString $
pipeRead g [Param "write-tree"]
sha <- getSha "commit-tree" $ ignorehandle $
pipeWriteRead g (map Param $ ["commit-tree", tree] ++ ps) message
sha <- getSha "commit-tree" $ asString $
ignorehandle $ pipeWriteRead g
(map Param $ ["commit-tree", tree] ++ ps)
(L.pack message)
run g "update-ref" [Param newref, Param sha]
where
ignorehandle a = snd <$> a
asString a = L.unpack <$> a
ps = concatMap (\r -> ["-p", r]) parentrefs
{- Runs a git command whose output is null terminated (as enabled by the
 - -z parameter), splits that output on the null character, and discards
 - any empty pieces. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [String]
pipeNullSplit repo params = do
    output <- pipeRead repo params
    return $ filter (not . null) $ split "\0" output
{- Runs git config and populates a repo with its config. -}
configRead :: Repo -> IO Repo
configRead repo@(Repo { location = Dir d }) = do

View file

@ -1,62 +0,0 @@
{- module using Data.ByteString.Lazy.Char8 for git IO
-
- This can be imported instead of Git when more efficient ByteString IO
- is needed.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.ByteString (
module Git,
pipeRead,
pipeWrite,
pipeWriteRead,
pipeNullSplit
) where
import Control.Applicative
import System.Cmd.Utils
import System.IO
import qualified Data.ByteString.Lazy.Char8 as L
import Git hiding (pipeRead, pipeWrite, pipeWriteRead, pipeNullSplit)
import Utility.SafeCommand
{- Runs a git subcommand and lazily returns everything it outputs, as a
 - ByteString. The handle is put into binary mode before it is read.
 -
 - Note that the git process is left running, so zombies will result
 - unless reap is called.
 -}
pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
pipeRead repo params = assertLocal repo $ do
    (_, fromgit) <- hPipeFrom "git" gitargs
    hSetBinaryMode fromgit True
    L.hGetContents fromgit
  where
    gitargs = toCommand $ gitCommandLine repo params
{- Runs a git subcommand and feeds it the given input, closing the pipe to
 - it once all the input has been written.
 -
 - The caller should pass the returned PipeHandle to either
 - getProcessStatus or forceSuccess. -}
pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
pipeWrite repo params s = assertLocal repo $ do
    (ph, togit) <- hPipeTo "git" gitargs
    L.hPut togit s
    hClose togit
    return ph
  where
    gitargs = toCommand $ gitCommandLine repo params
{- Runs a git subcommand, feeds it the given input, and returns its output
 - (read lazily, in binary mode) along with the PipeHandle.
 -
 - All of the input is written, and the pipe to git closed, before any
 - output is read.
 - NOTE(review): presumably a command that emits a lot of output before
 - it has consumed its input could block here -- confirm.
 -
 - The caller should pass the returned PipeHandle to either
 - getProcessStatus or forceSuccess. -}
pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
pipeWriteRead repo params s = assertLocal repo $ do
    (ph, fromgit, togit) <- hPipeBoth "git" gitargs
    hSetBinaryMode fromgit True
    L.hPut togit s
    hClose togit
    output <- L.hGetContents fromgit
    return (ph, output)
  where
    gitargs = toCommand $ gitCommandLine repo params
{- Runs a git command whose output is null terminated (as enabled by the
 - -z parameter), splits that output on the null character, and discards
 - any empty pieces. Each piece is returned as a lazy ByteString. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [L.ByteString]
pipeNullSplit repo params = do
    output <- pipeRead repo params
    return $ filter (not . L.null) $ L.split '\0' output

View file

@ -20,13 +20,11 @@ import Utility.SafeCommand
{- Scans for files that are checked into git at the specified locations. -}
inRepo :: Repo -> [FilePath] -> IO [FilePath]
inRepo repo l = pipeNullSplit repo $
Params "ls-files --cached -z --" : map File l
inRepo repo l = pipeNullSplit repo $ Params "ls-files --cached -z --" : map File l
{- Scans for files at the specified locations that are not checked into git. -}
notInRepo :: Repo -> Bool -> [FilePath] -> IO [FilePath]
notInRepo repo include_ignored l =
pipeNullSplit repo $
notInRepo repo include_ignored l = pipeNullSplit repo $
[Params "ls-files --others"] ++ exclude ++
[Params "-z --"] ++ map File l
where

View file

@ -16,7 +16,7 @@ import Control.Applicative
import System.Posix.Types
import qualified Data.ByteString.Lazy.Char8 as L
import Git.ByteString
import Git
import Utility.SafeCommand
type Treeish = String
@ -31,7 +31,7 @@ data TreeItem = TreeItem
{- Lists the contents of a Treeish -}
lsTree :: Repo -> Treeish -> IO [TreeItem]
lsTree repo t = map parseLsTree <$>
pipeNullSplit repo [Params "ls-tree --full-tree -z -r --", File t]
pipeNullSplitB repo [Params "ls-tree --full-tree -z -r --", File t]
{- Parses a line of ls-tree output.
- (The --long format is not currently supported.) -}

View file

@ -19,7 +19,6 @@ import Data.String.Utils
import qualified Data.ByteString.Lazy.Char8 as L
import Git
import qualified Git.ByteString as GitB
import Utility.SafeCommand
{- Performs a union merge between two branches, staging it in the index.
@ -44,7 +43,7 @@ merge _ _ = error "wrong number of branches to merge"
update_index :: Repo -> [String] -> IO ()
update_index g l = togit ["update-index", "-z", "--index-info"] (join "\0" l)
where
togit ps content = pipeWrite g (map Param ps) content
togit ps content = pipeWrite g (map Param ps) (L.pack content)
>>= forceSuccess
{- Generates a line suitable to be fed into update-index, to add
@ -83,7 +82,7 @@ calc_merge g differ = do
{- Injects some content into git, returning its hash. -}
hashObject :: Repo -> L.ByteString -> IO String
hashObject repo content = getSha subcmd $ do
(h, s) <- GitB.pipeWriteRead repo (map Param params) content
(h, s) <- pipeWriteRead repo (map Param params) content
L.length s `seq` do
forceSuccess h
reap -- XXX unsure why this is needed
@ -100,7 +99,7 @@ mergeFile g (info, file) = case filter (/= nullsha) [asha, bsha] of
[] -> return Nothing
(sha:[]) -> return $ Just $ update_index_line sha file
shas -> do
content <- GitB.pipeRead g $ map Param ("show":shas)
content <- pipeRead g $ map Param ("show":shas)
sha <- hashObject g $ unionmerge content
return $ Just $ update_index_line sha file
where

2
debian/changelog vendored
View file

@ -1,6 +1,6 @@
git-annex (3.20110929) UNRELEASED; urgency=low
* Sped up unused.
* Various speed improvements gained by using ByteStrings.
-- Joey Hess <joeyh@debian.org> Thu, 29 Sep 2011 18:58:53 -0400