Sped up unused.

Added Git.ByteString which replaces Git IO methods with ones using lazy
ByteStrings. This can be more efficient when large quantities of data are
being read from git.

In Git.LsTree, parse git ls-tree output more efficiently, thanks
to ByteString. This is 25% faster in a benchmark whose run time
(probably predominantly) consists of the run time of git ls-tree itself.

In real world numbers, this makes git annex unused 2 seconds faster for
each branch it needs to check, in my usual large repo.
This commit is contained in:
Joey Hess 2011-09-29 19:04:24 -04:00
parent 244ffef43f
commit a91c8a15d5
4 changed files with 88 additions and 19 deletions

5
Git.hs
View file

@ -59,6 +59,7 @@ module Git (
getSha, getSha,
shaSize, shaSize,
commit, commit,
assertLocal,
prop_idempotent_deencode prop_idempotent_deencode
) where ) where
@ -458,8 +459,8 @@ commit g message newref parentrefs = do
ps = concatMap (\r -> ["-p", r]) parentrefs ps = concatMap (\r -> ["-p", r]) parentrefs
{- Reads null terminated output of a git command (as enabled by the -z {- Reads null terminated output of a git command (as enabled by the -z
- parameter), and splits it into a list of files/lines/whatever. -} - parameter), and splits it. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [FilePath] pipeNullSplit :: Repo -> [CommandParam] -> IO [String]
pipeNullSplit repo params = filter (not . null) . split "\0" <$> pipeNullSplit repo params = filter (not . null) . split "\0" <$>
pipeRead repo params pipeRead repo params

62
Git/ByteString.hs Normal file
View file

@ -0,0 +1,62 @@
{- module using Data.ByteString.Lazy.Char8 for git IO
-
- This can be imported instead of Git when more efficient ByteString IO
- is needed.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.ByteString (
module Git,
pipeRead,
pipeWrite,
pipeWriteRead,
pipeNullSplit
) where
import Control.Applicative
import System.Cmd.Utils
import System.IO
import qualified Data.ByteString.Lazy.Char8 as L
import Git hiding (pipeRead, pipeWrite, pipeWriteRead, pipeNullSplit)
import Utility.SafeCommand
{- Runs a git subcommand and hands back its output as a lazy ByteString.
 -
 - The git process is left running after this returns, so zombies will
 - result unless reap is called.
 -}
pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
pipeRead repo params = assertLocal repo $ do
    (_, fromGit) <- hPipeFrom "git" gitargs
    -- Read raw bytes from the pipe rather than text.
    hSetBinaryMode fromGit True
    L.hGetContents fromGit
  where
    gitargs = toCommand (gitCommandLine repo params)
{- Starts a git subcommand and writes the given ByteString to it,
 - closing the pipe once everything has been written.
 -
 - The caller should call either getProcessStatus or forceSuccess on the
 - returned PipeHandle. -}
pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
pipeWrite repo params input = assertLocal repo $ do
    (ph, toGit) <- hPipeTo "git" gitargs
    L.hPut toGit input
    hClose toGit
    return ph
  where
    gitargs = toCommand (gitCommandLine repo params)
{- Starts a git subcommand, writes the given ByteString to it, and
 - returns its output as a lazy ByteString together with the PipeHandle.
 -
 - The caller should call either getProcessStatus or forceSuccess on the
 - returned PipeHandle. -}
pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
pipeWriteRead repo params input = assertLocal repo $ do
    (ph, fromGit, toGit) <- hPipeBoth "git" gitargs
    hSetBinaryMode fromGit True
    -- NOTE(review): all of the input is written before any output is
    -- read; presumably this can block if git produces a lot of output
    -- before it has consumed its input -- confirm against callers.
    L.hPut toGit input
    hClose toGit
    (,) ph <$> L.hGetContents fromGit
  where
    gitargs = toCommand (gitCommandLine repo params)
{- Runs a git command whose output is null terminated (as enabled by the
 - -z parameter) and returns the non-empty pieces found between the
 - nulls. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [L.ByteString]
pipeNullSplit repo params = nonEmptyFields <$> pipeRead repo params
  where
    -- Splitting on NUL leaves empty pieces behind; drop them.
    nonEmptyFields = filter (not . L.null) . L.split '\0'

View file

@ -7,22 +7,23 @@
module Git.LsTree ( module Git.LsTree (
TreeItem(..), TreeItem(..),
lsTree lsTree,
parseLsTree
) where ) where
import Numeric import Numeric
import Control.Applicative import Control.Applicative
import Data.Char
import System.Posix.Types import System.Posix.Types
import qualified Data.ByteString.Lazy.Char8 as L
import Git import Git.ByteString
import Utility.SafeCommand import Utility.SafeCommand
type Treeish = String type Treeish = String
data TreeItem = TreeItem data TreeItem = TreeItem
{ mode :: FileMode { mode :: FileMode
, objtype :: String , typeobj :: String
, sha :: String , sha :: String
, file :: FilePath , file :: FilePath
} deriving Show } deriving Show
@ -34,18 +35,17 @@ lsTree repo t = map parseLsTree <$>
{- Parses a line of ls-tree output. {- Parses a line of ls-tree output.
- (The --long format is not currently supported.) -} - (The --long format is not currently supported.) -}
parseLsTree :: String -> TreeItem parseLsTree :: L.ByteString -> TreeItem
parseLsTree l = TreeItem m o s f parseLsTree l = TreeItem
(fst $ head $ readOct $ L.unpack m)
(L.unpack t)
(L.unpack s)
(decodeGitFile $ L.unpack f)
where where
-- l = <mode> SP <type> SP <sha> TAB <file> -- l = <mode> SP <type> SP <sha> TAB <file>
-- Since everything until the file is fixed-width, -- All fields are fixed, so we can pull them out of
-- do not need to split on words. -- specific positions in the line.
(m, past_m) = head $ readOct l (m, past_m) = L.splitAt 7 l
(o, past_o) = splitAt 4 $ space past_m (t, past_t) = L.splitAt 4 past_m
(s, past_s) = splitAt shaSize $ space past_o (s, past_s) = L.splitAt 40 $ L.tail past_t
f = decodeGitFile $ space past_s f = L.tail past_s
space (sp:rest)
| isSpace sp = rest
| otherwise = parseerr
space [] = parseerr
parseerr = "ls-tree parse error: " ++ l

6
debian/changelog vendored
View file

@ -1,3 +1,9 @@
git-annex (3.20110929) UNRELEASED; urgency=low
* Sped up unused.
-- Joey Hess <joeyh@debian.org> Thu, 29 Sep 2011 18:58:53 -0400
git-annex (3.20110928) unstable; urgency=low git-annex (3.20110928) unstable; urgency=low
* --in can be used to make git-annex only operate on files * --in can be used to make git-annex only operate on files