Sped up unused.
Added Git.ByteString, which replaces the Git IO methods with ones using lazy ByteStrings. This can be more efficient when large quantities of data are being read from git. In Git.LsTree, git ls-tree output is now parsed more efficiently, thanks to ByteString. This benchmarks 25% faster, in a benchmark that includes (probably predominantly) the run time of git ls-tree itself. In real-world numbers, this makes git annex unused 2 seconds faster for each branch it needs to check, in my usual large repo.
This commit is contained in:
parent
244ffef43f
commit
a91c8a15d5
4 changed files with 88 additions and 19 deletions
5
Git.hs
5
Git.hs
|
@ -59,6 +59,7 @@ module Git (
|
|||
getSha,
|
||||
shaSize,
|
||||
commit,
|
||||
assertLocal,
|
||||
|
||||
prop_idempotent_deencode
|
||||
) where
|
||||
|
@ -458,8 +459,8 @@ commit g message newref parentrefs = do
|
|||
ps = concatMap (\r -> ["-p", r]) parentrefs
|
||||
|
||||
{- Runs a git command whose output is null terminated (as enabled by the
 - -z parameter), and returns the non-empty pieces of that output. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [String]
pipeNullSplit repo params = do
    output <- pipeRead repo params
    -- Splitting on the terminator leaves empty pieces behind; drop them.
    return [piece | piece <- split "\0" output, not (null piece)]
|
||||
|
||||
|
|
62
Git/ByteString.hs
Normal file
62
Git/ByteString.hs
Normal file
|
@ -0,0 +1,62 @@
|
|||
{- module using Data.ByteString.Lazy.Char8 for git IO
|
||||
-
|
||||
- This can be imported instead of Git when more efficient ByteString IO
|
||||
- is needed.
|
||||
-
|
||||
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Git.ByteString (
|
||||
module Git,
|
||||
pipeRead,
|
||||
pipeWrite,
|
||||
pipeWriteRead,
|
||||
pipeNullSplit
|
||||
) where
|
||||
|
||||
import Control.Applicative
|
||||
import System.Cmd.Utils
|
||||
import System.IO
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
|
||||
import Git hiding (pipeRead, pipeWrite, pipeWriteRead, pipeNullSplit)
|
||||
import Utility.SafeCommand
|
||||
|
||||
{- Starts a git subcommand and lazily reads everything it outputs.
 -
 - The git process is left running, so zombies will result unless
 - reap is called.
 -}
pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
pipeRead repo params = assertLocal repo $
    hPipeFrom "git" cmdline >>= readAll . snd
  where
    cmdline = toCommand (gitCommandLine repo params)
    -- The handle is switched to binary mode before its contents are read.
    readAll out = hSetBinaryMode out True >> L.hGetContents out
|
||||
|
||||
{- Starts a git subcommand and sends it the given input.
 -
 - The input handle is closed once everything has been written. The caller
 - should call either getProcessStatus or forceSuccess on the returned
 - PipeHandle.
 -}
pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
pipeWrite repo params s = assertLocal repo $ do
    (pid, input) <- hPipeTo "git" cmdline
    L.hPut input s >> hClose input
    return pid
  where
    cmdline = toCommand $ gitCommandLine repo params
|
||||
|
||||
{- Starts a git subcommand, sends it the given input, and returns its
 - output as a lazy ByteString, along with the PipeHandle.
 -
 - The caller should call either getProcessStatus or forceSuccess on the
 - PipeHandle.
 -
 - NOTE(review): all of the input is written, and the input handle closed,
 - before any output is read. Presumably this could block if git emits a
 - lot of output before consuming its input -- confirm for large inputs.
 -}
pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
pipeWriteRead repo params s = assertLocal repo $ do
    (pid, output, input) <- hPipeBoth "git" cmdline
    hSetBinaryMode output True
    L.hPut input s
    hClose input
    (,) pid <$> L.hGetContents output
  where
    cmdline = toCommand $ gitCommandLine repo params
|
||||
|
||||
{- Runs a git command whose output is null terminated (as enabled by the
 - -z parameter), and returns the non-empty pieces of that output as
 - lazy ByteStrings. -}
pipeNullSplit :: Repo -> [CommandParam] -> IO [L.ByteString]
pipeNullSplit repo params = nonEmptyPieces <$> pipeRead repo params
  where
    -- Splitting on the terminator leaves empty pieces behind; drop them.
    nonEmptyPieces output =
        [piece | piece <- L.split '\0' output, not (L.null piece)]
|
|
@ -7,22 +7,23 @@
|
|||
|
||||
module Git.LsTree (
|
||||
TreeItem(..),
|
||||
lsTree
|
||||
lsTree,
|
||||
parseLsTree
|
||||
) where
|
||||
|
||||
import Numeric
|
||||
import Control.Applicative
|
||||
import Data.Char
|
||||
import System.Posix.Types
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
|
||||
import Git
|
||||
import Git.ByteString
|
||||
import Utility.SafeCommand
|
||||
|
||||
{- Alias for the String used to name a git tree-ish.
 - (Presumably anything git ls-tree accepts as a tree; confirm against
 - lsTree.) -}
type Treeish = String
|
||||
|
||||
{- One entry of git ls-tree output, as built by parseLsTree. -}
data TreeItem = TreeItem
    { mode :: FileMode      -- read from the octal mode field
    , typeobj :: String     -- the object type field
    , sha :: String         -- the object sha field
    , file :: FilePath      -- the file name, after decodeGitFile
    } deriving (Show)
|
||||
|
@ -34,18 +35,17 @@ lsTree repo t = map parseLsTree <$>
|
|||
|
||||
{- Parses a line of ls-tree output.
 - (The --long format is not currently supported.)
 -
 - A line has the form: <mode> SP <type> SP <sha> TAB <file>
 -
 - The fields are located by their separators rather than by fixed
 - offsets. The type is not always 4 characters wide ("commit" entries
 - are 6), and the sha width depends on the hash in use, so fixed offsets
 - would mis-split such lines.
 -
 - A line that lacks a separator, or whose mode is not octal, results in
 - an "ls-tree parse error" when the affected field is evaluated. -}
parseLsTree :: L.ByteString -> TreeItem
parseLsTree l = TreeItem
    (parseMode $ L.unpack m)
    (L.unpack t)
    (L.unpack s)
    (decodeGitFile $ L.unpack f)
  where
    (m, past_m) = L.break (== ' ') l
    (t, past_t) = L.break (== ' ') $ skipSep past_m
    (s, past_s) = L.break (== '\t') $ skipSep past_t
    -- Everything after the TAB is the file name; it may itself contain
    -- spaces or tabs, so it is not split any further.
    f = skipSep past_s

    -- Drops the separator that the preceding break stopped at. An empty
    -- remainder means the separator was missing from the line.
    skipSep rest
        | L.null rest = parseerr
        | otherwise = L.drop 1 rest

    -- readOct returns an empty list when there are no leading octal digits.
    parseMode str = case readOct str of
        ((n, _):_) -> n
        [] -> parseerr

    parseerr :: a
    parseerr = error $ "ls-tree parse error: " ++ L.unpack l
|
||||
|
|
6
debian/changelog
vendored
6
debian/changelog
vendored
|
@ -1,3 +1,9 @@
|
|||
git-annex (3.20110929) UNRELEASED; urgency=low
|
||||
|
||||
* Sped up unused.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Thu, 29 Sep 2011 18:58:53 -0400
|
||||
|
||||
git-annex (3.20110928) unstable; urgency=low
|
||||
|
||||
* --in can be used to make git-annex only operate on files
|
||||
|
|
Loading…
Reference in a new issue