Sped up git annex unused.

Added Git.ByteString which replaces Git IO methods with ones using lazy
ByteStrings. This can be more efficient when large quantities of data are
being read from git.

In Git.LsTree, parse git ls-tree output more efficiently, thanks
to ByteString. This benchmarks 25% faster, in a benchmark that includes
(probably predominately) the run time for git ls-tree itself.

In real world numbers, this makes git annex unused 2 seconds faster for
each branch it needs to check, in my usual large repo.
This commit is contained in:
Joey Hess 2011-09-29 19:04:24 -04:00
parent 244ffef43f
commit a91c8a15d5
4 changed files with 88 additions and 19 deletions

View file

@ -7,22 +7,23 @@
module Git.LsTree (
TreeItem(..),
lsTree
lsTree,
parseLsTree
) where
import Numeric
import Control.Applicative
import Data.Char
import System.Posix.Types
import qualified Data.ByteString.Lazy.Char8 as L
import Git
import Git.ByteString
import Utility.SafeCommand
{- NOTE(review): presumably names the tree handed to lsTree; its use is
 - not visible in this view -- confirm. -}
type Treeish = String
{- One entry of ls-tree output; parseLsTree builds one of these per line.
 -
 - NOTE(review): both objtype and typeobj are listed here because this is
 - a diff view with its change markers stripped; presumably each version
 - of the file has only one of the two -- confirm against the real file. -}
data TreeItem = TreeItem
{ mode :: FileMode
, objtype :: String
, typeobj :: String
, sha :: String
, file :: FilePath
} deriving Show
@ -34,18 +35,17 @@ lsTree repo t = map parseLsTree <$>
{- Parses a line of ls-tree output into a TreeItem.
 - (The --long format is not currently supported.)
 -
 - NOTE(review): this hunk is a diff view with its change markers
 - stripped, so two versions of the function are interleaved below: one
 - taking a String and one taking a lazy ByteString. The bindings in the
 - where clause that use list functions (readOct on l, splitAt, space)
 - go with the String signature; the ones using L.splitAt and L.tail go
 - with the ByteString signature. -}
parseLsTree :: String -> TreeItem
parseLsTree l = TreeItem m o s f
parseLsTree :: L.ByteString -> TreeItem
parseLsTree l = TreeItem
-- readOct returns a list of (value, rest) parses; head fails if the
-- field does not start with an octal digit.
(fst $ head $ readOct $ L.unpack m)
(L.unpack t)
(L.unpack s)
(decodeGitFile $ L.unpack f)
where
-- l = <mode> SP <type> SP <sha> TAB <file>
-- Since everything until the file is fixed-width,
-- do not need to split on words.
-- readOct consumes the leading octal mode and hands back the remainder.
(m, past_m) = head $ readOct l
-- Each use of space drops the single separator in front of a field.
(o, past_o) = splitAt 4 $ space past_m
(s, past_s) = splitAt shaSize $ space past_o
f = decodeGitFile $ space past_s
-- space expects its input to begin with a whitespace character and
-- returns what follows it.
-- NOTE(review): parseerr is a plain String, so on a mismatch the
-- message is returned as if it were the rest of the line; no error is
-- raised. Confirm that is intended.
space (sp:rest)
| isSpace sp = rest
| otherwise = parseerr
space [] = parseerr
parseerr = "ls-tree parse error: " ++ l
-- All fields are fixed, so we can pull them out of
-- specific positions in the line.
-- NOTE(review): a width of 4 for the type fits "blob" and "tree";
-- git ls-tree can also report "commit" (submodule entries), which is
-- longer -- confirm whether such lines can reach this parser.
-- The first 7 characters are the mode together with the SP after it
-- (the type is taken straight from past_m with no separator dropped).
(m, past_m) = L.splitAt 7 l
(t, past_t) = L.splitAt 4 past_m
-- L.tail drops the SP before the sha; 40 is the sha's length here.
(s, past_s) = L.splitAt 40 $ L.tail past_t
-- L.tail drops the TAB before the file name.
f = L.tail past_s