convert all git read/write functions to use ByteStrings
This yields a second or so speedup in unused, find, etc. Seems that even when the ByteString is immediately split and then converted to Strings, it's faster. I may try to push ByteStrings out into more of git-annex gradually, although I suspect most of the time-critical parts are already covered now, and many of the rest rely on libraries that only support Strings.
This commit is contained in:
parent
949ef94d5e
commit
7ff89ccfee
8 changed files with 49 additions and 94 deletions
|
@ -30,6 +30,7 @@ import System.IO
|
|||
import System.IO.Binary
|
||||
import System.Posix.Process
|
||||
import System.Exit
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
|
||||
import Types.BranchState
|
||||
import qualified Git
|
||||
|
@ -181,7 +182,7 @@ siblingBranches :: Annex [String]
|
|||
siblingBranches = do
|
||||
g <- Annex.gitRepo
|
||||
r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name]
|
||||
return $ map (last . words) (lines r)
|
||||
return $ map (last . words . L.unpack) (L.lines r)
|
||||
|
||||
{- Ensures that a given ref has been merged into the index. -}
|
||||
updateRef :: GitRef -> Annex (Maybe String)
|
||||
|
@ -196,7 +197,7 @@ updateRef ref
|
|||
Param (name++".."++ref),
|
||||
Params "--oneline -n1"
|
||||
]
|
||||
if null diffs
|
||||
if L.null diffs
|
||||
then return Nothing
|
||||
else do
|
||||
showSideAction $ "merging " ++ Git.refDescribe ref ++ " into " ++ name
|
||||
|
|
|
@ -16,6 +16,7 @@ import Data.Maybe
|
|||
import System.FilePath
|
||||
import System.Directory
|
||||
import Data.List
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
|
||||
import Command
|
||||
import Types
|
||||
|
@ -172,7 +173,7 @@ excludeReferenced l = do
|
|||
refs = map last .
|
||||
nubBy cmpheads .
|
||||
filter ourbranches .
|
||||
map words . lines
|
||||
map words . lines . L.unpack
|
||||
cmpheads a b = head a == head b
|
||||
ourbranchend = '/' : Branch.name
|
||||
ourbranches ws = not $ ourbranchend `isSuffixOf` last ws
|
||||
|
|
54
Git.hs
54
Git.hs
|
@ -44,6 +44,7 @@ module Git (
|
|||
pipeWrite,
|
||||
pipeWriteRead,
|
||||
pipeNullSplit,
|
||||
pipeNullSplitB,
|
||||
attributes,
|
||||
remotes,
|
||||
remotesAdd,
|
||||
|
@ -85,6 +86,7 @@ import Text.Printf
|
|||
import Data.List (isInfixOf, isPrefixOf, isSuffixOf)
|
||||
import System.Exit
|
||||
import System.Posix.Env (setEnv, unsetEnv, getEnv)
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
|
||||
import Utility
|
||||
import Utility.Path
|
||||
|
@ -379,22 +381,41 @@ run repo subcommand params = assertLocal repo $
|
|||
- Note that this leaves the git process running, and so zombies will
|
||||
- result unless reap is called.
|
||||
-}
|
||||
pipeRead :: Repo -> [CommandParam] -> IO String
|
||||
pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
|
||||
pipeRead repo params = assertLocal repo $ do
|
||||
(_, s) <- pipeFrom "git" $ toCommand $ gitCommandLine repo params
|
||||
return s
|
||||
(_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine repo params
|
||||
hSetBinaryMode h True
|
||||
L.hGetContents h
|
||||
|
||||
{- Runs a git subcommand, feeding it input.
|
||||
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
|
||||
pipeWrite :: Repo -> [CommandParam] -> String -> IO PipeHandle
|
||||
pipeWrite repo params s = assertLocal repo $
|
||||
pipeTo "git" (toCommand $ gitCommandLine repo params) s
|
||||
pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
|
||||
pipeWrite repo params s = assertLocal repo $ do
|
||||
(p, h) <- hPipeTo "git" (toCommand $ gitCommandLine repo params)
|
||||
L.hPut h s
|
||||
hClose h
|
||||
return p
|
||||
|
||||
{- Runs a git subcommand, feeding it input, and returning its output.
|
||||
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
|
||||
pipeWriteRead :: Repo -> [CommandParam] -> String -> IO (PipeHandle, String)
|
||||
pipeWriteRead repo params s = assertLocal repo $
|
||||
pipeBoth "git" (toCommand $ gitCommandLine repo params) s
|
||||
pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
|
||||
pipeWriteRead repo params s = assertLocal repo $ do
|
||||
(p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine repo params)
|
||||
hSetBinaryMode from True
|
||||
L.hPut to s
|
||||
hClose to
|
||||
c <- L.hGetContents from
|
||||
return (p, c)
|
||||
|
||||
{- Reads null terminated output of a git command (as enabled by the -z
|
||||
- parameter), and splits it. -}
|
||||
pipeNullSplit :: Repo -> [CommandParam] -> IO [String]
|
||||
pipeNullSplit repo params = map L.unpack <$> pipeNullSplitB repo params
|
||||
|
||||
{- For when Strings are not needed. -}
|
||||
pipeNullSplitB :: Repo -> [CommandParam] -> IO [L.ByteString]
|
||||
pipeNullSplitB repo params = filter (not . L.null) . L.split '\0' <$>
|
||||
pipeRead repo params
|
||||
|
||||
{- Reaps any zombie git processes. -}
|
||||
reap :: IO ()
|
||||
|
@ -436,21 +457,18 @@ shaSize = 40
|
|||
- with the specified parent refs. -}
|
||||
commit :: Repo -> String -> String -> [String] -> IO ()
|
||||
commit g message newref parentrefs = do
|
||||
tree <- getSha "write-tree" $
|
||||
tree <- getSha "write-tree" $ asString $
|
||||
pipeRead g [Param "write-tree"]
|
||||
sha <- getSha "commit-tree" $ ignorehandle $
|
||||
pipeWriteRead g (map Param $ ["commit-tree", tree] ++ ps) message
|
||||
sha <- getSha "commit-tree" $ asString $
|
||||
ignorehandle $ pipeWriteRead g
|
||||
(map Param $ ["commit-tree", tree] ++ ps)
|
||||
(L.pack message)
|
||||
run g "update-ref" [Param newref, Param sha]
|
||||
where
|
||||
ignorehandle a = snd <$> a
|
||||
asString a = L.unpack <$> a
|
||||
ps = concatMap (\r -> ["-p", r]) parentrefs
|
||||
|
||||
{- Reads null terminated output of a git command (as enabled by the -z
|
||||
- parameter), and splits it. -}
|
||||
pipeNullSplit :: Repo -> [CommandParam] -> IO [String]
|
||||
pipeNullSplit repo params = filter (not . null) . split "\0" <$>
|
||||
pipeRead repo params
|
||||
|
||||
{- Runs git config and populates a repo with its config. -}
|
||||
configRead :: Repo -> IO Repo
|
||||
configRead repo@(Repo { location = Dir d }) = do
|
||||
|
|
|
@ -1,62 +0,0 @@
|
|||
{- module using Data.ByteString.Lazy.Char8 for git IO
|
||||
-
|
||||
- This can be imported instead of Git when more efficient ByteString IO
|
||||
- is needed.
|
||||
-
|
||||
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Git.ByteString (
|
||||
module Git,
|
||||
pipeRead,
|
||||
pipeWrite,
|
||||
pipeWriteRead,
|
||||
pipeNullSplit
|
||||
) where
|
||||
|
||||
import Control.Applicative
|
||||
import System.Cmd.Utils
|
||||
import System.IO
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
|
||||
import Git hiding (pipeRead, pipeWrite, pipeWriteRead, pipeNullSplit)
|
||||
import Utility.SafeCommand
|
||||
|
||||
{- Runs a git subcommand and returns its output, lazily.
|
||||
-
|
||||
- Note that this leaves the git process running, and so zombies will
|
||||
- result unless reap is called.
|
||||
-}
|
||||
pipeRead :: Repo -> [CommandParam] -> IO L.ByteString
|
||||
pipeRead repo params = assertLocal repo $ do
|
||||
(_, h) <- hPipeFrom "git" $ toCommand $ gitCommandLine repo params
|
||||
hSetBinaryMode h True
|
||||
L.hGetContents h
|
||||
|
||||
{- Runs a git subcommand, feeding it input.
|
||||
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
|
||||
pipeWrite :: Repo -> [CommandParam] -> L.ByteString -> IO PipeHandle
|
||||
pipeWrite repo params s = assertLocal repo $ do
|
||||
(p, h) <- hPipeTo "git" (toCommand $ gitCommandLine repo params)
|
||||
L.hPut h s
|
||||
hClose h
|
||||
return p
|
||||
|
||||
{- Runs a git subcommand, feeding it input, and returning its output.
|
||||
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
|
||||
pipeWriteRead :: Repo -> [CommandParam] -> L.ByteString -> IO (PipeHandle, L.ByteString)
|
||||
pipeWriteRead repo params s = assertLocal repo $ do
|
||||
(p, from, to) <- hPipeBoth "git" (toCommand $ gitCommandLine repo params)
|
||||
hSetBinaryMode from True
|
||||
L.hPut to s
|
||||
hClose to
|
||||
c <- L.hGetContents from
|
||||
return (p, c)
|
||||
|
||||
{- Reads null terminated output of a git command (as enabled by the -z
|
||||
- parameter), and splits it. -}
|
||||
pipeNullSplit :: Repo -> [CommandParam] -> IO [L.ByteString]
|
||||
pipeNullSplit repo params = filter (not . L.null) . L.split '\0' <$>
|
||||
pipeRead repo params
|
|
@ -20,13 +20,11 @@ import Utility.SafeCommand
|
|||
|
||||
{- Scans for files that are checked into git at the specified locations. -}
|
||||
inRepo :: Repo -> [FilePath] -> IO [FilePath]
|
||||
inRepo repo l = pipeNullSplit repo $
|
||||
Params "ls-files --cached -z --" : map File l
|
||||
inRepo repo l = pipeNullSplit repo $ Params "ls-files --cached -z --" : map File l
|
||||
|
||||
{- Scans for files at the specified locations that are not checked into git. -}
|
||||
notInRepo :: Repo -> Bool -> [FilePath] -> IO [FilePath]
|
||||
notInRepo repo include_ignored l =
|
||||
pipeNullSplit repo $
|
||||
notInRepo repo include_ignored l = pipeNullSplit repo $
|
||||
[Params "ls-files --others"] ++ exclude ++
|
||||
[Params "-z --"] ++ map File l
|
||||
where
|
||||
|
|
|
@ -16,7 +16,7 @@ import Control.Applicative
|
|||
import System.Posix.Types
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
|
||||
import Git.ByteString
|
||||
import Git
|
||||
import Utility.SafeCommand
|
||||
|
||||
type Treeish = String
|
||||
|
@ -31,7 +31,7 @@ data TreeItem = TreeItem
|
|||
{- Lists the contents of a Treeish -}
|
||||
lsTree :: Repo -> Treeish -> IO [TreeItem]
|
||||
lsTree repo t = map parseLsTree <$>
|
||||
pipeNullSplit repo [Params "ls-tree --full-tree -z -r --", File t]
|
||||
pipeNullSplitB repo [Params "ls-tree --full-tree -z -r --", File t]
|
||||
|
||||
{- Parses a line of ls-tree output.
|
||||
- (The --long format is not currently supported.) -}
|
||||
|
|
|
@ -19,7 +19,6 @@ import Data.String.Utils
|
|||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
|
||||
import Git
|
||||
import qualified Git.ByteString as GitB
|
||||
import Utility.SafeCommand
|
||||
|
||||
{- Performs a union merge between two branches, staging it in the index.
|
||||
|
@ -44,7 +43,7 @@ merge _ _ = error "wrong number of branches to merge"
|
|||
update_index :: Repo -> [String] -> IO ()
|
||||
update_index g l = togit ["update-index", "-z", "--index-info"] (join "\0" l)
|
||||
where
|
||||
togit ps content = pipeWrite g (map Param ps) content
|
||||
togit ps content = pipeWrite g (map Param ps) (L.pack content)
|
||||
>>= forceSuccess
|
||||
|
||||
{- Generates a line suitable to be fed into update-index, to add
|
||||
|
@ -83,7 +82,7 @@ calc_merge g differ = do
|
|||
{- Injects some content into git, returning its hash. -}
|
||||
hashObject :: Repo -> L.ByteString -> IO String
|
||||
hashObject repo content = getSha subcmd $ do
|
||||
(h, s) <- GitB.pipeWriteRead repo (map Param params) content
|
||||
(h, s) <- pipeWriteRead repo (map Param params) content
|
||||
L.length s `seq` do
|
||||
forceSuccess h
|
||||
reap -- XXX unsure why this is needed
|
||||
|
@ -100,7 +99,7 @@ mergeFile g (info, file) = case filter (/= nullsha) [asha, bsha] of
|
|||
[] -> return Nothing
|
||||
(sha:[]) -> return $ Just $ update_index_line sha file
|
||||
shas -> do
|
||||
content <- GitB.pipeRead g $ map Param ("show":shas)
|
||||
content <- pipeRead g $ map Param ("show":shas)
|
||||
sha <- hashObject g $ unionmerge content
|
||||
return $ Just $ update_index_line sha file
|
||||
where
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -1,6 +1,6 @@
|
|||
git-annex (3.20110929) UNRELEASED; urgency=low
|
||||
|
||||
* Sped up unused.
|
||||
* Various speed improvements gained by using ByteStrings.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Thu, 29 Sep 2011 18:58:53 -0400
|
||||
|
||||
|
|
Loading…
Reference in a new issue