sped up git-union-merge

Avoided the slow git add, instead inject content directly into git and
populate the index all in one pass. Now this runs on my large real-world
repo in 10 seconds, which is acceptable.

Also lots of code cleanups.
This commit is contained in:
Joey Hess 2011-06-20 21:35:39 -04:00
parent c835166a7c
commit d519bc7137
2 changed files with 74 additions and 53 deletions

View file

@ -38,6 +38,8 @@ module GitRepo (
gitCommandLine, gitCommandLine,
run, run,
pipeRead, pipeRead,
pipeWrite,
pipeWriteRead,
pipeNullSplit, pipeNullSplit,
attributes, attributes,
remotes, remotes,
@ -348,7 +350,7 @@ run repo subcommand params = assertLocal repo $
boolSystem "git" (gitCommandLine repo ((Param subcommand):params)) boolSystem "git" (gitCommandLine repo ((Param subcommand):params))
>>! error $ "git " ++ show params ++ " failed" >>! error $ "git " ++ show params ++ " failed"
{- Runs a git subcommand and returns it output, lazily. {- Runs a git subcommand and returns its output, lazily.
- -
- Note that this leaves the git process running, and so zombies will - Note that this leaves the git process running, and so zombies will
- result unless reap is called. - result unless reap is called.
@ -358,6 +360,18 @@ pipeRead repo params = assertLocal repo $ do
(_, s) <- pipeFrom "git" $ toCommand $ gitCommandLine repo params (_, s) <- pipeFrom "git" $ toCommand $ gitCommandLine repo params
return s return s
{- Runs a git subcommand, feeding it input.
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
pipeWrite :: Repo -> [CommandParam] -> String -> IO PipeHandle
pipeWrite repo params s = assertLocal repo $
pipeTo "git" (toCommand $ gitCommandLine repo params) s
{- Runs a git subcommand, feeding it input, and returning its output.
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
pipeWriteRead :: Repo -> [CommandParam] -> String -> IO (PipeHandle, String)
pipeWriteRead repo params s = assertLocal repo $
pipeBoth "git" (toCommand $ gitCommandLine repo params) s
{- Reaps any zombie git processes. -} {- Reaps any zombie git processes. -}
reap :: IO () reap :: IO ()
reap = do reap = do

View file

@ -8,12 +8,12 @@
import System.Environment import System.Environment
import System.FilePath import System.FilePath
import System.Directory import System.Directory
import System.Cmd
import System.Cmd.Utils import System.Cmd.Utils
import System.Posix.Env (setEnv) import System.Posix.Env (setEnv)
import System.Posix.Directory (changeWorkingDirectory) import Control.Monad (when)
import Control.Monad (when, unless)
import Data.List import Data.List
import Data.Maybe
import Data.String.Utils
import qualified GitRepo as Git import qualified GitRepo as Git
import Utility import Utility
@ -39,82 +39,89 @@ parseArgs = do
then usage then usage
else return args else return args
tmpDir :: Git.Repo -> FilePath
tmpDir g = Git.workTree g </> Git.gitDir g </> "tmp" </> "git-union-merge"
tmpIndex :: Git.Repo -> FilePath tmpIndex :: Git.Repo -> FilePath
tmpIndex g = Git.workTree g </> Git.gitDir g </> "tmp" </> "git-union-merge.index" tmpIndex g = Git.workTree g </> Git.gitDir g </> "index.git-union-merge"
{- Moves to a temporary directory, and configures git to use it as its {- Configures git to use a temporary index file. -}
- working tree, and to use a temporary index file as well. -}
setup :: IO Git.Repo setup :: IO Git.Repo
setup = do setup = do
g <- Git.configRead =<< Git.repoFromCwd g <- Git.configRead =<< Git.repoFromCwd
cleanup g -- idempotency cleanup g -- idempotency
let tmp = tmpDir g
createDirectoryIfMissing True tmp
changeWorkingDirectory tmp
-- Note that due to these variables being set, Git.run and
-- similar helpers cannot be used, as they override the work tree.
-- It is only safe to use Git.run etc when doing things that do
-- not operate on the work tree.
setEnv "GIT_WORK_TREE" tmp True
setEnv "GIT_INDEX_FILE" (tmpIndex g) True setEnv "GIT_INDEX_FILE" (tmpIndex g) True
return g return g
cleanup :: Git.Repo -> IO () cleanup :: Git.Repo -> IO ()
cleanup g = do cleanup g = do
e <- doesDirectoryExist (tmpDir g)
when e $ removeDirectoryRecursive (tmpDir g)
e' <- doesFileExist (tmpIndex g) e' <- doesFileExist (tmpIndex g)
when e' $ removeFile (tmpIndex g) when e' $ removeFile (tmpIndex g)
{- Stages the content of both refs into the index. -} {- Stages the content of both refs into the index. -}
stage :: Git.Repo -> String -> String -> IO () stage :: Git.Repo -> String -> String -> IO ()
stage g aref bref = do stage g aref bref = do
-- populate index with the contents of aref, as a starting point -- Get the contents of aref, as a starting point.
_ <- system $ "git ls-tree -r --full-name --full-tree " ++ aref ++ ls <- fromgit
" | git update-index --index-info" ["ls-tree", "-z", "-r", "--full-tree", aref]
-- identify files that are different in bref, and stage merged files -- Identify files that are different between aref and bref, and
diff <- Git.pipeNullSplit g $ map Param -- inject merged versions into git.
["diff-tree", "--raw", "-z", "--no-renames", "-l0", aref, bref] diff <- fromgit
mapM_ genfile (pairs diff) ["diff-tree", "--raw", "-z", "-r", "--no-renames", "-l0", aref, bref]
_ <- system "git add ." ls' <- mapM mergefile (pairs diff)
return () -- Populate the index file. Later lines override earlier ones.
togit ["update-index", "-z", "--index-info"]
(join "\0" $ ls++catMaybes ls')
where where
fromgit l = Git.pipeNullSplit g (map Param l)
togit l content = Git.pipeWrite g (map Param l) content
>>= forceSuccess
tofromgit l content = do
(h, s) <- Git.pipeWriteRead g (map Param l) content
length s `seq` do
forceSuccess h
Git.reap
return ((), s)
pairs [] = [] pairs [] = []
pairs (_:[]) = error "parse error" pairs (_:[]) = error "parse error"
pairs (a:b:rest) = (a,b):pairs rest pairs (a:b:rest) = (a,b):pairs rest
nullsha = take 40 $ repeat '0' nullsha = take shaSize $ repeat '0'
ls_tree_line sha file = "100644 blob " ++ sha ++ "\t" ++ file
genfile (info, file) = do unionmerge = unlines . nub . lines
mergefile (info, file) = do
let [_colonamode, _bmode, asha, bsha, _status] = words info let [_colonamode, _bmode, asha, bsha, _status] = words info
let shas = if bsha == nullsha
if bsha == nullsha then return Nothing -- already staged from aref
then [] -- staged from aref else mergefile' file asha bsha
else mergefile' file asha bsha = do
if asha == nullsha let shas = filter (/= nullsha) [asha, bsha]
then [bsha] content <- Git.pipeRead g $ map Param ("show":shas)
else [asha, bsha] sha <- getSha "hash-object" $
unless (null shas) $ do tofromgit ["hash-object", "-w", "--stdin"] $
content <- Git.pipeRead g $ map Param ("show":shas) unionmerge content
writeFile file $ unlines $ nub $ lines content return $ Just $ ls_tree_line sha file
{- Commits the index into the specified branch. -} {- Commits the index into the specified branch. -}
commit :: Git.Repo -> String -> String -> String -> IO () commit :: Git.Repo -> String -> String -> String -> IO ()
commit g branch aref bref = do commit g branch aref bref = do
tree <- getsha $ tree <- getSha "write-tree" $
pipeFrom "git" ["write-tree"] pipeFrom "git" ["write-tree"]
sha <- getsha $ sha <- getSha "commit-tree" $
pipeBoth "git" ["commit-tree", tree, "-p", aref, "-p", bref] pipeBoth "git" ["commit-tree", tree, "-p", aref, "-p", bref]
"union merge" "union merge"
Git.run g "update-ref" [Param $ "refs/heads/" ++ branch, Param sha] Git.run g "update-ref" [Param $ "refs/heads/" ++ branch, Param sha]
where
getsha a = do {- Runs an action that causes a git subcommand to emit a sha, and strips
(_, t) <- a any trailing newline, returning the sha. -}
let t' = if last t == '\n' getSha :: String -> IO (a, String) -> IO String
then take (length t - 1) t getSha subcommand a = do
else t (_, t) <- a
when (null t') $ error "failed to read sha from git" let t' = if last t == '\n'
return t' then take (length t - 1) t
else t
when (length t' /= shaSize) $
error $ "failed to read sha from git " ++ subcommand ++ " (" ++ t' ++ ")"
return t'
shaSize :: Int
shaSize = 40