sped up git-union-merge
Avoided the slow git add, instead inject content directly into git and populate the index all in one pass. Now this runs on my large real-world repo in 10 seconds, which is acceptable. Also lots of code cleanups.
This commit is contained in:
parent
c835166a7c
commit
d519bc7137
2 changed files with 74 additions and 53 deletions
16
GitRepo.hs
16
GitRepo.hs
|
@ -38,6 +38,8 @@ module GitRepo (
|
||||||
gitCommandLine,
|
gitCommandLine,
|
||||||
run,
|
run,
|
||||||
pipeRead,
|
pipeRead,
|
||||||
|
pipeWrite,
|
||||||
|
pipeWriteRead,
|
||||||
pipeNullSplit,
|
pipeNullSplit,
|
||||||
attributes,
|
attributes,
|
||||||
remotes,
|
remotes,
|
||||||
|
@ -348,7 +350,7 @@ run repo subcommand params = assertLocal repo $
|
||||||
boolSystem "git" (gitCommandLine repo ((Param subcommand):params))
|
boolSystem "git" (gitCommandLine repo ((Param subcommand):params))
|
||||||
>>! error $ "git " ++ show params ++ " failed"
|
>>! error $ "git " ++ show params ++ " failed"
|
||||||
|
|
||||||
{- Runs a git subcommand and returns it output, lazily.
|
{- Runs a git subcommand and returns its output, lazily.
|
||||||
-
|
-
|
||||||
- Note that this leaves the git process running, and so zombies will
|
- Note that this leaves the git process running, and so zombies will
|
||||||
- result unless reap is called.
|
- result unless reap is called.
|
||||||
|
@ -358,6 +360,18 @@ pipeRead repo params = assertLocal repo $ do
|
||||||
(_, s) <- pipeFrom "git" $ toCommand $ gitCommandLine repo params
|
(_, s) <- pipeFrom "git" $ toCommand $ gitCommandLine repo params
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
{- Runs a git subcommand, feeding it input.
|
||||||
|
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
|
||||||
|
pipeWrite :: Repo -> [CommandParam] -> String -> IO PipeHandle
|
||||||
|
pipeWrite repo params s = assertLocal repo $
|
||||||
|
pipeTo "git" (toCommand $ gitCommandLine repo params) s
|
||||||
|
|
||||||
|
{- Runs a git subcommand, feeding it input, and returning its output.
|
||||||
|
- You should call either getProcessStatus or forceSuccess on the PipeHandle. -}
|
||||||
|
pipeWriteRead :: Repo -> [CommandParam] -> String -> IO (PipeHandle, String)
|
||||||
|
pipeWriteRead repo params s = assertLocal repo $
|
||||||
|
pipeBoth "git" (toCommand $ gitCommandLine repo params) s
|
||||||
|
|
||||||
{- Reaps any zombie git processes. -}
|
{- Reaps any zombie git processes. -}
|
||||||
reap :: IO ()
|
reap :: IO ()
|
||||||
reap = do
|
reap = do
|
||||||
|
|
|
@ -8,12 +8,12 @@
|
||||||
import System.Environment
|
import System.Environment
|
||||||
import System.FilePath
|
import System.FilePath
|
||||||
import System.Directory
|
import System.Directory
|
||||||
import System.Cmd
|
|
||||||
import System.Cmd.Utils
|
import System.Cmd.Utils
|
||||||
import System.Posix.Env (setEnv)
|
import System.Posix.Env (setEnv)
|
||||||
import System.Posix.Directory (changeWorkingDirectory)
|
import Control.Monad (when)
|
||||||
import Control.Monad (when, unless)
|
|
||||||
import Data.List
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.String.Utils
|
||||||
|
|
||||||
import qualified GitRepo as Git
|
import qualified GitRepo as Git
|
||||||
import Utility
|
import Utility
|
||||||
|
@ -39,82 +39,89 @@ parseArgs = do
|
||||||
then usage
|
then usage
|
||||||
else return args
|
else return args
|
||||||
|
|
||||||
tmpDir :: Git.Repo -> FilePath
|
|
||||||
tmpDir g = Git.workTree g </> Git.gitDir g </> "tmp" </> "git-union-merge"
|
|
||||||
|
|
||||||
tmpIndex :: Git.Repo -> FilePath
|
tmpIndex :: Git.Repo -> FilePath
|
||||||
tmpIndex g = Git.workTree g </> Git.gitDir g </> "tmp" </> "git-union-merge.index"
|
tmpIndex g = Git.workTree g </> Git.gitDir g </> "index.git-union-merge"
|
||||||
|
|
||||||
{- Moves to a temporary directory, and configures git to use it as its
|
{- Configures git to use a temporary index file. -}
|
||||||
- working tree, and to use a temporary index file as well. -}
|
|
||||||
setup :: IO Git.Repo
|
setup :: IO Git.Repo
|
||||||
setup = do
|
setup = do
|
||||||
g <- Git.configRead =<< Git.repoFromCwd
|
g <- Git.configRead =<< Git.repoFromCwd
|
||||||
cleanup g -- idempotency
|
cleanup g -- idempotency
|
||||||
let tmp = tmpDir g
|
|
||||||
createDirectoryIfMissing True tmp
|
|
||||||
changeWorkingDirectory tmp
|
|
||||||
-- Note that due to these variables being set, Git.run and
|
|
||||||
-- similar helpers cannot be used, as they override the work tree.
|
|
||||||
-- It is only safe to use Git.run etc when doing things that do
|
|
||||||
-- not operate on the work tree.
|
|
||||||
setEnv "GIT_WORK_TREE" tmp True
|
|
||||||
setEnv "GIT_INDEX_FILE" (tmpIndex g) True
|
setEnv "GIT_INDEX_FILE" (tmpIndex g) True
|
||||||
return g
|
return g
|
||||||
|
|
||||||
cleanup :: Git.Repo -> IO ()
|
cleanup :: Git.Repo -> IO ()
|
||||||
cleanup g = do
|
cleanup g = do
|
||||||
e <- doesDirectoryExist (tmpDir g)
|
|
||||||
when e $ removeDirectoryRecursive (tmpDir g)
|
|
||||||
e' <- doesFileExist (tmpIndex g)
|
e' <- doesFileExist (tmpIndex g)
|
||||||
when e' $ removeFile (tmpIndex g)
|
when e' $ removeFile (tmpIndex g)
|
||||||
|
|
||||||
{- Stages the content of both refs into the index. -}
|
{- Stages the content of both refs into the index. -}
|
||||||
stage :: Git.Repo -> String -> String -> IO ()
|
stage :: Git.Repo -> String -> String -> IO ()
|
||||||
stage g aref bref = do
|
stage g aref bref = do
|
||||||
-- populate index with the contents of aref, as a starting point
|
-- Get the contents of aref, as a starting point.
|
||||||
_ <- system $ "git ls-tree -r --full-name --full-tree " ++ aref ++
|
ls <- fromgit
|
||||||
" | git update-index --index-info"
|
["ls-tree", "-z", "-r", "--full-tree", aref]
|
||||||
-- identify files that are different in bref, and stage merged files
|
-- Identify files that are different between aref and bref, and
|
||||||
diff <- Git.pipeNullSplit g $ map Param
|
-- inject merged versions into git.
|
||||||
["diff-tree", "--raw", "-z", "--no-renames", "-l0", aref, bref]
|
diff <- fromgit
|
||||||
mapM_ genfile (pairs diff)
|
["diff-tree", "--raw", "-z", "-r", "--no-renames", "-l0", aref, bref]
|
||||||
_ <- system "git add ."
|
ls' <- mapM mergefile (pairs diff)
|
||||||
return ()
|
-- Populate the index file. Later lines override earlier ones.
|
||||||
|
togit ["update-index", "-z", "--index-info"]
|
||||||
|
(join "\0" $ ls++catMaybes ls')
|
||||||
where
|
where
|
||||||
|
fromgit l = Git.pipeNullSplit g (map Param l)
|
||||||
|
togit l content = Git.pipeWrite g (map Param l) content
|
||||||
|
>>= forceSuccess
|
||||||
|
tofromgit l content = do
|
||||||
|
(h, s) <- Git.pipeWriteRead g (map Param l) content
|
||||||
|
length s `seq` do
|
||||||
|
forceSuccess h
|
||||||
|
Git.reap
|
||||||
|
return ((), s)
|
||||||
|
|
||||||
pairs [] = []
|
pairs [] = []
|
||||||
pairs (_:[]) = error "parse error"
|
pairs (_:[]) = error "parse error"
|
||||||
pairs (a:b:rest) = (a,b):pairs rest
|
pairs (a:b:rest) = (a,b):pairs rest
|
||||||
|
|
||||||
nullsha = take 40 $ repeat '0'
|
nullsha = take shaSize $ repeat '0'
|
||||||
|
ls_tree_line sha file = "100644 blob " ++ sha ++ "\t" ++ file
|
||||||
genfile (info, file) = do
|
unionmerge = unlines . nub . lines
|
||||||
|
|
||||||
|
mergefile (info, file) = do
|
||||||
let [_colonamode, _bmode, asha, bsha, _status] = words info
|
let [_colonamode, _bmode, asha, bsha, _status] = words info
|
||||||
let shas =
|
if bsha == nullsha
|
||||||
if bsha == nullsha
|
then return Nothing -- already staged from aref
|
||||||
then [] -- staged from aref
|
else mergefile' file asha bsha
|
||||||
else
|
mergefile' file asha bsha = do
|
||||||
if asha == nullsha
|
let shas = filter (/= nullsha) [asha, bsha]
|
||||||
then [bsha]
|
content <- Git.pipeRead g $ map Param ("show":shas)
|
||||||
else [asha, bsha]
|
sha <- getSha "hash-object" $
|
||||||
unless (null shas) $ do
|
tofromgit ["hash-object", "-w", "--stdin"] $
|
||||||
content <- Git.pipeRead g $ map Param ("show":shas)
|
unionmerge content
|
||||||
writeFile file $ unlines $ nub $ lines content
|
return $ Just $ ls_tree_line sha file
|
||||||
|
|
||||||
{- Commits the index into the specified branch. -}
|
{- Commits the index into the specified branch. -}
|
||||||
commit :: Git.Repo -> String -> String -> String -> IO ()
|
commit :: Git.Repo -> String -> String -> String -> IO ()
|
||||||
commit g branch aref bref = do
|
commit g branch aref bref = do
|
||||||
tree <- getsha $
|
tree <- getSha "write-tree" $
|
||||||
pipeFrom "git" ["write-tree"]
|
pipeFrom "git" ["write-tree"]
|
||||||
sha <- getsha $
|
sha <- getSha "commit-tree" $
|
||||||
pipeBoth "git" ["commit-tree", tree, "-p", aref, "-p", bref]
|
pipeBoth "git" ["commit-tree", tree, "-p", aref, "-p", bref]
|
||||||
"union merge"
|
"union merge"
|
||||||
Git.run g "update-ref" [Param $ "refs/heads/" ++ branch, Param sha]
|
Git.run g "update-ref" [Param $ "refs/heads/" ++ branch, Param sha]
|
||||||
where
|
|
||||||
getsha a = do
|
{- Runs an action that causes a git subcommand to emit a sha, and strips
|
||||||
(_, t) <- a
|
any trailing newline, returning the sha. -}
|
||||||
let t' = if last t == '\n'
|
getSha :: String -> IO (a, String) -> IO String
|
||||||
then take (length t - 1) t
|
getSha subcommand a = do
|
||||||
else t
|
(_, t) <- a
|
||||||
when (null t') $ error "failed to read sha from git"
|
let t' = if last t == '\n'
|
||||||
return t'
|
then take (length t - 1) t
|
||||||
|
else t
|
||||||
|
when (length t' /= shaSize) $
|
||||||
|
error $ "failed to read sha from git " ++ subcommand ++ " (" ++ t' ++ ")"
|
||||||
|
return t'
|
||||||
|
|
||||||
|
shaSize :: Int
|
||||||
|
shaSize = 40
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue