diff --git a/.gitignore b/.gitignore index b73167c925..9a4bc80de3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,10 @@ configure SysConfig.hs git-annex git-annex-shell +git-union-merge git-annex.1 git-annex-shell.1 +git-union-merge.1 doc/.ikiwiki html *.tix diff --git a/GitRepo.hs b/GitRepo.hs index 24bc9b5c2b..0bee2842a5 100644 --- a/GitRepo.hs +++ b/GitRepo.hs @@ -38,6 +38,7 @@ module GitRepo ( gitCommandLine, run, pipeRead, + pipeNullSplit, attributes, remotes, remotesAdd, @@ -412,7 +413,7 @@ typeChangedFiles' repo l middle = pipeNullSplit repo $ start ++ middle ++ end end = [Param "--"] ++ map File l {- Reads null terminated output of a git command (as enabled by the -z - - parameter), and splits it into a list of files. -} + - parameter), and splits it into a list of files/lines/whatever. -} pipeNullSplit :: Repo -> [CommandParam] -> IO [FilePath] pipeNullSplit repo params = do fs0 <- pipeRead repo params diff --git a/Makefile b/Makefile index 286c3a6e54..915b0bf0b2 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,8 @@ GHCFLAGS=-prof -auto-all -rtsopts -caf-all -fforce-recomp $(IGNORE) endif GHCMAKE=ghc $(GHCFLAGS) --make -bins=git-annex git-annex-shell -mans=git-annex.1 git-annex-shell.1 +bins=git-annex git-annex-shell git-union-merge +mans=git-annex.1 git-annex-shell.1 git-union-merge.1 all: $(bins) $(mans) docs @@ -33,6 +33,8 @@ git-annex.1: doc/git-annex.mdwn ./mdwn2man git-annex 1 doc/git-annex.mdwn > git-annex.1 git-annex-shell.1: doc/git-annex-shell.mdwn ./mdwn2man git-annex-shell 1 doc/git-annex-shell.mdwn > git-annex-shell.1 +git-union-merge.1: doc/git-union-merge.mdwn + ./mdwn2man git-union-merge 1 doc/git-union-merge.mdwn > git-union-merge.1 install: all install -d $(DESTDIR)$(PREFIX)/bin diff --git a/debian/changelog b/debian/changelog index de012de5bd..b96b9f43d6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,6 +6,8 @@ git-annex (0.20110611) UNRELEASED; urgency=low such as btrfs. 
* Allow --trust etc to specify a repository by name, for temporarily trusting repositories that are not configured remotes. + * git-union-merge: New git subcommand, that does a generic union merge + operation, and operates efficiently without touching the working tree. -- Joey Hess Mon, 13 Jun 2011 19:53:24 -0400 diff --git a/doc/git-union-merge.mdwn b/doc/git-union-merge.mdwn new file mode 100644 index 0000000000..ac8e8f7a99 --- /dev/null +++ b/doc/git-union-merge.mdwn @@ -0,0 +1,38 @@ +# NAME + +git-union-merge - Join branches together using a union merge + +# SYNOPSIS + +git union-merge branch ref ref + +# DESCRIPTION + +Does a union merge between two refs, storing the result in the +specified branch. + +The union merge will always succeed, but assumes that files can be merged +simply by concatenating together lines from both refs, in any order. +So, this is useful only for branches containing log-type data. + +Note that this does not touch the checked out working copy. It operates +entirely on git refs and branches. + +# EXAMPLE + + git union-merge git-annex git-annex origin/git-annex + +Merges the current git-annex branch, and a version from origin, +storing the result in the git-annex branch. + +# BUGS + +File modes are not currently merged. + +# AUTHOR + +Joey Hess + + + +Warning: this page is automatically made into a man page via [mdwn2man](http://git.ikiwiki.info/?p=ikiwiki;a=blob;f=mdwn2man;hb=HEAD). Edit with care diff --git a/doc/todo/branching.mdwn b/doc/todo/branching.mdwn index 9c44c03feb..37e7b6edd2 100644 --- a/doc/todo/branching.mdwn +++ b/doc/todo/branching.mdwn @@ -148,10 +148,8 @@ problem generically. Something like this: * For remotes, there are also `origin/B`, `otherremote/B`, etc. * To merge two branches `B` and `foo/B`, construct a merge commit that makes each file have all lines that were in either version of the file, - with duplicates removed (probably). Do this without checking out a tree, - or using a temporary directory. 
(One easy but expensive way is to just - check out the branch to a temp dir, union merge into it, and remove the - temp dir ... but it should be possible to do it without using a temp dir.) + with duplicates removed (probably). Do this without checking out a tree. + -- now implemented as git-union-merge * As a `post-merge` hook, merge `*/B` into `B`. This will ensure `B` is always up-to-date after a pull from a remote. * When pushing to a remote, nothing need to be done, except ensure diff --git a/git-union-merge.hs b/git-union-merge.hs new file mode 100644 index 0000000000..482f66daa0 --- /dev/null +++ b/git-union-merge.hs @@ -0,0 +1,120 @@
+{- git-union-merge program
+ -
+ - Copyright 2011 Joey Hess
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+import System.Environment
+import System.FilePath
+import System.Directory
+import System.Cmd
+import System.Cmd.Utils
+import System.Posix.Env (setEnv)
+import System.Posix.Directory (changeWorkingDirectory)
+import Control.Exception (bracket)
+import Control.Monad (when, unless)
+import Data.List
+
+import qualified GitRepo as Git
+import Utility
+
+{- Synopsis shown when the program is invoked with bad parameters. -}
+header :: String
+header = "Usage: git-union-merge branch ref ref"
+
+{- Aborts with an error message that includes the synopsis. -}
+usage :: IO a
+usage = error $ "bad parameters\n\n" ++ header
+
+{- Union merges the two refs given on the command line into the branch.
+ -
+ - bracket runs cleanup on the repo returned by setup both after a
+ - successful merge and when stage or commit throws, so a failed run does
+ - not leave the temporary work tree and index behind. -}
+main :: IO ()
+main = do
+    [branch, aref, bref] <- parseArgs
+    bracket setup cleanup $ \g -> do
+        stage g aref bref
+        commit g branch aref bref
+
+{- Returns the command line parameters when there are exactly three of
+ - them (branch, ref, ref); any other count ends the program via usage. -}
+parseArgs :: IO [String]
+parseArgs = do
+    args <- getArgs
+    case args of
+        [_, _, _] -> return args
+        _ -> usage
+
+{- Directory that stands in for the work tree while merging. The path
+ - components have to be joined with </>; simply juxtaposing them would
+ - apply Git.workTree to five arguments. -}
+tmpDir :: Git.Repo -> FilePath
+tmpDir g = Git.workTree g </> Git.gitDir g </> "tmp" </> "git-union-merge"
+
+{- Index file used instead of the repository's real index while merging. -}
+tmpIndex :: Git.Repo -> FilePath
+tmpIndex g = Git.workTree g </> Git.gitDir g </> "tmp" </> "git-union-merge.index"
+
+{- Moves to a temporary directory, and configures git to use it as its
+ - working tree, and to use a temporary index file as well. 
-}
+setup :: IO Git.Repo
+setup = do
+    g <- Git.configRead =<< Git.repoFromCwd
+    cleanup g -- idempotency
+    let tmp = tmpDir g
+    createDirectoryIfMissing True tmp
+    changeWorkingDirectory tmp
+    -- Note that due to these variables being set, Git.run and
+    -- similar helpers cannot be used, as they override the work tree.
+    -- It is only safe to use Git.run etc when doing things that do
+    -- not operate on the work tree.
+    setEnv "GIT_WORK_TREE" tmp True
+    setEnv "GIT_INDEX_FILE" (tmpIndex g) True
+    return g
+
+{- Removes the temporary work tree directory and the temporary index
+ - file, whichever of them exist. -}
+cleanup :: Git.Repo -> IO ()
+cleanup g = do
+    e <- doesDirectoryExist (tmpDir g)
+    when e $ removeDirectoryRecursive (tmpDir g)
+    e' <- doesFileExist (tmpIndex g)
+    when e' $ removeFile (tmpIndex g)
+
+{- Stages the content of both refs into the index.
+ -
+ - The index is first filled with every file of aref. Each file that
+ - diff-tree reports as differing in bref is then written into the current
+ - directory with the lines of both versions, duplicates removed, and
+ - added to the index. Files that bref deleted are not written, so they
+ - keep the entry that came from aref. -}
+stage :: Git.Repo -> String -> String -> IO ()
+stage g aref bref = do
+    -- populate index with the contents of aref, as a starting point.
+    -- The ref is handed to git as its own argument rather than being
+    -- spliced into a shell command line, and a failure of either
+    -- command is raised by forceSuccess instead of being ignored.
+    (lsh, listing) <- pipeFrom "git"
+        ["ls-tree", "-r", "--full-name", "--full-tree", aref]
+    forceSuccess =<< pipeTo "git" ["update-index", "--index-info"] listing
+    forceSuccess lsh
+    -- identify files that are different in bref, and stage merged files.
+    -- -r is needed to get the changed files inside subdirectories
+    -- rather than one entry for the subdirectory's tree object.
+    diff <- Git.pipeNullSplit g $ map Param
+        ["diff-tree", "--raw", "-z", "-r", "--no-renames", "-l0", aref, bref]
+    mapM_ genfile (pairs diff)
+    -- NOTE(review): git 2.0 and newer also stage removals for
+    -- "git add <path>"; confirm whether --ignore-removal is needed here
+    -- so that files missing from the temporary work tree stay in the index.
+    _ <- system "git add ."
+    return ()
+  where
+    -- diff-tree -z output alternates between an info field and a file name
+    pairs [] = []
+    pairs (_:[]) = error "parse error"
+    pairs (a:b:rest) = (a,b):pairs rest
+
+    -- sha that diff-tree prints for the missing side of an add or delete
+    nullsha = replicate 40 '0'
+
+    genfile (info, file) = case words info of
+        [_colonamode, _bmode, asha, bsha, _status] ->
+            merge file (shasfor asha bsha)
+        _ -> error $ "unexpected diff-tree output: " ++ info
+
+    shasfor asha bsha
+        | bsha == nullsha = [] -- staged from aref
+        | asha == nullsha = [bsha]
+        | otherwise = [asha, bsha]
+
+    merge _ [] = return ()
+    merge file shas = do
+        -- Each blob is read on its own and split into lines before
+        -- joining, so a blob without a final newline cannot have its
+        -- last line glued to the first line of the next blob.
+        contents <- mapM (\s -> Git.pipeRead g [Param "show", Param s]) shas
+        -- the file may live in a subdirectory that does not exist yet
+        let dir = takeDirectory file
+        unless (null dir) $ createDirectoryIfMissing True dir
+        writeFile file $ unlines $ nub $ concatMap lines contents
+
+{- Commits the index into the specified branch. 
-}
+{- The tree written from the index becomes a commit with aref and bref as
+ - its two parents and the message "union merge"; refs/heads/<branch> is
+ - then pointed at that commit. -}
+commit :: Git.Repo -> String -> String -> String -> IO ()
+commit g branch aref bref = do
+    tree <- getsha $
+        pipeFrom "git" ["write-tree"]
+    sha <- getsha $
+        pipeBoth "git" ["commit-tree", tree, "-p", aref, "-p", bref]
+            "union merge"
+    Git.run g "update-ref" [Param $ "refs/heads/" ++ branch, Param sha]
+  where
+    -- Runs a pipe action and returns its output minus one trailing
+    -- newline. Empty output is reported with a clear error.
+    getsha a = do
+        (h, t) <- a
+        let t' = chomp t
+        when (null t') $ error "failed to read sha from git"
+        -- chomp has inspected the end of the output by now, so all of
+        -- it has been read; wait for the command and check its status.
+        forceSuccess h
+        return t'
+    -- isSuffixOf is False for the empty string, unlike last, which
+    -- would throw before the empty output could be reported.
+    chomp s
+        | "\n" `isSuffixOf` s = take (length s - 1) s
+        | otherwise = s