merge: Now runs in constant space.
Previously, a merge was calculated up front by running various actions that called git and built up a list of lines, which was sent to git update-index only at the end. This necessarily used space proportional to the size of the diff between the trees being merged. Now, lines are streamed into git update-index from each of the actions in turn. The runtime memory use of git-annex merge when merging 50000 location log files drops from around 100 MB to a constant 4 MB. Presumably it runs quite a lot faster, too.
This commit is contained in:
parent
922e9af528
commit
21a925dcf1
3 changed files with 34 additions and 25 deletions
|
@ -56,7 +56,8 @@ index g = gitAnnexDir g </> "index"
|
||||||
- and merge in changes from other branches.
|
- and merge in changes from other branches.
|
||||||
-}
|
-}
|
||||||
genIndex :: Git.Repo -> IO ()
|
genIndex :: Git.Repo -> IO ()
|
||||||
genIndex g = Git.UnionMerge.ls_tree fullname g >>= Git.UnionMerge.update_index g
|
genIndex g = Git.UnionMerge.update_index_via g
|
||||||
|
[Git.UnionMerge.ls_tree fullname g]
|
||||||
|
|
||||||
{- Runs an action using the branch's index file. -}
|
{- Runs an action using the branch's index file. -}
|
||||||
withIndex :: Annex a -> Annex a
|
withIndex :: Annex a -> Annex a
|
||||||
|
|
|
@ -9,13 +9,13 @@ module Git.UnionMerge (
|
||||||
merge,
|
merge,
|
||||||
merge_index,
|
merge_index,
|
||||||
update_index,
|
update_index,
|
||||||
|
update_index_via,
|
||||||
update_index_line,
|
update_index_line,
|
||||||
ls_tree
|
ls_tree
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import System.Cmd.Utils
|
import System.Cmd.Utils
|
||||||
import Data.List
|
import Data.List
|
||||||
import Data.Maybe
|
|
||||||
import qualified Data.ByteString.Lazy.Char8 as L
|
import qualified Data.ByteString.Lazy.Char8 as L
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
|
@ -29,47 +29,56 @@ import Git.CatFile
|
||||||
-}
|
-}
|
||||||
merge :: String -> String -> Repo -> IO ()
|
merge :: String -> String -> Repo -> IO ()
|
||||||
merge x y repo = do
|
merge x y repo = do
|
||||||
a <- ls_tree x repo
|
|
||||||
h <- catFileStart repo
|
h <- catFileStart repo
|
||||||
b <- merge_trees x y h repo
|
update_index_via repo
|
||||||
|
[ ls_tree x repo
|
||||||
|
, merge_trees x y h repo
|
||||||
|
]
|
||||||
catFileStop h
|
catFileStop h
|
||||||
update_index repo (a++b)
|
|
||||||
|
|
||||||
{- Merges a list of branches into the index. Previously staged changes in
|
{- Merges a list of branches into the index. Previously staged changes in
|
||||||
- the index are preserved (and participate in the merge). -}
|
- the index are preserved (and participate in the merge). -}
|
||||||
merge_index :: CatFileHandle -> Repo -> [String] -> IO ()
|
merge_index :: CatFileHandle -> Repo -> [String] -> IO ()
|
||||||
merge_index h repo bs =
|
merge_index h repo bs =
|
||||||
update_index repo =<< concat <$> mapM (\b -> merge_tree_index b h repo) bs
|
update_index_via repo $ map (\b -> merge_tree_index b h repo) bs
|
||||||
|
|
||||||
{- Feeds a list into update-index. Later items in the list can override
|
update_index :: Repo -> [String] -> IO ()
|
||||||
|
update_index repo ls = update_index_via repo [\h -> mapM_ (sendContent h) ls]
|
||||||
|
|
||||||
|
{- Feeds content into update-index. Later items in the list can override
|
||||||
- earlier ones, so the list can be generated from any combination of
|
- earlier ones, so the list can be generated from any combination of
|
||||||
- ls_tree, merge_trees, and merge_tree_index. -}
|
- ls_tree, merge_trees, and merge_tree_index. -}
|
||||||
update_index :: Repo -> [String] -> IO ()
|
update_index_via :: Repo -> [Handle -> IO ()] -> IO ()
|
||||||
update_index repo l = do
|
update_index_via repo ls = do
|
||||||
(p, h) <- hPipeTo "git" (toCommand $ Git.gitCommandLine params repo)
|
(p, h) <- hPipeTo "git" (toCommand $ Git.gitCommandLine params repo)
|
||||||
mapM_ (\s -> hPutStr h s >> hPutStr h "\0") l
|
forM_ ls $ \l -> l h
|
||||||
hClose h
|
hClose h
|
||||||
forceSuccess p
|
forceSuccess p
|
||||||
where
|
where
|
||||||
params = map Param ["update-index", "-z", "--index-info"]
|
params = map Param ["update-index", "-z", "--index-info"]
|
||||||
|
|
||||||
|
sendContent :: Handle -> String -> IO ()
|
||||||
|
sendContent h s = do
|
||||||
|
hPutStr h s
|
||||||
|
hPutStr h "\0"
|
||||||
|
|
||||||
{- Generates a line suitable to be fed into update-index, to add
|
{- Generates a line suitable to be fed into update-index, to add
|
||||||
- a given file with a given sha. -}
|
- a given file with a given sha. -}
|
||||||
update_index_line :: String -> FilePath -> String
|
update_index_line :: String -> FilePath -> String
|
||||||
update_index_line sha file = "100644 blob " ++ sha ++ "\t" ++ file
|
update_index_line sha file = "100644 blob " ++ sha ++ "\t" ++ file
|
||||||
|
|
||||||
{- Gets the contents of a tree in a format suitable for update_index. -}
|
{- Gets the contents of a tree. -}
|
||||||
ls_tree :: String -> Repo -> IO [String]
|
ls_tree :: String -> Repo -> Handle -> IO ()
|
||||||
ls_tree x = pipeNullSplit params
|
ls_tree x repo h = mapM_ (sendContent h) =<< pipeNullSplit params repo
|
||||||
where
|
where
|
||||||
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
|
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
|
||||||
|
|
||||||
{- For merging two trees. -}
|
{- For merging two trees. -}
|
||||||
merge_trees :: String -> String -> CatFileHandle -> Repo -> IO [String]
|
merge_trees :: String -> String -> CatFileHandle -> Repo -> Handle -> IO ()
|
||||||
merge_trees x y h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
|
merge_trees x y h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
|
||||||
|
|
||||||
{- For merging a single tree into the index. -}
|
{- For merging a single tree into the index. -}
|
||||||
merge_tree_index :: String -> CatFileHandle -> Repo -> IO [String]
|
merge_tree_index :: String -> CatFileHandle -> Repo -> Handle -> IO ()
|
||||||
merge_tree_index x h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x]
|
merge_tree_index x h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x]
|
||||||
|
|
||||||
diff_opts :: [String]
|
diff_opts :: [String]
|
||||||
|
@ -77,21 +86,19 @@ diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"]
|
||||||
|
|
||||||
{- Calculates how to perform a merge, using git to get a raw diff,
|
{- Calculates how to perform a merge, using git to get a raw diff,
|
||||||
- and returning a list suitable for update_index. -}
|
- and returning a list suitable for update_index. -}
|
||||||
calc_merge :: CatFileHandle -> [String] -> Repo -> IO [String]
|
calc_merge :: CatFileHandle -> [String] -> Repo -> Handle -> IO ()
|
||||||
calc_merge h differ repo = do
|
calc_merge ch differ repo ih = pipeNullSplit (map Param differ) repo >>= go
|
||||||
diff <- pipeNullSplit (map Param differ) repo
|
|
||||||
l <- mapM (\p -> mergeFile p h repo) (pairs diff)
|
|
||||||
return $ catMaybes l
|
|
||||||
where
|
where
|
||||||
pairs [] = []
|
go [] = return ()
|
||||||
pairs (_:[]) = error "calc_merge parse error"
|
go (info:file:rest) = mergeFile info file ch repo >>=
|
||||||
pairs (a:b:rest) = (a,b):pairs rest
|
maybe (go rest) (\l -> sendContent ih l >> go rest)
|
||||||
|
go (_:[]) = error "calc_merge parse error"
|
||||||
|
|
||||||
{- Given an info line from a git raw diff, and the filename, generates
|
{- Given an info line from a git raw diff, and the filename, generates
|
||||||
- a line suitable for update_index that union merges the two sides of the
|
- a line suitable for update_index that union merges the two sides of the
|
||||||
- diff. -}
|
- diff. -}
|
||||||
mergeFile :: (String, FilePath) -> CatFileHandle -> Repo -> IO (Maybe String)
|
mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String)
|
||||||
mergeFile (info, file) h repo = case filter (/= nullsha) [asha, bsha] of
|
mergeFile info file h repo = case filter (/= nullsha) [asha, bsha] of
|
||||||
[] -> return Nothing
|
[] -> return Nothing
|
||||||
(sha:[]) -> return $ Just $ update_index_line sha file
|
(sha:[]) -> return $ Just $ update_index_line sha file
|
||||||
shas -> do
|
shas -> do
|
||||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -13,6 +13,7 @@ git-annex (3.20111112) UNRELEASED; urgency=low
|
||||||
displayed)
|
displayed)
|
||||||
* status: --fast is back
|
* status: --fast is back
|
||||||
* Fix support for insteadOf url remapping. Closes: #644278
|
* Fix support for insteadOf url remapping. Closes: #644278
|
||||||
|
* merge: Now runs in constant space.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Sat, 12 Nov 2011 14:50:21 -0400
|
-- Joey Hess <joeyh@debian.org> Sat, 12 Nov 2011 14:50:21 -0400
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue