Optimised union merging; now only runs git cat-file once.
This commit is contained in:
		
					parent
					
						
							
								cea65b9e5b
							
						
					
				
			
			
				commit
				
					
						04edae6791
					
				
			
		
					 7 changed files with 62 additions and 36 deletions
				
			
		| 
						 | 
					@ -149,7 +149,8 @@ update = onceonly $ do
 | 
				
			||||||
			 - documentation advises users not to directly
 | 
								 - documentation advises users not to directly
 | 
				
			||||||
			 - modify the branch.
 | 
								 - modify the branch.
 | 
				
			||||||
			 -}
 | 
								 -}
 | 
				
			||||||
			inRepo $ \g -> Git.UnionMerge.merge_index g branches
 | 
								h <- catFileHandle
 | 
				
			||||||
 | 
								inRepo $ \g -> Git.UnionMerge.merge_index h g branches
 | 
				
			||||||
		ff <- if dirty then return False else tryFastForwardTo refs
 | 
							ff <- if dirty then return False else tryFastForwardTo refs
 | 
				
			||||||
		unless ff $ inRepo $
 | 
							unless ff $ inRepo $
 | 
				
			||||||
			Git.commit merge_desc fullname (nub $ fullname:refs)
 | 
								Git.commit merge_desc fullname (nub $ fullname:refs)
 | 
				
			||||||
| 
						 | 
					@ -280,7 +281,7 @@ get' staleok file = fromcache =<< getCache file
 | 
				
			||||||
		fromjournal Nothing
 | 
							fromjournal Nothing
 | 
				
			||||||
			| staleok = withIndex frombranch
 | 
								| staleok = withIndex frombranch
 | 
				
			||||||
			| otherwise = withIndexUpdate $ frombranch >>= cache
 | 
								| otherwise = withIndexUpdate $ frombranch >>= cache
 | 
				
			||||||
		frombranch = catFile fullname file
 | 
							frombranch = L.unpack <$> catFile fullname file
 | 
				
			||||||
		cache content = do
 | 
							cache content = do
 | 
				
			||||||
			setCache file content
 | 
								setCache file content
 | 
				
			||||||
			return content
 | 
								return content
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,18 +6,25 @@
 | 
				
			||||||
 -}
 | 
					 -}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
module Annex.CatFile (
 | 
					module Annex.CatFile (
 | 
				
			||||||
	catFile
 | 
						catFile,
 | 
				
			||||||
 | 
						catFileHandle
 | 
				
			||||||
) where
 | 
					) where
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import qualified Data.ByteString.Lazy.Char8 as L
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Common.Annex
 | 
					import Common.Annex
 | 
				
			||||||
import qualified Git.CatFile
 | 
					import qualified Git.CatFile
 | 
				
			||||||
import qualified Annex
 | 
					import qualified Annex
 | 
				
			||||||
 | 
					
 | 
				
			||||||
catFile :: String -> FilePath -> Annex String
 | 
					catFile :: String -> FilePath -> Annex L.ByteString
 | 
				
			||||||
catFile branch file = maybe startup go =<< Annex.getState Annex.catfilehandle
 | 
					catFile branch file = do
 | 
				
			||||||
 | 
						h <- catFileHandle
 | 
				
			||||||
 | 
						liftIO $ Git.CatFile.catFile h branch file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					catFileHandle :: Annex Git.CatFile.CatFileHandle
 | 
				
			||||||
 | 
					catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle
 | 
				
			||||||
	where
 | 
						where
 | 
				
			||||||
		startup = do
 | 
							startup = do
 | 
				
			||||||
			h <- inRepo Git.CatFile.catFileStart
 | 
								h <- inRepo Git.CatFile.catFileStart
 | 
				
			||||||
			Annex.changeState $ \s -> s { Annex.catfilehandle = Just h }
 | 
								Annex.changeState $ \s -> s { Annex.catfilehandle = Just h }
 | 
				
			||||||
			go h
 | 
								return h
 | 
				
			||||||
		go h = liftIO $ Git.CatFile.catFile h branch file
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -197,7 +197,7 @@ getKeysReferencedInGit ref = do
 | 
				
			||||||
		findkeys c (l:ls)
 | 
							findkeys c (l:ls)
 | 
				
			||||||
			| isSymLink (LsTree.mode l) = do
 | 
								| isSymLink (LsTree.mode l) = do
 | 
				
			||||||
				content <- catFile ref $ LsTree.file l
 | 
									content <- catFile ref $ LsTree.file l
 | 
				
			||||||
				case fileKey (takeFileName content) of
 | 
									case fileKey (takeFileName $ L.unpack content) of
 | 
				
			||||||
					Nothing -> findkeys c ls
 | 
										Nothing -> findkeys c ls
 | 
				
			||||||
					Just k -> findkeys (k:c) ls
 | 
										Just k -> findkeys (k:c) ls
 | 
				
			||||||
			| otherwise = findkeys c ls
 | 
								| otherwise = findkeys c ls
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9,13 +9,15 @@ module Git.CatFile (
 | 
				
			||||||
	CatFileHandle,
 | 
						CatFileHandle,
 | 
				
			||||||
	catFileStart,
 | 
						catFileStart,
 | 
				
			||||||
	catFileStop,
 | 
						catFileStop,
 | 
				
			||||||
	catFile
 | 
						catFile,
 | 
				
			||||||
 | 
						catObject
 | 
				
			||||||
) where
 | 
					) where
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Control.Monad.State
 | 
					import Control.Monad.State
 | 
				
			||||||
import System.Cmd.Utils
 | 
					import System.Cmd.Utils
 | 
				
			||||||
import System.IO
 | 
					import System.IO
 | 
				
			||||||
import qualified Data.ByteString.Char8 as B
 | 
					import qualified Data.ByteString.Char8 as S
 | 
				
			||||||
 | 
					import qualified Data.ByteString.Lazy.Char8 as L
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Git
 | 
					import Git
 | 
				
			||||||
import Utility.SafeCommand
 | 
					import Utility.SafeCommand
 | 
				
			||||||
| 
						 | 
					@ -34,30 +36,38 @@ catFileStop (pid, from, to) = do
 | 
				
			||||||
	hClose from
 | 
						hClose from
 | 
				
			||||||
	forceSuccess pid
 | 
						forceSuccess pid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- Uses a running git cat-file read the content of a file from a branch.
 | 
					{- Reads a file from a specified branch. -}
 | 
				
			||||||
 - Files that do not exist on the branch will have "" returned. -}
 | 
					catFile :: CatFileHandle -> String -> FilePath -> IO L.ByteString
 | 
				
			||||||
catFile :: CatFileHandle -> String -> FilePath -> IO String
 | 
					catFile h branch file = catObject h (branch ++ ":" ++ file)
 | 
				
			||||||
catFile (_, from, to) branch file = do
 | 
					
 | 
				
			||||||
	hPutStrLn to want
 | 
					{- Uses a running git cat-file read the content of an object.
 | 
				
			||||||
 | 
					 - Objects that do not exist will have "" returned. -}
 | 
				
			||||||
 | 
					catObject :: CatFileHandle -> String -> IO L.ByteString
 | 
				
			||||||
 | 
					catObject (_, from, to) object = do
 | 
				
			||||||
 | 
						hPutStrLn to object
 | 
				
			||||||
	hFlush to
 | 
						hFlush to
 | 
				
			||||||
	header <- hGetLine from
 | 
						header <- hGetLine from
 | 
				
			||||||
	case words header of
 | 
						case words header of
 | 
				
			||||||
		[sha, blob, size]
 | 
							[sha, objtype, size]
 | 
				
			||||||
			| length sha == Git.shaSize &&
 | 
								| length sha == Git.shaSize &&
 | 
				
			||||||
			  blob == "blob" -> handle size
 | 
								  validobjtype objtype -> handle size
 | 
				
			||||||
			| otherwise -> empty
 | 
								| otherwise -> empty
 | 
				
			||||||
		_
 | 
							_
 | 
				
			||||||
			| header == want ++ " missing" -> empty
 | 
								| header == object ++ " missing" -> empty
 | 
				
			||||||
			| otherwise -> error $ "unknown response from git cat-file " ++ header
 | 
								| otherwise -> error $ "unknown response from git cat-file " ++ header
 | 
				
			||||||
	where
 | 
						where
 | 
				
			||||||
		want = branch ++ ":" ++ file
 | 
					 | 
				
			||||||
		handle size = case reads size of
 | 
							handle size = case reads size of
 | 
				
			||||||
			[(bytes, "")] -> readcontent bytes
 | 
								[(bytes, "")] -> readcontent bytes
 | 
				
			||||||
			_ -> empty
 | 
								_ -> empty
 | 
				
			||||||
		readcontent bytes = do
 | 
							readcontent bytes = do
 | 
				
			||||||
			content <- B.hGet from bytes
 | 
								content <- S.hGet from bytes
 | 
				
			||||||
			c <- hGetChar from
 | 
								c <- hGetChar from
 | 
				
			||||||
			when (c /= '\n') $
 | 
								when (c /= '\n') $
 | 
				
			||||||
				error "missing newline from git cat-file"
 | 
									error "missing newline from git cat-file"
 | 
				
			||||||
			return $ B.unpack content
 | 
								return $ L.fromChunks [content]
 | 
				
			||||||
		empty = return ""
 | 
							empty = return L.empty
 | 
				
			||||||
 | 
							validobjtype t
 | 
				
			||||||
 | 
								| t == "blob" = True
 | 
				
			||||||
 | 
								| t == "commit" = True
 | 
				
			||||||
 | 
								| t == "tree" = True
 | 
				
			||||||
 | 
								| otherwise = False
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -21,6 +21,7 @@ import qualified Data.ByteString.Lazy.Char8 as L
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Common
 | 
					import Common
 | 
				
			||||||
import Git
 | 
					import Git
 | 
				
			||||||
 | 
					import Git.CatFile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- Performs a union merge between two branches, staging it in the index.
 | 
					{- Performs a union merge between two branches, staging it in the index.
 | 
				
			||||||
 - Any previously staged changes in the index will be lost.
 | 
					 - Any previously staged changes in the index will be lost.
 | 
				
			||||||
| 
						 | 
					@ -30,14 +31,16 @@ import Git
 | 
				
			||||||
merge :: String -> String -> Repo -> IO ()
 | 
					merge :: String -> String -> Repo -> IO ()
 | 
				
			||||||
merge x y repo = do
 | 
					merge x y repo = do
 | 
				
			||||||
	a <- ls_tree x repo
 | 
						a <- ls_tree x repo
 | 
				
			||||||
	b <- merge_trees x y repo
 | 
						h <- catFileStart repo
 | 
				
			||||||
 | 
						b <- merge_trees x y h repo
 | 
				
			||||||
 | 
						catFileStop h
 | 
				
			||||||
	update_index repo (a++b)
 | 
						update_index repo (a++b)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- Merges a list of branches into the index. Previously staged changed in
 | 
					{- Merges a list of branches into the index. Previously staged changed in
 | 
				
			||||||
 - the index are preserved (and participate in the merge). -}
 | 
					 - the index are preserved (and participate in the merge). -}
 | 
				
			||||||
merge_index :: Repo -> [String] -> IO ()
 | 
					merge_index :: CatFileHandle -> Repo -> [String] -> IO ()
 | 
				
			||||||
merge_index repo bs =
 | 
					merge_index h repo bs =
 | 
				
			||||||
	update_index repo =<< concat <$> mapM (`merge_tree_index` repo) bs
 | 
						update_index repo =<< concat <$> mapM (\b -> merge_tree_index b h repo) bs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- Feeds a list into update-index. Later items in the list can override
 | 
					{- Feeds a list into update-index. Later items in the list can override
 | 
				
			||||||
 - earlier ones, so the list can be generated from any combination of
 | 
					 - earlier ones, so the list can be generated from any combination of
 | 
				
			||||||
| 
						 | 
					@ -60,22 +63,22 @@ ls_tree x = pipeNullSplit params
 | 
				
			||||||
		params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
 | 
							params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- For merging two trees. -}
 | 
					{- For merging two trees. -}
 | 
				
			||||||
merge_trees :: String -> String -> Repo -> IO [String]
 | 
					merge_trees :: String -> String -> CatFileHandle -> Repo -> IO [String]
 | 
				
			||||||
merge_trees x y = calc_merge $ "diff-tree":diff_opts ++ [x, y]
 | 
					merge_trees x y h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- For merging a single tree into the index. -}
 | 
					{- For merging a single tree into the index. -}
 | 
				
			||||||
merge_tree_index :: String -> Repo -> IO [String]
 | 
					merge_tree_index :: String -> CatFileHandle -> Repo -> IO [String]
 | 
				
			||||||
merge_tree_index x = calc_merge $ "diff-index":diff_opts ++ ["--cached", x]
 | 
					merge_tree_index x h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
diff_opts :: [String]
 | 
					diff_opts :: [String]
 | 
				
			||||||
diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"]
 | 
					diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- Calculates how to perform a merge, using git to get a raw diff,
 | 
					{- Calculates how to perform a merge, using git to get a raw diff,
 | 
				
			||||||
 - and returning a list suitable for update_index. -}
 | 
					 - and returning a list suitable for update_index. -}
 | 
				
			||||||
calc_merge :: [String] -> Repo -> IO [String]
 | 
					calc_merge :: CatFileHandle -> [String] -> Repo -> IO [String]
 | 
				
			||||||
calc_merge differ repo = do
 | 
					calc_merge h differ repo = do
 | 
				
			||||||
	diff <- pipeNullSplit (map Param differ) repo
 | 
						diff <- pipeNullSplit (map Param differ) repo
 | 
				
			||||||
	l <- mapM (\p -> mergeFile p repo) (pairs diff)
 | 
						l <- mapM (\p -> mergeFile p h repo) (pairs diff)
 | 
				
			||||||
	return $ catMaybes l
 | 
						return $ catMaybes l
 | 
				
			||||||
	where
 | 
						where
 | 
				
			||||||
		pairs [] = []
 | 
							pairs [] = []
 | 
				
			||||||
| 
						 | 
					@ -97,12 +100,12 @@ hashObject content repo = getSha subcmd $ do
 | 
				
			||||||
{- Given an info line from a git raw diff, and the filename, generates
 | 
					{- Given an info line from a git raw diff, and the filename, generates
 | 
				
			||||||
 - a line suitable for update_index that union merges the two sides of the
 | 
					 - a line suitable for update_index that union merges the two sides of the
 | 
				
			||||||
 - diff. -}
 | 
					 - diff. -}
 | 
				
			||||||
mergeFile :: (String, FilePath) -> Repo -> IO (Maybe String)
 | 
					mergeFile :: (String, FilePath) -> CatFileHandle -> Repo -> IO (Maybe String)
 | 
				
			||||||
mergeFile (info, file) repo = case filter (/= nullsha) [asha, bsha] of
 | 
					mergeFile (info, file) h repo = case filter (/= nullsha) [asha, bsha] of
 | 
				
			||||||
	[] -> return Nothing
 | 
						[] -> return Nothing
 | 
				
			||||||
	(sha:[]) -> return $ Just $ update_index_line sha file
 | 
						(sha:[]) -> return $ Just $ update_index_line sha file
 | 
				
			||||||
	shas -> do
 | 
						shas -> do
 | 
				
			||||||
		content <- pipeRead (map Param ("show":shas)) repo
 | 
							content <- L.concat <$> mapM (catObject h) shas
 | 
				
			||||||
		sha <- hashObject (unionmerge content) repo
 | 
							sha <- hashObject (unionmerge content) repo
 | 
				
			||||||
		return $ Just $ update_index_line sha file
 | 
							return $ Just $ update_index_line sha file
 | 
				
			||||||
	where
 | 
						where
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										1
									
								
								debian/changelog
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								debian/changelog
									
										
									
									
										vendored
									
									
								
							| 
						 | 
					@ -6,6 +6,7 @@ git-annex (3.20111112) UNRELEASED; urgency=low
 | 
				
			||||||
    no longer needs to auto-merge.
 | 
					    no longer needs to auto-merge.
 | 
				
			||||||
  * init: When run in an already initalized repository, and without
 | 
					  * init: When run in an already initalized repository, and without
 | 
				
			||||||
    a description specified, don't delete the old description. 
 | 
					    a description specified, don't delete the old description. 
 | 
				
			||||||
 | 
					  * Optimised union merging; now only runs git cat-file once.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 -- Joey Hess <joeyh@debian.org>  Sat, 12 Nov 2011 14:50:21 -0400
 | 
					 -- Joey Hess <joeyh@debian.org>  Sat, 12 Nov 2011 14:50:21 -0400
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,6 +8,10 @@ Instead, I'd like a way to stream multiple objects into git using stdin.
 | 
				
			||||||
Sometime, should look at either extending git-hash-object to support that,
 | 
					Sometime, should look at either extending git-hash-object to support that,
 | 
				
			||||||
or possibly look at using git-fast-import instead.
 | 
					or possibly look at using git-fast-import instead.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					--- 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
`git-annex merge` also runs `git show` once per file that needs to be
 | 
					`git-annex merge` also runs `git show` once per file that needs to be
 | 
				
			||||||
merged. This could be reduced to a single call to `git-cat-file --batch`,
 | 
					merged. This could be reduced to a single call to `git-cat-file --batch`,
 | 
				
			||||||
There is already a Git.CatFile library that can do this easily. --[[Joey]]
 | 
					There is already a Git.CatFile library that can do this easily. --[[Joey]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					> This is now done, part above remains todo. --[[Joey]] 
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue