Optimised union merging; now only runs git cat-file once.
This commit is contained in:
parent
cea65b9e5b
commit
04edae6791
7 changed files with 62 additions and 36 deletions
|
@ -149,7 +149,8 @@ update = onceonly $ do
|
||||||
- documentation advises users not to directly
|
- documentation advises users not to directly
|
||||||
- modify the branch.
|
- modify the branch.
|
||||||
-}
|
-}
|
||||||
inRepo $ \g -> Git.UnionMerge.merge_index g branches
|
h <- catFileHandle
|
||||||
|
inRepo $ \g -> Git.UnionMerge.merge_index h g branches
|
||||||
ff <- if dirty then return False else tryFastForwardTo refs
|
ff <- if dirty then return False else tryFastForwardTo refs
|
||||||
unless ff $ inRepo $
|
unless ff $ inRepo $
|
||||||
Git.commit merge_desc fullname (nub $ fullname:refs)
|
Git.commit merge_desc fullname (nub $ fullname:refs)
|
||||||
|
@ -280,7 +281,7 @@ get' staleok file = fromcache =<< getCache file
|
||||||
fromjournal Nothing
|
fromjournal Nothing
|
||||||
| staleok = withIndex frombranch
|
| staleok = withIndex frombranch
|
||||||
| otherwise = withIndexUpdate $ frombranch >>= cache
|
| otherwise = withIndexUpdate $ frombranch >>= cache
|
||||||
frombranch = catFile fullname file
|
frombranch = L.unpack <$> catFile fullname file
|
||||||
cache content = do
|
cache content = do
|
||||||
setCache file content
|
setCache file content
|
||||||
return content
|
return content
|
||||||
|
|
|
@ -6,18 +6,25 @@
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Annex.CatFile (
|
module Annex.CatFile (
|
||||||
catFile
|
catFile,
|
||||||
|
catFileHandle
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as L
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import qualified Git.CatFile
|
import qualified Git.CatFile
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
|
|
||||||
catFile :: String -> FilePath -> Annex String
|
catFile :: String -> FilePath -> Annex L.ByteString
|
||||||
catFile branch file = maybe startup go =<< Annex.getState Annex.catfilehandle
|
catFile branch file = do
|
||||||
|
h <- catFileHandle
|
||||||
|
liftIO $ Git.CatFile.catFile h branch file
|
||||||
|
|
||||||
|
catFileHandle :: Annex Git.CatFile.CatFileHandle
|
||||||
|
catFileHandle = maybe startup return =<< Annex.getState Annex.catfilehandle
|
||||||
where
|
where
|
||||||
startup = do
|
startup = do
|
||||||
h <- inRepo Git.CatFile.catFileStart
|
h <- inRepo Git.CatFile.catFileStart
|
||||||
Annex.changeState $ \s -> s { Annex.catfilehandle = Just h }
|
Annex.changeState $ \s -> s { Annex.catfilehandle = Just h }
|
||||||
go h
|
return h
|
||||||
go h = liftIO $ Git.CatFile.catFile h branch file
|
|
||||||
|
|
|
@ -197,7 +197,7 @@ getKeysReferencedInGit ref = do
|
||||||
findkeys c (l:ls)
|
findkeys c (l:ls)
|
||||||
| isSymLink (LsTree.mode l) = do
|
| isSymLink (LsTree.mode l) = do
|
||||||
content <- catFile ref $ LsTree.file l
|
content <- catFile ref $ LsTree.file l
|
||||||
case fileKey (takeFileName content) of
|
case fileKey (takeFileName $ L.unpack content) of
|
||||||
Nothing -> findkeys c ls
|
Nothing -> findkeys c ls
|
||||||
Just k -> findkeys (k:c) ls
|
Just k -> findkeys (k:c) ls
|
||||||
| otherwise = findkeys c ls
|
| otherwise = findkeys c ls
|
||||||
|
|
|
@ -9,13 +9,15 @@ module Git.CatFile (
|
||||||
CatFileHandle,
|
CatFileHandle,
|
||||||
catFileStart,
|
catFileStart,
|
||||||
catFileStop,
|
catFileStop,
|
||||||
catFile
|
catFile,
|
||||||
|
catObject
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad.State
|
import Control.Monad.State
|
||||||
import System.Cmd.Utils
|
import System.Cmd.Utils
|
||||||
import System.IO
|
import System.IO
|
||||||
import qualified Data.ByteString.Char8 as B
|
import qualified Data.ByteString.Char8 as S
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as L
|
||||||
|
|
||||||
import Git
|
import Git
|
||||||
import Utility.SafeCommand
|
import Utility.SafeCommand
|
||||||
|
@ -34,30 +36,38 @@ catFileStop (pid, from, to) = do
|
||||||
hClose from
|
hClose from
|
||||||
forceSuccess pid
|
forceSuccess pid
|
||||||
|
|
||||||
{- Uses a running git cat-file read the content of a file from a branch.
|
{- Reads a file from a specified branch. -}
|
||||||
- Files that do not exist on the branch will have "" returned. -}
|
catFile :: CatFileHandle -> String -> FilePath -> IO L.ByteString
|
||||||
catFile :: CatFileHandle -> String -> FilePath -> IO String
|
catFile h branch file = catObject h (branch ++ ":" ++ file)
|
||||||
catFile (_, from, to) branch file = do
|
|
||||||
hPutStrLn to want
|
{- Uses a running git cat-file read the content of an object.
|
||||||
|
- Objects that do not exist will have "" returned. -}
|
||||||
|
catObject :: CatFileHandle -> String -> IO L.ByteString
|
||||||
|
catObject (_, from, to) object = do
|
||||||
|
hPutStrLn to object
|
||||||
hFlush to
|
hFlush to
|
||||||
header <- hGetLine from
|
header <- hGetLine from
|
||||||
case words header of
|
case words header of
|
||||||
[sha, blob, size]
|
[sha, objtype, size]
|
||||||
| length sha == Git.shaSize &&
|
| length sha == Git.shaSize &&
|
||||||
blob == "blob" -> handle size
|
validobjtype objtype -> handle size
|
||||||
| otherwise -> empty
|
| otherwise -> empty
|
||||||
_
|
_
|
||||||
| header == want ++ " missing" -> empty
|
| header == object ++ " missing" -> empty
|
||||||
| otherwise -> error $ "unknown response from git cat-file " ++ header
|
| otherwise -> error $ "unknown response from git cat-file " ++ header
|
||||||
where
|
where
|
||||||
want = branch ++ ":" ++ file
|
|
||||||
handle size = case reads size of
|
handle size = case reads size of
|
||||||
[(bytes, "")] -> readcontent bytes
|
[(bytes, "")] -> readcontent bytes
|
||||||
_ -> empty
|
_ -> empty
|
||||||
readcontent bytes = do
|
readcontent bytes = do
|
||||||
content <- B.hGet from bytes
|
content <- S.hGet from bytes
|
||||||
c <- hGetChar from
|
c <- hGetChar from
|
||||||
when (c /= '\n') $
|
when (c /= '\n') $
|
||||||
error "missing newline from git cat-file"
|
error "missing newline from git cat-file"
|
||||||
return $ B.unpack content
|
return $ L.fromChunks [content]
|
||||||
empty = return ""
|
empty = return L.empty
|
||||||
|
validobjtype t
|
||||||
|
| t == "blob" = True
|
||||||
|
| t == "commit" = True
|
||||||
|
| t == "tree" = True
|
||||||
|
| otherwise = False
|
||||||
|
|
|
@ -21,6 +21,7 @@ import qualified Data.ByteString.Lazy.Char8 as L
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
import Git
|
import Git
|
||||||
|
import Git.CatFile
|
||||||
|
|
||||||
{- Performs a union merge between two branches, staging it in the index.
|
{- Performs a union merge between two branches, staging it in the index.
|
||||||
- Any previously staged changes in the index will be lost.
|
- Any previously staged changes in the index will be lost.
|
||||||
|
@ -30,14 +31,16 @@ import Git
|
||||||
merge :: String -> String -> Repo -> IO ()
|
merge :: String -> String -> Repo -> IO ()
|
||||||
merge x y repo = do
|
merge x y repo = do
|
||||||
a <- ls_tree x repo
|
a <- ls_tree x repo
|
||||||
b <- merge_trees x y repo
|
h <- catFileStart repo
|
||||||
|
b <- merge_trees x y h repo
|
||||||
|
catFileStop h
|
||||||
update_index repo (a++b)
|
update_index repo (a++b)
|
||||||
|
|
||||||
{- Merges a list of branches into the index. Previously staged changed in
|
{- Merges a list of branches into the index. Previously staged changed in
|
||||||
- the index are preserved (and participate in the merge). -}
|
- the index are preserved (and participate in the merge). -}
|
||||||
merge_index :: Repo -> [String] -> IO ()
|
merge_index :: CatFileHandle -> Repo -> [String] -> IO ()
|
||||||
merge_index repo bs =
|
merge_index h repo bs =
|
||||||
update_index repo =<< concat <$> mapM (`merge_tree_index` repo) bs
|
update_index repo =<< concat <$> mapM (\b -> merge_tree_index b h repo) bs
|
||||||
|
|
||||||
{- Feeds a list into update-index. Later items in the list can override
|
{- Feeds a list into update-index. Later items in the list can override
|
||||||
- earlier ones, so the list can be generated from any combination of
|
- earlier ones, so the list can be generated from any combination of
|
||||||
|
@ -60,22 +63,22 @@ ls_tree x = pipeNullSplit params
|
||||||
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
|
params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
|
||||||
|
|
||||||
{- For merging two trees. -}
|
{- For merging two trees. -}
|
||||||
merge_trees :: String -> String -> Repo -> IO [String]
|
merge_trees :: String -> String -> CatFileHandle -> Repo -> IO [String]
|
||||||
merge_trees x y = calc_merge $ "diff-tree":diff_opts ++ [x, y]
|
merge_trees x y h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
|
||||||
|
|
||||||
{- For merging a single tree into the index. -}
|
{- For merging a single tree into the index. -}
|
||||||
merge_tree_index :: String -> Repo -> IO [String]
|
merge_tree_index :: String -> CatFileHandle -> Repo -> IO [String]
|
||||||
merge_tree_index x = calc_merge $ "diff-index":diff_opts ++ ["--cached", x]
|
merge_tree_index x h = calc_merge h $ "diff-index":diff_opts ++ ["--cached", x]
|
||||||
|
|
||||||
diff_opts :: [String]
|
diff_opts :: [String]
|
||||||
diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"]
|
diff_opts = ["--raw", "-z", "-r", "--no-renames", "-l0"]
|
||||||
|
|
||||||
{- Calculates how to perform a merge, using git to get a raw diff,
|
{- Calculates how to perform a merge, using git to get a raw diff,
|
||||||
- and returning a list suitable for update_index. -}
|
- and returning a list suitable for update_index. -}
|
||||||
calc_merge :: [String] -> Repo -> IO [String]
|
calc_merge :: CatFileHandle -> [String] -> Repo -> IO [String]
|
||||||
calc_merge differ repo = do
|
calc_merge h differ repo = do
|
||||||
diff <- pipeNullSplit (map Param differ) repo
|
diff <- pipeNullSplit (map Param differ) repo
|
||||||
l <- mapM (\p -> mergeFile p repo) (pairs diff)
|
l <- mapM (\p -> mergeFile p h repo) (pairs diff)
|
||||||
return $ catMaybes l
|
return $ catMaybes l
|
||||||
where
|
where
|
||||||
pairs [] = []
|
pairs [] = []
|
||||||
|
@ -97,12 +100,12 @@ hashObject content repo = getSha subcmd $ do
|
||||||
{- Given an info line from a git raw diff, and the filename, generates
|
{- Given an info line from a git raw diff, and the filename, generates
|
||||||
- a line suitable for update_index that union merges the two sides of the
|
- a line suitable for update_index that union merges the two sides of the
|
||||||
- diff. -}
|
- diff. -}
|
||||||
mergeFile :: (String, FilePath) -> Repo -> IO (Maybe String)
|
mergeFile :: (String, FilePath) -> CatFileHandle -> Repo -> IO (Maybe String)
|
||||||
mergeFile (info, file) repo = case filter (/= nullsha) [asha, bsha] of
|
mergeFile (info, file) h repo = case filter (/= nullsha) [asha, bsha] of
|
||||||
[] -> return Nothing
|
[] -> return Nothing
|
||||||
(sha:[]) -> return $ Just $ update_index_line sha file
|
(sha:[]) -> return $ Just $ update_index_line sha file
|
||||||
shas -> do
|
shas -> do
|
||||||
content <- pipeRead (map Param ("show":shas)) repo
|
content <- L.concat <$> mapM (catObject h) shas
|
||||||
sha <- hashObject (unionmerge content) repo
|
sha <- hashObject (unionmerge content) repo
|
||||||
return $ Just $ update_index_line sha file
|
return $ Just $ update_index_line sha file
|
||||||
where
|
where
|
||||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -6,6 +6,7 @@ git-annex (3.20111112) UNRELEASED; urgency=low
|
||||||
no longer needs to auto-merge.
|
no longer needs to auto-merge.
|
||||||
* init: When run in an already initalized repository, and without
|
* init: When run in an already initalized repository, and without
|
||||||
a description specified, don't delete the old description.
|
a description specified, don't delete the old description.
|
||||||
|
* Optimised union merging; now only runs git cat-file once.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Sat, 12 Nov 2011 14:50:21 -0400
|
-- Joey Hess <joeyh@debian.org> Sat, 12 Nov 2011 14:50:21 -0400
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,10 @@ Instead, I'd like a way to stream multiple objects into git using stdin.
|
||||||
Sometime, should look at either extending git-hash-object to support that,
|
Sometime, should look at either extending git-hash-object to support that,
|
||||||
or possibly look at using git-fast-import instead.
|
or possibly look at using git-fast-import instead.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
`git-annex merge` also runs `git show` once per file that needs to be
|
`git-annex merge` also runs `git show` once per file that needs to be
|
||||||
merged. This could be reduced to a single call to `git-cat-file --batch`,
|
merged. This could be reduced to a single call to `git-cat-file --batch`,
|
||||||
There is already a Git.CatFile library that can do this easily. --[[Joey]]
|
There is already a Git.CatFile library that can do this easily. --[[Joey]]
|
||||||
|
|
||||||
|
> This is now done, part above remains todo. --[[Joey]]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue