always find optimal merge

While testing b9ac585454, I found that it didn't find the
optimal union merge: the second sha was the one to use, at least in
the case I tried. Let's just try all shas to see if any can be reused.

I stopped using the expensive nub, so despite the use of sets to
sort/uniq file contents, this is probably as fast or faster than it
was before.
This commit is contained in:
Joey Hess 2011-12-12 01:33:02 -04:00
parent 0cbab5de65
commit acd7a52dfd
2 changed files with 26 additions and 14 deletions

View file

@ -15,8 +15,8 @@ module Git.UnionMerge (
) where ) where
import System.Cmd.Utils import System.Cmd.Utils
import Data.List
import qualified Data.ByteString.Lazy.Char8 as L import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.Set as S
import Common import Common
import Git import Git
@ -103,22 +103,14 @@ calc_merge ch differ repo streamer = gendiff >>= go
mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String) mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String)
mergeFile info file h repo = case filter (/= nullsha) [Ref asha, Ref bsha] of mergeFile info file h repo = case filter (/= nullsha) [Ref asha, Ref bsha] of
[] -> return Nothing [] -> return Nothing
(sha:[]) -> return $ Just $ update_index_line sha file (sha:[]) -> use sha
(sha:shas) -> do shas -> use =<< either return (hashObject repo . L.unlines) =<<
newsha <- maybe (return sha) (hashObject repo . L.unlines) =<< calcMerge . zip shas <$> mapM getcontents shas
unionmerge
<$> (L.lines <$> catObject h sha)
<*> (map L.lines <$> mapM (catObject h) shas)
return $ Just $ update_index_line newsha file
where where
[_colonmode, _bmode, asha, bsha, _status] = words info [_colonmode, _bmode, asha, bsha, _status] = words info
nullsha = Ref $ replicate shaSize '0' nullsha = Ref $ replicate shaSize '0'
getcontents s = L.lines <$> catObject h s
unionmerge origcontent content use sha = return $ Just $ update_index_line sha file
| newcontent == origcontent = Nothing
| otherwise = Just newcontent
where
newcontent = nub $ concat $ origcontent:content
{- Injects some content into git, returning its Sha. -} {- Injects some content into git, returning its Sha. -}
hashObject :: Repo -> L.ByteString -> IO Sha hashObject :: Repo -> L.ByteString -> IO Sha
@ -131,3 +123,17 @@ hashObject repo content = getSha subcmd $ do
where where
subcmd = "hash-object" subcmd = "hash-object"
params = [subcmd, "-w", "--stdin"] params = [subcmd, "-w", "--stdin"]
{- Calculates a union merge between a list of refs, with contents.
 -
 - The merged content is the sorted, de-duplicated collection of every
 - line found in any of the inputs.
 -
 - When possible, reuses the content of an existing ref, rather than
 - generating new content: if some ref's own lines, once sorted and
 - de-duplicated, already equal the merged content, the first such ref
 - in the input list is returned as Left. Otherwise the merged lines are
 - returned as Right.
 -
 - An empty input list yields Right [].
 -}
calcMerge :: [(Ref, [L.ByteString])] -> Either Ref [L.ByteString]
calcMerge shacontents = case reuseable of
    (ref:_) -> Left ref
    [] -> Right new
  where
    -- Refs whose normalized content already equals the full union,
    -- in input order.
    reuseable =
        [ ref
        | (ref, content) <- shacontents
        , sorteduniq content == new
        ]
    new = sorteduniq $ concatMap snd shacontents
    -- Going through a Set both sorts the lines and drops duplicates.
    sorteduniq = S.toList . S.fromList

6
debian/changelog vendored
View file

@ -1,3 +1,9 @@
git-annex (3.20111212) UNRELEASED; urgency=low
* Union merge now finds the least expensive way to represent the merge.
-- Joey Hess <joeyh@debian.org> Mon, 12 Dec 2011 01:57:49 -0400
git-annex (3.20111211) unstable; urgency=medium git-annex (3.20111211) unstable; urgency=medium
* Fix bug in last version in getting contents from bare repositories. * Fix bug in last version in getting contents from bare repositories.