From 051c68041b5b7a58e7080403e389d0641691edfd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Jun 2012 12:09:01 -0400 Subject: [PATCH 1/7] properly handle deleted files when processing ls-files --unmerged --- Command/Sync.hs | 24 ++++++++++++-- Git/LsFiles.hs | 88 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 94 insertions(+), 18 deletions(-) diff --git a/Command/Sync.hs b/Command/Sync.hs index 1da6b0b812..2f38636175 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -155,10 +155,30 @@ mergeAnnex = do Annex.Branch.forceUpdate stop -mergeFrom :: Git.Ref -> CommandCleanup +mergeFrom :: Git.Ref -> Annex Bool mergeFrom branch = do showOutput - inRepo $ Git.Merge.mergeNonInteractive branch + ok <- inRepo $ Git.Merge.mergeNonInteractive branch + if ok + then return ok + else resolveMerge + +{- Resolves a conflicted merge. It's important that any conflicts be + - resolved in a way that itself avoids later merge conflicts, since + - multiple repositories may be doing this concurrently. + - + - Only annexed files are resolved; other files are left for the user to + - handle. + - + - This uses the Keys pointed to by the files to construct new + - filenames. So a conflicted merge of file foo will delete it, + - and add files foo.KEYA and foo.KEYB. + - + - A conflict can also result due to + -} +resolveMerge :: Annex Bool +resolveMerge = do + changed :: Remote -> Git.Ref -> Annex Bool changed remote b = do diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index 540503a28a..ce7c84aee9 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -13,6 +13,9 @@ module Git.LsFiles ( changedUnstaged, typeChanged, typeChangedStaged, + Conflicting(..), + Unmerged(..), + unmerged, ) where import Common @@ -78,25 +81,78 @@ typeChanged' ps l repo = do prefix = [Params "diff --name-only --diff-filter=T -z"] suffix = Param "--" : map File l +{- A item in conflict has two possible values. + - Either can be Nothing, when that side deleted the file. 
-} +data Conflicting v = Conflicting + { valUs :: Maybe v + , valThem :: Maybe v + } deriving (Show) + +isConflicting :: Eq a => Conflicting a -> Bool +isConflicting (Conflicting a b) = a /= b + data Unmerged = Unmerged { unmergedFile :: FilePath - , unmergedBlobType :: BlobType - , unmergedSha :: Sha - } + , unmergedBlobType :: Conflicting BlobType + , unmergedSha :: Conflicting Sha + } deriving (Show) {- Returns a list of the files in the specified locations that have - - unresolved merge conflicts. Each unmerged file will have duplicates - - in the list for each unmerged version (typically two). -} + - unresolved merge conflicts. + - + - ls-files outputs multiple lines per conflicting file, each with its own + - stage number: + - 1 = old version, can be ignored + - 2 = us + - 3 = them + - If a line is omitted, that side deleted the file. + -} unmerged :: [FilePath] -> Repo -> IO [Unmerged] -unmerged l repo = catMaybes . map parse <$> list repo +unmerged l repo = reduceUnmerged [] . catMaybes . map parseUnmerged <$> list repo where - list = pipeNullSplit $ Params "ls-files --unmerged -z --" : map File l - parse s - | null file || length ws < 2 = Nothing - | otherwise = do - blobtype <- readBlobType (ws !! 0) - sha <- extractSha (ws !! 1) - return $ Unmerged file blobtype sha - where - (metadata, file) = separate (== '\t') s - ws = words metadata + files = map File l + list = pipeNullSplit $ Params "ls-files --unmerged -z --" : files + +data InternalUnmerged = InternalUnmerged + { isus :: Bool + , ifile :: FilePath + , iblobtype :: Maybe BlobType + , isha :: Maybe Sha + } deriving (Show) + +parseUnmerged :: String -> Maybe InternalUnmerged +parseUnmerged s + | null file || length ws < 3 = Nothing + | otherwise = do + stage <- readish (ws !! 2) + unless (stage == 2 || stage == 3) $ + fail undefined -- skip stage 1 + blobtype <- readBlobType (ws !! 0) + sha <- extractSha (ws !! 
1) + return $ InternalUnmerged (stage == 2) file (Just blobtype) (Just sha) + where + (metadata, file) = separate (== '\t') s + ws = words metadata + +reduceUnmerged :: [Unmerged] -> [InternalUnmerged] -> [Unmerged] +reduceUnmerged c [] = c +reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest + where + (rest, sibi) = findsib i is + (blobtypeA, blobtypeB, shaA, shaB) + | isus i = (iblobtype i, iblobtype sibi, isha i, isha sibi) + | otherwise = (iblobtype sibi, iblobtype i, isha sibi, isha i) + new = Unmerged + { unmergedFile = ifile i + , unmergedBlobType = Conflicting blobtypeA blobtypeB + , unmergedSha = Conflicting shaA shaB + } + findsib templatei [] = ([], deleted templatei) + findsib templatei (i:is) + | ifile i == ifile templatei = (is, i) + | otherwise = (i:is, deleted templatei) + deleted templatei = templatei + { isus = not (isus templatei) + , iblobtype = Nothing + , isha = Nothing + } From 048b64024a14feb0d9ed26abe97c542cfacbc8af Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Jun 2012 13:08:32 -0400 Subject: [PATCH 2/7] sync: Automatically resolves merge conflicts. 
untested, but it compiles :) --- Command/Sync.hs | 53 +++++++++++++++++++++++++++++++++++++++++----- Git/LsFiles.hs | 11 ++++------ Git/Types.hs | 2 ++ debian/changelog | 1 + doc/git-annex.mdwn | 5 +++++ 5 files changed, 60 insertions(+), 12 deletions(-) diff --git a/Command/Sync.hs b/Command/Sync.hs index 2f38636175..a39a2e57f1 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -15,15 +15,21 @@ import Command import qualified Remote import qualified Annex import qualified Annex.Branch +import qualified Annex.Queue +import Annex.Content +import Annex.CatFile import qualified Git.Command +import qualified Git.LsFiles as LsFiles import qualified Git.Merge import qualified Git.Branch import qualified Git.Ref import qualified Git +import Git.Types (BlobType(..)) import qualified Types.Remote import qualified Remote.Git import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L def :: [Command] def = [command "sync" (paramOptional (paramRepeating paramRemote)) @@ -161,7 +167,11 @@ mergeFrom branch = do ok <- inRepo $ Git.Merge.mergeNonInteractive branch if ok then return ok - else resolveMerge + else do + merged <- resolveMerge + when merged $ + showNote "merge conflict automatically resolved" + return merged {- Resolves a conflicted merge. It's important that any conflicts be - resolved in a way that itself avoids later merge conflicts, since @@ -171,15 +181,48 @@ mergeFrom branch = do - handle. - - This uses the Keys pointed to by the files to construct new - - filenames. So a conflicted merge of file foo will delete it, - - and add files foo.KEYA and foo.KEYB. + - filenames. So when both sides modified file foo, + - it will be deleted, and replaced with files foo.KEYA and foo.KEYB. - - - A conflict can also result due to + - On the other hand, when one side deleted foo, and the other modified it, + - it will be deleted, and the modified version stored as file + - foo.KEYA (or KEYB). 
-} resolveMerge :: Annex Bool resolveMerge = do - + top <- fromRepo Git.repoPath + all id <$> (mapM resolveMerge' =<< inRepo (LsFiles.unmerged [top])) +resolveMerge' :: LsFiles.Unmerged -> Annex Bool +resolveMerge' u + | issymlink LsFiles.valUs && issymlink LsFiles.valThem = do + keyUs <- getkey LsFiles.valUs + keyThem <- getkey LsFiles.valThem + if (keyUs == keyThem) + then makelink keyUs (file ++ "." ++ show keyUs) + else do + void $ liftIO $ tryIO $ removeFile file + Annex.Queue.addCommand "rm" [Params "--quiet -f --"] [file] + makelink keyUs (file ++ "." ++ show keyUs) + makelink keyThem (file ++ "." ++ show keyThem) + return True + | otherwise = return False + where + file = LsFiles.unmergedFile u + issymlink select = any (select (LsFiles.unmergedBlobType u) ==) + [Just SymlinkBlob, Nothing] + makelink (Just key) f = do + l <- calcGitLink file key + liftIO $ createSymbolicLink l f + Annex.Queue.addCommand "add" [Param "--force", Param "--"] [f] + makelink _ _ = noop + getkey select = do + let msha = select $ LsFiles.unmergedSha u + case msha of + Nothing -> return Nothing + Just sha -> fileKey . takeFileName + . encodeW8 . L.unpack <$> catObject sha + changed :: Remote -> Git.Ref -> Annex Bool changed remote b = do let r = remoteBranch remote b diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index ce7c84aee9..321913334b 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -88,9 +88,6 @@ data Conflicting v = Conflicting , valThem :: Maybe v } deriving (Show) -isConflicting :: Eq a => Conflicting a -> Bool -isConflicting (Conflicting a b) = a /= b - data Unmerged = Unmerged { unmergedFile :: FilePath , unmergedBlobType :: Conflicting BlobType @@ -124,7 +121,7 @@ parseUnmerged :: String -> Maybe InternalUnmerged parseUnmerged s | null file || length ws < 3 = Nothing | otherwise = do - stage <- readish (ws !! 2) + stage <- readish (ws !! 2) :: Maybe Int unless (stage == 2 || stage == 3) $ fail undefined -- skip stage 1 blobtype <- readBlobType (ws !! 
0) @@ -148,9 +145,9 @@ reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest , unmergedSha = Conflicting shaA shaB } findsib templatei [] = ([], deleted templatei) - findsib templatei (i:is) - | ifile i == ifile templatei = (is, i) - | otherwise = (i:is, deleted templatei) + findsib templatei (l:ls) + | ifile l == ifile templatei = (ls, l) + | otherwise = (l:ls, deleted templatei) deleted templatei = templatei { isus = not (isus templatei) , iblobtype = Nothing diff --git a/Git/Types.hs b/Git/Types.hs index e8cdbb442d..0c37427c7d 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -51,6 +51,7 @@ type Tag = Ref {- Types of objects that can be stored in git. -} data ObjectType = BlobObject | CommitObject | TreeObject + deriving (Eq) instance Show ObjectType where show BlobObject = "blob" @@ -65,6 +66,7 @@ readObjectType _ = Nothing {- Types of blobs. -} data BlobType = FileBlob | ExecutableBlob | SymlinkBlob + deriving (Eq) {- Git uses magic numbers to denote the type of a blob. -} instance Show BlobType where diff --git a/debian/changelog b/debian/changelog index c1ebac8398..46afb6e4d5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -9,6 +9,7 @@ git-annex (3.20120625) UNRELEASED; urgency=low * Accept arbitrarily encoded repository filepaths etc when reading git config output. This fixes support for remotes with unusual characters in their names. + * sync: Automatically resolves merge conflicts. -- Joey Hess Mon, 25 Jun 2012 11:38:12 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 39fad04882..c52a5f3bf9 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -135,6 +135,11 @@ subdirectories). commands to do each of those steps by hand, or if you don't want to worry about the details, you can use sync. + Merge conflicts are automatically resolved by sync. When two conflicting + versions of a file have been committed, both will be added to the tree, + under different filenames. 
For example, file "foo" would be replaced + with "foo.somekey" and "foo.otherkey". + Note that syncing with a remote will not update the remote's working tree with changes made to the local repository. However, those changes are pushed to the remote, so can be merged into its working tree From abd36ed33659f9b0b369c6d2510455365a943e3c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Jun 2012 13:35:02 -0400 Subject: [PATCH 3/7] don't automerge when the symlinks cannot be parsed as keys --- Command/Sync.hs | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/Command/Sync.hs b/Command/Sync.hs index a39a2e57f1..8ac0399435 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -195,19 +195,21 @@ resolveMerge = do resolveMerge' :: LsFiles.Unmerged -> Annex Bool resolveMerge' u - | issymlink LsFiles.valUs && issymlink LsFiles.valThem = do - keyUs <- getkey LsFiles.valUs - keyThem <- getkey LsFiles.valThem - if (keyUs == keyThem) - then makelink keyUs (file ++ "." ++ show keyUs) - else do + | issymlink LsFiles.valUs && issymlink LsFiles.valThem = + withKey LsFiles.valUs $ \keyUs -> + withKey LsFiles.valThem $ \keyThem -> go keyUs keyThem + | otherwise = return False + where + go keyUs keyThem + | keyUs == keyThem = do + makelink keyUs (file ++ "." ++ show keyUs) + return True + | otherwise = do void $ liftIO $ tryIO $ removeFile file Annex.Queue.addCommand "rm" [Params "--quiet -f --"] [file] makelink keyUs (file ++ "." ++ show keyUs) makelink keyThem (file ++ "." 
++ show keyThem) - return True - | otherwise = return False - where + return True file = LsFiles.unmergedFile u issymlink select = any (select (LsFiles.unmergedBlobType u) ==) [Just SymlinkBlob, Nothing] @@ -216,12 +218,15 @@ resolveMerge' u liftIO $ createSymbolicLink l f Annex.Queue.addCommand "add" [Param "--force", Param "--"] [f] makelink _ _ = noop - getkey select = do + withKey select a = do let msha = select $ LsFiles.unmergedSha u case msha of - Nothing -> return Nothing - Just sha -> fileKey . takeFileName - . encodeW8 . L.unpack <$> catObject sha + Nothing -> a Nothing + Just sha -> do + key <- fileKey . takeFileName + . encodeW8 . L.unpack + <$> catObject sha + maybe (return False) (a . Just) key changed :: Remote -> Git.Ref -> Annex Bool changed remote b = do From 8810e57995f78876d5eb2b5429272d884c5e25c2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Jun 2012 15:00:26 -0400 Subject: [PATCH 4/7] fix file name --- Command/Sync.hs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Command/Sync.hs b/Command/Sync.hs index 8ac0399435..759afed822 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -202,22 +202,23 @@ resolveMerge' u where go keyUs keyThem | keyUs == keyThem = do - makelink keyUs (file ++ "." ++ show keyUs) + makelink keyUs return True | otherwise = do void $ liftIO $ tryIO $ removeFile file Annex.Queue.addCommand "rm" [Params "--quiet -f --"] [file] - makelink keyUs (file ++ "." ++ show keyUs) - makelink keyThem (file ++ "." ++ show keyThem) + makelink keyUs + makelink keyThem return True file = LsFiles.unmergedFile u issymlink select = any (select (LsFiles.unmergedBlobType u) ==) [Just SymlinkBlob, Nothing] - makelink (Just key) f = do - l <- calcGitLink file key - liftIO $ createSymbolicLink l f - Annex.Queue.addCommand "add" [Param "--force", Param "--"] [f] - makelink _ _ = noop + makelink (Just key) = do + let dest = file ++ "." 
++ show key + l <- calcGitLink dest key + liftIO $ createSymbolicLink l dest + Annex.Queue.addCommand "add" [Param "--force", Param "--"] [dest] + makelink _ = noop withKey select a = do let msha = select $ LsFiles.unmergedSha u case msha of From 9147ad74931222f05b76102bfea61b1fe177fd32 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Jun 2012 15:03:13 -0400 Subject: [PATCH 5/7] commit merge resolution this is necessary so the sync can continue successfully with its push phase --- Command/Sync.hs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Command/Sync.hs b/Command/Sync.hs index 759afed822..b146379d14 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -167,11 +167,7 @@ mergeFrom branch = do ok <- inRepo $ Git.Merge.mergeNonInteractive branch if ok then return ok - else do - merged <- resolveMerge - when merged $ - showNote "merge conflict automatically resolved" - return merged + else resolveMerge {- Resolves a conflicted merge. It's important that any conflicts be - resolved in a way that itself avoids later merge conflicts, since @@ -191,7 +187,12 @@ mergeFrom branch = do resolveMerge :: Annex Bool resolveMerge = do top <- fromRepo Git.repoPath - all id <$> (mapM resolveMerge' =<< inRepo (LsFiles.unmerged [top])) + merged <- all id <$> (mapM resolveMerge' =<< inRepo (LsFiles.unmerged [top])) + when merged $ do + Annex.Queue.flush + void $ inRepo $ Git.Command.runBool "commit" + [Param "-m", Param "git-annex automatic merge resolution"] + return merged resolveMerge' :: LsFiles.Unmerged -> Annex Bool resolveMerge' u From 054ddda18a48abce03a1c0b50aef4eed714aa320 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Jun 2012 16:03:42 -0400 Subject: [PATCH 6/7] better filenames for conflict resolution files --- Command/Sync.hs | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/Command/Sync.hs b/Command/Sync.hs index b146379d14..5e63ee63ad 100644 --- 
a/Command/Sync.hs +++ b/Command/Sync.hs @@ -30,6 +30,7 @@ import qualified Remote.Git import qualified Data.Map as M import qualified Data.ByteString.Lazy as L +import Data.Hash.MD5 def :: [Command] def = [command "sync" (paramOptional (paramRepeating paramRemote)) @@ -191,7 +192,7 @@ resolveMerge = do when merged $ do Annex.Queue.flush void $ inRepo $ Git.Command.runBool "commit" - [Param "-m", Param "git-annex automatic merge resolution"] + [Param "-m", Param "git-annex automatic merge conflict fix"] return merged resolveMerge' :: LsFiles.Unmerged -> Annex Bool @@ -206,7 +207,7 @@ resolveMerge' u makelink keyUs return True | otherwise = do - void $ liftIO $ tryIO $ removeFile file + liftIO $ nukeFile file Annex.Queue.addCommand "rm" [Params "--quiet -f --"] [file] makelink keyUs makelink keyThem @@ -215,9 +216,11 @@ resolveMerge' u issymlink select = any (select (LsFiles.unmergedBlobType u) ==) [Just SymlinkBlob, Nothing] makelink (Just key) = do - let dest = file ++ "." ++ show key + let dest = mergeFile file key l <- calcGitLink dest key - liftIO $ createSymbolicLink l dest + liftIO $ do + nukeFile dest + createSymbolicLink l dest Annex.Queue.addCommand "add" [Param "--force", Param "--"] [dest] makelink _ = noop withKey select a = do @@ -229,7 +232,35 @@ resolveMerge' u . encodeW8 . L.unpack <$> catObject sha maybe (return False) (a . Just) key - + +{- The filename to use when resolving a conflicted merge of a file, + - that points to a key. + - + - Something derived from the key needs to be included in the filename, + - but rather than exposing the whole key to the user, a very weak hash + - is used. There is a very real, although still unlikely, chance of + - conflicts using this hash. + - + - In the event that there is a conflict with the filename generated + - for some other key, that conflict will itself be handled by the + - conflicted merge resolution code. That case is detected, and the full + - key is used in the filename. 
+ -} +mergeFile :: FilePath -> Key -> FilePath +mergeFile file key + | doubleconflict = go $ show key + | otherwise = go $ shortHash $ show key + where + vermarker = ".version-" + doubleconflict = vermarker `isSuffixOf` (dropExtension file) + go v = takeDirectory file + </> dropExtension (takeFileName file) + ++ vermarker ++ v + ++ takeExtension file + +shortHash :: String -> String +shortHash = take 4 . md5s . encodeFilePath + changed :: Remote -> Git.Ref -> Annex Bool changed remote b = do let r = remoteBranch remote b From 36ddb81df6938cd604ecccea52ae758f481fd79b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 27 Jun 2012 16:09:17 -0400 Subject: [PATCH 7/7] use "variant" rather than "version" While this word may be less familiar to some users, it avoids the connotation that version 2 is better than version 1, which is wrong when the two variants were conflicting. --- Command/Sync.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Command/Sync.hs b/Command/Sync.hs index 5e63ee63ad..06e1fd5c92 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -251,11 +251,11 @@ mergeFile file key | doubleconflict = go $ show key | otherwise = go $ shortHash $ show key where - vermarker = ".version-" + varmarker = ".variant-" doubleconflict = vermarker `isSuffixOf` (dropExtension file) go v = takeDirectory file </> dropExtension (takeFileName file) - ++ vermarker ++ v + ++ varmarker ++ v ++ takeExtension file shortHash :: String -> String