From b6b34f4916cfd0d935b81bc11ef53b6659e95275 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Dec 2015 15:41:09 -0400 Subject: [PATCH] automatic conflict resolution for v6 unlocked files Several tricky parts: * When the conflict is just between the same key being locked and unlocked, the unlocked version wins, and the file is not renamed in this case. * Need to update associated file map when conflict resolution renames an unlocked file. * git merge runs the smudge filter on the conflicting file, and actually overwrites the file with the same content it had before, and so invalidates its inode cache. This makes it difficult to know when it's safe to remove such files as conflict cruft, without going so far as to compare their entire contents. Dealt with this by preventing the smudge filter from populating the file when a merge is run. However, that also prevents the smudge filter being run for non-conflicting files, so eg moving a file won't put its new content into place. * Ideally, if a merge or a merge conflict resolution renames an unlocked file, the file in the work tree can just be moved, rather than copying the content to a new worktree file. This is attempted to be done in merge conflict resolution, but due to git merge's behavior of running smudge filters, what actually seems to happen is the old worktree file with the content is deleted and rewritten as a pointer file, so doesn't get reused. So, this is probably not as efficient as it optimally could be. If that becomes a problem, could look into running the merge in a separate worktree and updating the real worktree more efficiently, similarly to the direct mode merge. However, the direct mode merge had a lot of bugs, and I'd rather not use that more error-prone method unless really needed. --- Annex/AutoMerge.hs | 176 +++++++++++++++++++++++++++++++----------- Command/Smudge.hs | 15 +++- Utility/InodeCache.hs | 4 +- doc/todo/smudge.mdwn | 4 - 4 files changed, 145 insertions(+), 54 deletions(-) diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs index c32c3f66ac..89bcff0d62 100644 --- a/Annex/AutoMerge.hs +++ b/Annex/AutoMerge.hs @@ -1,6 +1,6 @@ {- git-annex automatic merge conflict resolution - - - Copyright 2012-2014 Joey Hess + - Copyright 2012-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -16,6 +16,7 @@ import qualified Annex.Queue import Annex.Direct import Annex.CatFile import Annex.Link +import Annex.Content import qualified Git.LsFiles as LsFiles import qualified Git.UpdateIndex as UpdateIndex import qualified Git.Merge @@ -26,15 +27,19 @@ import Git.Types (BlobType(..)) import Config import Annex.ReplaceFile import Annex.VariantFile +import qualified Database.Keys +import Annex.InodeSentinal +import Utility.InodeCache +import Command.Smudge (withSmudgeDisabled) import qualified Data.Set as S +import qualified Data.Map as M -{- Merges from a branch into the current branch - - (which may not exist yet), +{- Merges from a branch into the current branch (which may not exist yet), - with automatic merge conflict resolution. - - Callers should use Git.Branch.changed first, to make sure that - - there are changed from the current branch to the branch being merged in. + - there are changes from the current branch to the branch being merged in. -} autoMergeFrom :: Git.Ref -> Maybe Git.Ref -> Git.Branch.CommitMode -> Annex Bool autoMergeFrom branch currbranch commitmode = do @@ -45,7 +50,8 @@ autoMergeFrom branch currbranch commitmode = do where go old = ifM isDirect ( mergeDirect currbranch old branch (resolveMerge old branch) commitmode - , inRepo (Git.Merge.mergeNonInteractive branch commitmode) + -- Avoid smudge filter populating files while merging. + , withSmudgeDisabled (Git.Merge.mergeNonInteractive branch commitmode) <||> (resolveMerge old branch <&&> commitResolvedMerge commitmode) ) @@ -77,6 +83,12 @@ autoMergeFrom branch currbranch commitmode = do - the index, and written to the gitAnnexMergeDir, for later handling by - the direct mode merge code. - + - Unlocked files remain unlocked after merging, and locked files + - remain locked. When the merge conflict is between a locked and unlocked + - file, that otherwise point to the same content, the unlocked mode wins. + - This is done because only unlocked files work in filesystems that don't + - support symlinks. + - - Returns false when there are no merge conflicts to resolve. - A git merge can fail for other reasons, and this allows detecting - such failures. @@ -85,8 +97,11 @@ resolveMerge :: Maybe Git.Ref -> Git.Ref -> Annex Bool resolveMerge us them = do top <- fromRepo Git.repoPath (fs, cleanup) <- inRepo (LsFiles.unmerged [top]) - mergedfs <- catMaybes <$> mapM (resolveMerge' us them) fs - let merged = not (null mergedfs) + srcmap <- inodeMap $ pure (map LsFiles.unmergedFile fs, return True) + (mergedks, mergedfs) <- unzip <$> mapM (resolveMerge' srcmap us them) fs + let mergedks' = concat mergedks + let mergedfs' = catMaybes mergedfs + let merged = not (null mergedfs') void $ liftIO cleanup unlessM isDirect $ do @@ -98,46 +113,57 @@ resolveMerge us them = do void $ liftIO cleanup2 when merged $ do - unlessM isDirect $ - cleanConflictCruft mergedfs top + unlessM isDirect $ do + unstagedmap <- inodeMap $ inRepo $ LsFiles.notInRepo False [top] + cleanConflictCruft mergedks' mergedfs' unstagedmap Annex.Queue.flush showLongNote "Merge conflict was automatically resolved; you may want to examine the result." return merged -resolveMerge' :: Maybe Git.Ref -> Git.Ref -> LsFiles.Unmerged -> Annex (Maybe FilePath) -resolveMerge' Nothing _ _ = return Nothing -resolveMerge' (Just us) them u = do - kus <- getkey LsFiles.valUs LsFiles.valUs - kthem <- getkey LsFiles.valThem LsFiles.valThem +resolveMerge' :: InodeMap -> Maybe Git.Ref -> Git.Ref -> LsFiles.Unmerged -> Annex ([Key], Maybe FilePath) +resolveMerge' _ Nothing _ _ = return ([], Nothing) +resolveMerge' unstagedmap (Just us) them u = do + kus <- getkey LsFiles.valUs + kthem <- getkey LsFiles.valThem case (kus, kthem) of -- Both sides of conflict are annexed files (Just keyUs, Just keyThem) - | keyUs /= keyThem -> resolveby $ do - makelink keyUs - makelink keyThem - | otherwise -> resolveby $ - makelink keyUs + | keyUs /= keyThem -> resolveby [keyUs, keyThem] $ do + makeannexlink keyUs LsFiles.valUs + makeannexlink keyThem LsFiles.valThem + liftIO $ nukeFile file + | otherwise -> resolveby [keyUs, keyThem] $ do + -- Only resolve using symlink when both + -- were locked, otherwise use unlocked pointer. + if islocked LsFiles.valUs && islocked LsFiles.valThem + then makesymlink keyUs + else makepointer keyUs + liftIO $ nukeFile file -- Our side is annexed file, other side is not. - (Just keyUs, Nothing) -> resolveby $ do + (Just keyUs, Nothing) -> resolveby [keyUs] $ do graftin them file LsFiles.valThem LsFiles.valThem - makelink keyUs + makeannexlink keyUs LsFiles.valUs -- Our side is not annexed file, other side is. - (Nothing, Just keyThem) -> resolveby $ do + (Nothing, Just keyThem) -> resolveby [keyThem] $ do graftin us file LsFiles.valUs LsFiles.valUs - makelink keyThem + makeannexlink keyThem LsFiles.valThem -- Neither side is annexed file; cannot resolve. - (Nothing, Nothing) -> return Nothing + (Nothing, Nothing) -> return ([], Nothing) where file = LsFiles.unmergedFile u - getkey select select' - | select (LsFiles.unmergedBlobType u) == Just SymlinkBlob = - case select' (LsFiles.unmergedSha u) of - Nothing -> return Nothing - Just sha -> catKey sha - | otherwise = return Nothing + getkey select = + case select (LsFiles.unmergedSha u) of + Just sha -> catKey sha + Nothing -> return Nothing - makelink key = do + islocked select = select (LsFiles.unmergedBlobType u) == Just SymlinkBlob + + makeannexlink key select + | islocked select = makesymlink key + | otherwise = makepointer key + + makesymlink key = do let dest = variantFile file key l <- calcRepo $ gitAnnexLink dest key replacewithlink dest l @@ -150,6 +176,17 @@ resolveMerge' (Just us) them u = do , replaceFile dest $ makeGitLink link ) + makepointer key = do + let dest = variantFile file key + unlessM (reuseOldFile unstagedmap key file dest) $ do + r <- linkFromAnnex key dest + case r of + LinkAnnexFailed -> liftIO $ + writeFile dest (formatPointer key) + _ -> noop + stagePointerFile dest =<< hashPointerFile key + Database.Keys.addAssociatedFile key dest + {- Stage a graft of a directory or file from a branch. - - When there is a conflicted merge where one side is a directory @@ -175,34 +212,66 @@ resolveMerge' (Just us) them u = do Just sha -> do link <- catSymLinkTarget sha replacewithlink item link - - resolveby a = do + + resolveby ks a = do {- Remove conflicted file from index so merge can be resolved. -} Annex.Queue.addCommand "rm" [Param "--quiet", Param "-f", Param "--cached", Param "--"] [file] void a - return (Just file) + return (ks, Just file) {- git-merge moves conflicting files away to files - named something like f~HEAD or f~branch or just f, but the - exact name chosen can vary. Once the conflict is resolved, - this cruft can be deleted. To avoid deleting legitimate - files that look like this, only delete files that are - - A) not staged in git and B) look like git-annex symlinks. + - A) not staged in git and + - B) have a name related to the merged files and + - C) are pointers to or have the content of keys that were involved + - in the merge. -} -cleanConflictCruft :: [FilePath] -> FilePath -> Annex () -cleanConflictCruft resolvedfs top = do - (fs, cleanup) <- inRepo $ LsFiles.notInRepo False [top] - mapM_ clean fs - void $ liftIO cleanup - where - clean f - | matchesresolved f = whenM (isJust <$> isAnnexLink f) $ +cleanConflictCruft :: [Key] -> [FilePath] -> InodeMap -> Annex () +cleanConflictCruft resolvedks resolvedfs unstagedmap = do + is <- S.fromList . map (inodeCacheToKey Strongly) . concat + <$> mapM Database.Keys.getInodeCaches resolvedks + forM_ (M.toList unstagedmap) $ \(i, f) -> + whenM (matchesresolved is i f) $ liftIO $ nukeFile f - | otherwise = noop - s = S.fromList resolvedfs - matchesresolved f = S.member f s || S.member (base f) s - base f = reverse $ drop 1 $ dropWhile (/= '~') $ reverse f + where + fs = S.fromList resolvedfs + ks = S.fromList resolvedks + inks = maybe False (flip S.member ks) + matchesresolved is i f + | S.member f fs || S.member (conflictCruftBase f) fs = anyM id + [ pure (S.member i is) + , inks <$> isAnnexLink f + , inks <$> isPointerFile f + ] + | otherwise = return False + +conflictCruftBase :: FilePath -> FilePath +conflictCruftBase f = reverse $ drop 1 $ dropWhile (/= '~') $ reverse f + +{- When possible, reuse an existing file from the srcmap as the + - content of a worktree file in the resolved merge. It must have the + - same name as the origfile, or a name that git would use for conflict + - cruft. And, its inode cache must be a known one for the key. -} +reuseOldFile :: InodeMap -> Key -> FilePath -> FilePath -> Annex Bool +reuseOldFile srcmap key origfile destfile = do + is <- map (inodeCacheToKey Strongly) + <$> Database.Keys.getInodeCaches key + liftIO $ go $ mapMaybe (\i -> M.lookup i srcmap) is + where + go [] = return False + go (f:fs) + | f == origfile || conflictCruftBase f == origfile = + ifM (doesFileExist f) + ( do + renameFile f destfile + return True + , go fs + ) + | otherwise = go fs commitResolvedMerge :: Git.Branch.CommitMode -> Annex Bool commitResolvedMerge commitmode = inRepo $ Git.Branch.commitCommand commitmode @@ -210,3 +279,16 @@ commitResolvedMerge commitmode = inRepo $ Git.Branch.commitCommand commitmode , Param "-m" , Param "git-annex automatic merge conflict fix" ] + +type InodeMap = M.Map InodeCacheKey FilePath + +inodeMap :: Annex ([FilePath], IO Bool) -> Annex InodeMap +inodeMap getfiles = do + (fs, cleanup) <- getfiles + fsis <- forM fs $ \f -> do + mi <- withTSDelta (liftIO . genInodeCache f) + return $ case mi of + Nothing -> Nothing + Just i -> Just (inodeCacheToKey Strongly i, f) + void $ liftIO cleanup + return $ M.fromList $ catMaybes fsis diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 80c79554e5..f17eeea2ed 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -19,6 +19,9 @@ import Utility.InodeCache import Types.KeySource import Backend import Logs.Location +import Annex.Index (addGitEnv) +import Utility.Env +import qualified Git import qualified Database.Keys import qualified Data.ByteString.Lazy as B @@ -56,7 +59,7 @@ smudge file = do -- don't provide such modified content as it -- will be confusing. inAnnex will detect such -- modifications. - ifM (inAnnex k) + ifM ((not <$> smudgeDisabled) <&&> inAnnex k) ( do content <- calcRepo (gitAnnexLocation k) liftIO $ B.putStr . fromMaybe b @@ -66,6 +69,16 @@ smudge file = do Database.Keys.addAssociatedFile k file stop +-- Environment variable to disable smudging providing the content of keys. +smudgeDisabled :: Annex Bool +smudgeDisabled = liftIO $ isJust <$> getEnv smudgeDisableEnv + +smudgeDisableEnv :: String +smudgeDisableEnv = "ANNEX_SMUDGE_DISABLE" + +withSmudgeDisabled :: (Git.Repo -> IO a) -> Annex a +withSmudgeDisabled a = inRepo $ \r -> addGitEnv r smudgeDisableEnv "1" >>= a + -- Clean filter is fed file content on stdin, decides if a file -- should be stored in the annex, and outputs a pointer to its -- injested content. diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index 8bd7ae0cd2..e91771a077 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -54,7 +54,7 @@ newtype InodeCache = InodeCache InodeCachePrim {- Inode caches can be compared in two different ways, either weakly - or strongly. -} data InodeComparisonType = Weakly | Strongly - deriving (Eq, Ord) + deriving (Eq, Ord, Show) {- Strong comparison, including inodes. -} compareStrong :: InodeCache -> InodeCache -> Bool @@ -81,7 +81,7 @@ compareBy Weakly = compareWeak {- For use in a Map; it's determined at creation time whether this - uses strong or weak comparison for Eq. -} data InodeCacheKey = InodeCacheKey InodeComparisonType InodeCachePrim - deriving (Ord) + deriving (Ord, Show) instance Eq InodeCacheKey where (InodeCacheKey ctx x) == (InodeCacheKey cty y) = diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index 03e2539527..4c9f4acf67 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -18,10 +18,6 @@ git-annex should use smudge/clean filters. (And should avoid unlocking an object with a hard link if it's hard linked to a shared clone, but that's already accomplished because it avoids unlocking an object if it's hard linked at all) -* Make automatic merge conflict resolution work for pointer files. - - Should probably automatically handle merge conflicts between annex - symlinks and pointer files too. Maybe by always resulting in a pointer - file, since the symlinks don't work everwhere. * Crippled filesystem should cause all files to be transparently unlocked. Note that this presents problems when dealing with merge conflicts and when pushing changes committed in such a repo. Ideally, should avoid