From d6711800ad261fb4c37fc361bc84918d1e296bc4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 9 Jun 2014 18:01:30 -0400 Subject: [PATCH] avoid bad commits after interrupted direct mode sync (or merge) It was possible for an interrupted sync or merge in direct mode to leave the work tree out of sync with the last recorded commit. This would result in the next commit seeing files missing from the work tree, and committing their removal. Now, a direct mode merge happens not only in a throwaway work tree, but using a temporary index file, and without any commits or index changes being made until the real work tree has been updated. If the merge is interrupted, the work tree may have some updated files, but worst case a commit will redundantly commit changes that come from the merge. This commit was sponsored by Tony Cantor. --- Annex/AutoMerge.hs | 31 ++++---- Annex/Direct.hs | 88 +++++++++++++++++------ Git/Branch.hs | 19 ++++- Git/DiffTree.hs | 2 +- Git/Index.hs | 4 ++ Git/Merge.hs | 14 +++- debian/changelog | 5 ++ doc/bugs/direct_mode_merge_interrupt.mdwn | 26 ++++++- 8 files changed, 142 insertions(+), 47 deletions(-) diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs index 2ed26b78fa..e6f7e04979 100644 --- a/Annex/AutoMerge.hs +++ b/Annex/AutoMerge.hs @@ -17,7 +17,6 @@ import qualified Git.LsFiles as LsFiles import qualified Git.UpdateIndex as UpdateIndex import qualified Git.Merge import qualified Git.Ref -import qualified Git.Sha import qualified Git import Git.Types (BlobType(..)) import Config @@ -38,12 +37,7 @@ autoMergeFrom branch currbranch = do Just b -> go =<< inRepo (Git.Ref.sha b) where go old = ifM isDirect - ( do - d <- fromRepo gitAnnexMergeDir - r <- inRepo (mergeDirect d branch) - <||> resolveMerge old branch - mergeDirectCleanup d (fromMaybe Git.Sha.emptyTree old) Git.Ref.headRef - return r + ( mergeDirect currbranch old branch (resolveMerge old branch) , inRepo (Git.Merge.mergeNonInteractive branch) <||> resolveMerge old branch ) @@ -70,9 
+64,11 @@ autoMergeFrom branch currbranch = do - - In indirect mode, the merge is resolved in the work tree and files - staged, to clean up from a conflicted merge that was run in the work - - tree. In direct mode, the work tree is not touched here; files are - - staged to the index, and written to the gitAnnexMergeDir, and later - - mergeDirectCleanup handles updating the work tree. + - tree. The resolution is committed. + - + - In direct mode, the work tree is not touched here, and no commit is made; + - files are staged to the index, and written to the gitAnnexMergeDir, and + - later mergeDirectCleanup handles updating the work tree. -} resolveMerge :: Maybe Git.Ref -> Git.Ref -> Annex Bool resolveMerge us them = do @@ -92,14 +88,13 @@ resolveMerge us them = do unlessM isDirect $ cleanConflictCruft mergedfs top Annex.Queue.flush - whenM isDirect $ - void preCommitDirect - void $ inRepo $ Git.Command.runBool - [ Param "commit" - , Param "--no-verify" - , Param "-m" - , Param "git-annex automatic merge conflict fix" - ] + unlessM isDirect $ do + void $ inRepo $ Git.Command.runBool + [ Param "commit" + , Param "--no-verify" + , Param "-m" + , Param "git-annex automatic merge conflict fix" + ] showLongNote "Merge conflict was automatically resolved; you may want to examine the result." return merged diff --git a/Annex/Direct.hs b/Annex/Direct.hs index 2f583fd943..029bc16d77 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -34,6 +34,8 @@ import Annex.Perms import Annex.ReplaceFile import Annex.Exception import Annex.VariantFile +import Git.Index +import Annex.Index {- Uses git ls-files to find files that need to be committed, and stages - them into the index. Returns True if some changes were staged. -} @@ -141,21 +143,63 @@ addDirect file cache = do ) {- In direct mode, git merge would usually refuse to do anything, since it - - sees present direct mode files as type changed files. To avoid this, - - merge is run with the work tree set to a temp directory. 
+ - sees present direct mode files as type changed files. + - + - So, to handle a merge, it's run with the work tree set to a temp + - directory, and the merge is staged into a copy of the index. + - Then the work tree is updated to reflect the merge, and + - finally, the merge is committed and the real index updated. -} -mergeDirect :: FilePath -> Git.Ref -> Git.Repo -> IO Bool -mergeDirect d branch g = do - whenM (doesDirectoryExist d) $ - removeDirectoryRecursive d - createDirectoryIfMissing True d - let g' = g { location = Local { gitdir = Git.localGitDir g, worktree = Just d } } - Git.Merge.mergeNonInteractive branch g' +mergeDirect :: Maybe Git.Ref -> Maybe Git.Ref -> Git.Branch -> Annex Bool -> Annex Bool +mergeDirect startbranch oldref branch resolvemerge = do + -- Use the index lock file as the temp index file. + -- This is actually what git does when updating the index, + -- and so it will prevent other git processes from making + -- any changes to the index while our merge is in progress. + reali <- fromRepo indexFile + tmpi <- fromRepo indexFileLock + liftIO $ copyFile reali tmpi -{- Cleans up after a direct mode merge. The merge must have been committed, - - and the commit sha passed in, along with the old sha of the tree - - before the merge. Uses git diff-tree to find files that changed between - - the two shas, and applies those changes to the work tree. + d <- fromRepo gitAnnexMergeDir + liftIO $ do + whenM (doesDirectoryExist d) $ + removeDirectoryRecursive d + createDirectoryIfMissing True d + + withIndexFile tmpi $ do + r <- inRepo (mergein d) <||> resolvemerge + mergeDirectCleanup d (fromMaybe Git.Sha.emptyTree oldref) + mergeDirectCommit startbranch branch + liftIO $ rename tmpi reali + return r + where + mergein d g = Git.Merge.stageMerge branch $ + g { location = Local { gitdir = Git.localGitDir g, worktree = Just d } } + +{- Commits after a direct mode merge is complete, and after the work + - tree has been updated by mergeDirectCleanup. 
+ -} +mergeDirectCommit :: Maybe Git.Ref -> Git.Branch -> Annex () +mergeDirectCommit old branch = do + void preCommitDirect + gitdir <- fromRepo Git.localGitDir + let merge_head = gitdir "MERGE_HEAD" + let merge_msg = gitdir "MERGE_MSG" + let merge_mode = gitdir "MERGE_MODE" + ifM (maybe (return False) (\o -> inRepo $ Git.Branch.fastForwardable o branch) old) + ( inRepo $ Git.Branch.update Git.Ref.headRef branch -- fast forward + , do + msg <- liftIO $ + catchDefaultIO ("merge " ++ fromRef branch) $ + readFile merge_msg + void $ inRepo $ Git.Branch.commit False msg + Git.Ref.headRef [Git.Ref.headRef, branch] + ) + liftIO $ mapM_ nukeFile [merge_head, merge_msg, merge_mode] + +{- Cleans up after a direct mode merge. The merge must have been staged + - in the index. Uses diff-index to compare the staged changes with + - the tree before the merge, and applies those changes to the work tree. - - There are really only two types of changes: An old item can be deleted, - or a new item added. Two passes are made, first deleting and then @@ -164,9 +208,9 @@ mergeDirect d branch g = do - order, but we cannot add the directory until the file with the - same name is removed.) -} -mergeDirectCleanup :: FilePath -> Git.Ref -> Git.Ref -> Annex () -mergeDirectCleanup d oldsha newsha = do - (items, cleanup) <- inRepo $ DiffTree.diffTreeRecursive oldsha newsha +mergeDirectCleanup :: FilePath -> Git.Ref -> Annex () +mergeDirectCleanup d oldref = do + (items, cleanup) <- inRepo $ DiffTree.diffIndex oldref makeabs <- flip fromTopFilePath <$> gitRepo let fsitems = zip (map (makeabs . DiffTree.file) items) items forM_ fsitems $ @@ -194,12 +238,12 @@ mergeDirectCleanup d oldsha newsha = do - key, it's left alone. - - If the file is already present, and does not exist in the - - oldsha branch, preserve this local file. + - oldref, preserve this local file. - - Otherwise, create the symlink and then if possible, replace it - with the content. 
-} movein item makeabs k f = unlessM (goodContent k f) $ do - preserveUnannexed item makeabs f oldsha + preserveUnannexed item makeabs f oldref l <- inRepo $ gitAnnexLink f k replaceFile f $ makeAnnexLink l toDirect k f @@ -207,13 +251,13 @@ mergeDirectCleanup d oldsha newsha = do {- Any new, modified, or renamed files were written to the temp - directory by the merge, and are moved to the real work tree. -} movein_raw item makeabs f = do - preserveUnannexed item makeabs f oldsha + preserveUnannexed item makeabs f oldref liftIO $ do createDirectoryIfMissing True $ parentDir f void $ tryIO $ rename (d getTopFilePath (DiffTree.file item)) f {- If the file that's being moved in is already present in the work - - tree, but did not exist in the oldsha branch, preserve this + - tree, but did not exist in the oldref, preserve this - local, unannexed file (or directory), as "variant-local". - - It's also possible that the file that's being moved in @@ -221,7 +265,7 @@ mergeDirectCleanup d oldsha newsha = do - file (not a directory), which should be preserved. -} preserveUnannexed :: DiffTree.DiffTreeItem -> (TopFilePath -> FilePath) -> FilePath -> Ref -> Annex () -preserveUnannexed item makeabs absf oldsha = do +preserveUnannexed item makeabs absf oldref = do whenM (liftIO (collidingitem absf) <&&> unannexed absf) $ liftIO $ findnewname absf 0 checkdirs (DiffTree.file item) @@ -241,7 +285,7 @@ preserveUnannexed item makeabs absf oldsha = do <$> catchMaybeIO (getSymbolicLinkStatus f) unannexed f = (isNothing <$> isAnnexLink f) - <&&> (isNothing <$> catFileDetails oldsha f) + <&&> (isNothing <$> catFileDetails oldref f) findnewname :: FilePath -> Int -> IO () findnewname f n = do diff --git a/Git/Branch.hs b/Git/Branch.hs index d182ceb395..7c7e44d758 100644 --- a/Git/Branch.hs +++ b/Git/Branch.hs @@ -52,7 +52,22 @@ changed origbranch newbranch repo diffs = pipeReadStrict [ Param "log" , Param (fromRef origbranch ++ ".." 
++ fromRef newbranch) - , Params "--oneline -n1" + , Param "-n1" + , Param "--pretty=%H" + ] repo + +{- Check if it's possible to fast-forward from the old + - ref to the new ref. + - + - This requires there to be a path from the old to the new. -} +fastForwardable :: Ref -> Ref -> Repo -> IO Bool +fastForwardable old new repo = not . null <$> + pipeReadStrict + [ Param "log" + , Param $ fromRef old ++ ".." ++ fromRef new + , Param "-n1" + , Param "--pretty=%H" + , Param "--ancestry-path" ] repo {- Given a set of refs that are all known to have commits not @@ -74,7 +89,7 @@ fastForward branch (first:rest) repo = where no_ff = return False do_ff to = do - run [Param "update-ref", Param $ fromRef branch, Param $ fromRef to] repo + update branch to repo return True findbest c [] = return $ Just c findbest c (r:rs) diff --git a/Git/DiffTree.hs b/Git/DiffTree.hs index 9e4fef9d62..59de60871c 100644 --- a/Git/DiffTree.hs +++ b/Git/DiffTree.hs @@ -49,7 +49,7 @@ diffIndex :: Ref -> Repo -> IO ([DiffTreeItem], IO Bool) diffIndex ref = diffIndex' ref [Param "--cached"] {- Diffs between a tree and the working tree. Does nothing if there is not - - yet a commit in the repository, of if the repository is bare. -} + - yet a commit in the repository, or if the repository is bare. -} diffWorkTree :: Ref -> Repo -> IO ([DiffTreeItem], IO Bool) diffWorkTree ref repo = ifM (Git.Ref.headExists repo) diff --git a/Git/Index.hs b/Git/Index.hs index d9d5b03bfe..d712245a81 100644 --- a/Git/Index.hs +++ b/Git/Index.hs @@ -30,3 +30,7 @@ override index = do indexFile :: Repo -> FilePath indexFile r = localGitDir r "index" + +{- Git locks the index by creating this file. 
-} +indexFileLock :: Repo -> FilePath +indexFileLock r = indexFile r ++ ".lock" diff --git a/Git/Merge.hs b/Git/Merge.hs index 948e09e014..d661db9787 100644 --- a/Git/Merge.hs +++ b/Git/Merge.hs @@ -1,6 +1,6 @@ {- git merging - - - Copyright 2012 Joey Hess + - Copyright 2012, 2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -19,3 +19,15 @@ mergeNonInteractive branch | otherwise = merge [Param "--no-edit", Param $ fromRef branch] where merge ps = runBool $ Param "merge" : ps + +{- Stage the merge into the index, but do not commit it.-} +stageMerge :: Ref -> Repo -> IO Bool +stageMerge branch = runBool + [ Param "merge" + , Param "--quiet" + , Param "--no-commit" + -- Without this, a fast-forward merge is done, since it involves no + -- commit. + , Param "--no-ff" + , Param $ fromRef branch + ] diff --git a/debian/changelog b/debian/changelog index 612864faf0..64225ea366 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,11 @@ git-annex (5.20140607) UNRELEASED; urgency=medium * Ignore setsid failures. * Avoid leaving behind .tmp files when failing in some cases, including importing files to a disk that is full. + * direct mode: Avoid committing a merge until after the work tree is + updated. This avoids an interrupted merge leaving the work tree out + of sync with the last commit, which could result in the wrong thing + being committed later, and files appearing to get deleted. + (They could be recovered by reverting the bad commit.) -- Joey Hess Mon, 09 Jun 2014 14:44:09 -0400 diff --git a/doc/bugs/direct_mode_merge_interrupt.mdwn b/doc/bugs/direct_mode_merge_interrupt.mdwn index 50b32929bc..e648481404 100644 --- a/doc/bugs/direct_mode_merge_interrupt.mdwn +++ b/doc/bugs/direct_mode_merge_interrupt.mdwn @@ -17,10 +17,30 @@ mode repo can get them back. To fix this, direct mode merge would need to avoid updating the current branch when merging the remote branch into it (how?). 
It should first update the whole work tree, and only after it's updated should it update -the current branch to reflect the merge. (I assume this is how `git merge` -normally works.) --[[Joey]] +the index and the current branch to reflect the merge. + +This way, if the merge is interrupted, the work tree may have uncommitted +changes -- but it's fine if they get accidentally committed, since when +the merge is re-done, those changes will be the same ones made by the +merge. (I assume this is how `git merge` normally works.) --[[Joey]] > Implemented that. And then realized that even updating the index > as part of a merge results in the work tree being out of sync with the > index. Which will cause the next sync to again delete any files that -> are in the index but not the work tree. Urgh. --[[Joey]] +> are in the index but not the work tree. Urgh. +> +> Seems that a direct mode +> merge also needs to use a different index file to stage its changes? +> (Ugh) +> > [[done]] --[[Joey]] +> +> Or could perhaps use `git-merge-tree` +> and avoid staging the merge in the index until the work-tree is updated. +> +> Alternatively, could use another strategy.. Add a lock file which is held while +> the merge is in progress and contains the pre-merge sha. +> If the lock file is present but not held, state is inconsistent. +> `git-annex sync` and the SanityChecker should +> then run mergeDirectCleanup to recover, before any commits can be made +> from the inconsistent state. This approach seems to get complicated +> quickly.. --[[Joey]]