From 7fce85adac077d7e6843740a3b620c8995f75dbd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 9 Feb 2015 18:34:48 -0400 Subject: [PATCH] Improve race recovery code when committing to git-annex branch. --- Annex/Branch.hs | 39 +++++++++++------- debian/changelog | 1 + ..._c97926b15ba320f57a6441f9844cb139._comment | 7 ++++ ..._3b70a60ef1c47871a3933176eac38174._comment | 40 +++++++++++++++++++ ..._1d951126a9633b206dffbc77bfc65f6a._comment | 12 ++++++ 5 files changed, 84 insertions(+), 15 deletions(-) create mode 100644 doc/bugs/git-annex_branch_shows_commit_with_looong_commitlog/comment_5_c97926b15ba320f57a6441f9844cb139._comment create mode 100644 doc/bugs/git-annex_branch_shows_commit_with_looong_commitlog/comment_6_3b70a60ef1c47871a3933176eac38174._comment create mode 100644 doc/forum/repair_stuck_on_ls-tree_command/comment_11_1d951126a9633b206dffbc77bfc65f6a._comment diff --git a/Annex/Branch.hs b/Annex/Branch.hs index c672703126..6ce711996e 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -29,6 +29,7 @@ import qualified Data.ByteString.Lazy as L import qualified Data.Set as S import qualified Data.Map as M import Data.Bits.Utils +import Control.Concurrent (threadDelay) import Common.Annex import Annex.BranchState @@ -232,28 +233,32 @@ forceCommit message = lockJournal $ \jl -> do {- Commits the staged changes in the index to the branch. - - - Ensures that the branch's index file is first updated to the state + - Ensures that the branch's index file is first updated to merge the state - of the branch at branchref, before running the commit action. This - is needed because the branch may have had changes pushed to it, that - are not yet reflected in the index. - - - - Also safely handles a race that can occur if a change is being pushed - - into the branch at the same time. When the race happens, the commit will - - be made on top of the newly pushed change, but without the index file - - being updated to include it. The result is that the newly pushed - - change is reverted. This race is detected and another commit made - - to fix it. - - The branchref value can have been obtained using getBranch at any - previous point, though getting it a long time ago makes the race - more likely to occur. + - + - Note that changes may be pushed to the branch at any point in time! + - So, there's a race. If the commit is made using the newly pushed tip of + - the branch as its parent, and that ref has not yet been merged into the + - index, then the result is that the commit will revert the pushed + - changes, since they have not been merged into the index. This race + - is detected and another commit made to fix it. + - + - (It's also possible for the branch to be overwritten, + - losing the commit made here. But that's ok; the data is still in the + - index and will get committed again later.) -} commitIndex :: JournalLocked -> Git.Ref -> String -> [Git.Ref] -> Annex () commitIndex jl branchref message parents = do showStoringStateAction - commitIndex' jl branchref message parents -commitIndex' :: JournalLocked -> Git.Ref -> String -> [Git.Ref] -> Annex () -commitIndex' jl branchref message parents = do + commitIndex' jl branchref message message 0 parents +commitIndex' :: JournalLocked -> Git.Ref -> String -> String -> Integer -> [Git.Ref] -> Annex () +commitIndex' jl branchref message basemessage retrynum parents = do updateIndex jl branchref committedref <- inRepo $ Git.Branch.commitAlways Git.Branch.AutomaticCommit message fullname parents setIndexSha committedref @@ -276,12 +281,16 @@ commitIndex' jl branchref message parents = do | otherwise = True -- race! {- To recover from the race, union merge the lost refs - - into the index, and recommit on top of the bad commit. -} + - into the index. -} fixrace committedref lostrefs = do + showSideAction "recovering from race" + let retrynum' = retrynum+1 + -- small sleep to let any activity that caused + -- the race settle down + liftIO $ threadDelay (100000 + fromInteger retrynum') mergeIndex jl lostrefs - commitIndex jl committedref racemessage [committedref] - - racemessage = message ++ " (recovery from race)" + let racemessage = basemessage ++ " (recovery from race #" ++ show retrynum' ++ "; expected commit parent " ++ show branchref ++ " but found " ++ show lostrefs ++ " )" + commitIndex' jl committedref racemessage basemessage retrynum' [committedref] {- Lists all files on the branch. There may be duplicates in the list. -} files :: Annex [FilePath] diff --git a/debian/changelog b/debian/changelog index 1fa7b80555..6809ef1d60 100644 --- a/debian/changelog +++ b/debian/changelog @@ -17,6 +17,7 @@ git-annex (5.20150206) UNRELEASED; urgency=medium * webapp: Fix reversion in opening webapp when starting it manually inside a repository. * assistant: Improve sanity check for control characters when pairing. + * Improve race recovery code when committing to git-annex branch. -- Joey Hess Fri, 06 Feb 2015 13:57:08 -0400 diff --git a/doc/bugs/git-annex_branch_shows_commit_with_looong_commitlog/comment_5_c97926b15ba320f57a6441f9844cb139._comment b/doc/bugs/git-annex_branch_shows_commit_with_looong_commitlog/comment_5_c97926b15ba320f57a6441f9844cb139._comment new file mode 100644 index 0000000000..62903f1f16 --- /dev/null +++ b/doc/bugs/git-annex_branch_shows_commit_with_looong_commitlog/comment_5_c97926b15ba320f57a6441f9844cb139._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2015-02-09T21:10:58Z" + content=""" +I guess the thing to do in this case is to run `git annex forget` +"""]] diff --git a/doc/bugs/git-annex_branch_shows_commit_with_looong_commitlog/comment_6_3b70a60ef1c47871a3933176eac38174._comment b/doc/bugs/git-annex_branch_shows_commit_with_looong_commitlog/comment_6_3b70a60ef1c47871a3933176eac38174._comment new file mode 100644 index 0000000000..25e6059f10 --- /dev/null +++ b/doc/bugs/git-annex_branch_shows_commit_with_looong_commitlog/comment_6_3b70a60ef1c47871a3933176eac38174._comment @@ -0,0 +1,40 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 6""" + date="2015-02-09T21:59:31Z" + content=""" +[[forum/repair_stuck_on_ls-tree_command]] is another case of that, and I got ahold of +that repository for analysis. + +In that case, there was indeed an inverse pyramid effect where each commit +added one more " (recovery from race)" to its parent commit. + +The code can clearly loop +if it keeps detecting a race and somehow fails to recover from it. Leading +to a whole stack of commits with progressively longer messages. +I don't see any way to get just one commit with a long message, which +comment #1 seems to say happened. + +Apparently loops for a while and then succeeds in recovering from +the race, since it then stops looping. + +I have added additional debug info to the commit message, in hopes of detecting +what might be going wrong that causes it to loop. + +Seems to me there are two possibilities. + +One is that something else is continually changing the git-annex +branch in a way that keeps triggering the race. If so, it might make +sense for git-annex to do a brief random sleep (a few hundredths of a +second) before recovering, to let whatever it is quiet down. I've done so. + +The other is some kind of bug where it detects a race when none +occurred. Perhaps it's misparsing the commit or git cat-file is failing +to output it, and so it's not finding the expected parent refs, for example. +But in that case, why would it detect a race for many commits +in a row, and then eventually not detect a race anymore? + +Also, I've made these messages no longer stack up even if it does go into a +loop, which will at least help with the object size bloat, though not with the +number of commits bloat. +"""]] diff --git a/doc/forum/repair_stuck_on_ls-tree_command/comment_11_1d951126a9633b206dffbc77bfc65f6a._comment b/doc/forum/repair_stuck_on_ls-tree_command/comment_11_1d951126a9633b206dffbc77bfc65f6a._comment new file mode 100644 index 0000000000..a569fcbdff --- /dev/null +++ b/doc/forum/repair_stuck_on_ls-tree_command/comment_11_1d951126a9633b206dffbc77bfc65f6a._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 11""" + date="2015-02-09T21:13:28Z" + content=""" +Finally got back to this. I downloaded the file. + +You may be able to fix your repository by running `git annex forget` + +I guess this is the same problem described in +[[bugs/git-annex_branch_shows_commit_with_looong_commitlog]] +"""]]