From 4c918437abcc5d1463ac1b0456642ab66dbc9cdc Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 6 Aug 2018 17:22:12 -0400 Subject: [PATCH] Fix git-annex branch data loss that could occur after git-annex forget --drop-dead Added getStaged, to get the versions of git-annex branch files staged in its index, and use during transitions so the result of merging sibling branches is used. The catFileStop in performTransitionsLocked is absolutely necessary, without that the bug still occurred, because git cat-file was already running and was looking at the old index file. Note that getLocal still has cat-file look at the git-annex branch, not the index. It might be faster if it looked at the index, but probably only marginally so, and I've not benchmarked it to see if it's faster at all. I didn't want to change unrelated behavior as part of this bug fix. And as the need for catFileStop shows, using the index file has added complications. Anyway, it still seems fine for getLocal to look at the git-annex branch, because normally the index file is updated just before the git-annex branch is committed, and so they'll contain the same information. It's only during a transition that the two diverge. This commit was sponsored by Paul Walmsley in honor of Mark Phillips. --- Annex/Branch.hs | 21 +++++++++++----- CHANGELOG | 2 ++ ...-drop-dead_can_lose_group_information.mdwn | 2 ++ ..._f4b78aeaf9163b374254c08057222661._comment | 25 +++++++++++++++++++ ..._1468e26d2da85bcaf50715fbe3c38490._comment | 19 ++++++++++++++ 5 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information/comment_1_f4b78aeaf9163b374254c08057222661._comment create mode 100644 doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information/comment_2_1468e26d2da85bcaf50715fbe3c38490._comment diff --git a/Annex/Branch.hs b/Annex/Branch.hs index a3945a18c1..e465b75326 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -223,10 +223,15 @@ getLocal :: FilePath -> Annex String getLocal file = go =<< getJournalFileStale file where go (Just journalcontent) = return journalcontent - go Nothing = getRaw file + go Nothing = getRef fullname file -getRaw :: FilePath -> Annex String -getRaw = getRef fullname +{- Gets the content of a file as staged in the branch's index. -} +getStaged :: FilePath -> Annex String +getStaged = getRef indexref + where + -- This makes git cat-file be run with ":file", + -- so it looks at the index. + indexref = Ref "" getHistorical :: RefDate -> FilePath -> Annex String getHistorical date file = @@ -533,6 +538,10 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do -- update the git-annex branch, while it usually holds changes -- for the head branch. Flush any such changes. Annex.Queue.flush + -- Stop any running git cat-files, to ensure that the + -- getStaged calls below use the current index, and not some older + -- one. + catFileStop withIndex $ do prepareModifyIndex jl run $ mapMaybe getTransitionCalculator tlist @@ -557,15 +566,15 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do - to hold changes to every file in the branch at once.) - - When a file in the branch is changed by transition code, - - that value is remembered and fed into the code for subsequent + - its new content is remembered and fed into the code for subsequent - transitions. -} run [] = noop run changers = do - trustmap <- calcTrustMap <$> getRaw trustLog + trustmap <- calcTrustMap <$> getStaged trustLog fs <- branchFiles forM_ fs $ \f -> do - content <- getRaw f + content <- getStaged f apply changers f content trustmap apply [] _ _ _ = return () apply (changer:rest) file content trustmap = diff --git a/CHANGELOG b/CHANGELOG index 8e1b0eb1f1..908c42b03f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,6 +11,8 @@ git-annex (6.20180720) UNRELEASED; urgency=medium * Fix wrong sorting of remotes when using -J, it was sorting by uuid, rather than cost. * addurl: Include filename in --json-progress output. + * Fix git-annex branch data loss that could occur after + git-annex forget --drop-dead. -- Joey Hess Tue, 31 Jul 2018 12:14:11 -0400 diff --git a/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information.mdwn b/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information.mdwn index c9e94389e6..5c76e9494f 100644 --- a/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information.mdwn +++ b/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information.mdwn @@ -152,3 +152,5 @@ B IS IN GROUP: ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) Yes, git-annex is slowly replacing all of my other sync and backup systems I've cobbled together over the years. + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information/comment_1_f4b78aeaf9163b374254c08057222661._comment b/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information/comment_1_f4b78aeaf9163b374254c08057222661._comment new file mode 100644 index 0000000000..75bd9bd1ac --- /dev/null +++ b/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information/comment_1_f4b78aeaf9163b374254c08057222661._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2018-08-06T19:49:06Z" + content=""" +Great bug report! + +There is a general data loss problem in this scenario, it's not specific to +group changes at all. Changes that were only present in a sibling git-annex +branch are not being preserved when the repository updates its git-annex +branch index file for a transition. + +The index file lacking those changes then gets committed with the sibling +branches as parent(s). So the changes are effectively reverted. + +The root cause is that the handleTransitions uses getRaw +to get the contents of files. That uses git cat-file git-annex:$file, which +gets the version last committed to the git-annex branch, +not the version from the git-annex branch index file. And handleTransitions is +run after all sibling branches have been union merged in the index file +but not committed yet. + +The fix is to instead use git cat-file :$file, so it will get the version +from the index. +"""]] diff --git a/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information/comment_2_1468e26d2da85bcaf50715fbe3c38490._comment b/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information/comment_2_1468e26d2da85bcaf50715fbe3c38490._comment new file mode 100644 index 0000000000..8fe3936bc1 --- /dev/null +++ b/doc/bugs/git_annex_forget_--drop-dead_can_lose_group_information/comment_2_1468e26d2da85bcaf50715fbe3c38490._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2018-08-06T21:26:36Z" + content=""" +If you lost things from the git-annex branch due this bug, +you can find the commit that contained them by `git log git-annex`, +and look for the commit before the "continuing transition" commit. + +It's then possible to get those changes applied back to the git-annex +brannch; there should be no permanent data loss due to this bug. + +Eg, here the commit that contained the lost group change was +261d1be6a2093f1e4059ed3030016c365f29413f. To get that back into the +git-annex branch, I ran: + + git update-ref git-annex 261d1be6a2093f1e4059ed3030016c365f29413f + git annex merge +"""]]