atomic git-annex branch update when regrafting in transition

Fix a bug where interrupting git-annex while it is updating the git-annex
branch could lead to git fsck complaining about missing tree objects.

Interrupting git-annex while regraftexports is running in a transition
that is forgetting git-annex branch history would leave the
repository with a git-annex branch that did not contain the tree shas
listed in export.log. That lets those trees be garbage collected.

A subsequent run of the same transition then regrafts the trees listed
in export.log into the git-annex branch. But those trees have been lost.

Note that both sides of `if neednewlocalbranch` are atomic now. I had
thought only the True side needed to be, but I do think there may be
cases where the False side needs to be as well.

Sponsored-by: Dartmouth College's OpenNeuro project
This commit is contained in:
Joey Hess 2024-06-07 15:59:54 -04:00
parent f5532be954
commit b32c4c2e98
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 78 additions and 27 deletions

View file

@ -818,12 +818,18 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do
if neednewlocalbranch
then do
cmode <- annexCommitMode <$> Annex.getGitConfig
committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname transitionedrefs
setIndexSha committedref
-- Creating a new empty branch must happen
-- atomically, so if this is interrupted,
-- it will not leave the new branch created
-- but without exports grafted in.
c <- inRepo $ Git.Branch.commitShaAlways
cmode message transitionedrefs
void $ regraftexports c
else do
ref <- getBranch
commitIndex jl ref message (nub $ fullname:transitionedrefs)
regraftexports
ref' <- regraftexports ref
commitIndex jl ref' message
(nub $ fullname:transitionedrefs)
where
message
| neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc
@ -872,13 +878,21 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do
apply rest file content'
-- Trees mentioned in export.log were grafted into the old
-- git-annex branch to make sure they remain available. Re-graft
-- the trees into the new branch.
regraftexports = do
-- git-annex branch to make sure they remain available.
-- Re-graft the trees.
regraftexports parent = do
l <- exportedTreeishes . M.elems . parseExportLogMap
<$> getStaged exportLog
forM_ l $ \t ->
rememberTreeishLocked t (asTopFilePath exportTreeGraftPoint) jl
c <- regraft l parent
inRepo $ Git.Branch.update' fullname c
setIndexSha c
return c
where
regraft [] c = pure c
regraft (et:ets) c =
prepRememberTreeish et graftpoint c
>>= regraft ets
graftpoint = asTopFilePath exportTreeGraftPoint
checkBranchDifferences :: Git.Ref -> Annex ()
checkBranchDifferences ref = do
@ -935,26 +949,29 @@ getMergedRefs' = do
- Returns the sha of the git commit made to the git-annex branch.
-}
rememberTreeish :: Git.Ref -> TopFilePath -> Annex Git.Sha
rememberTreeish treeish graftpoint = lockJournal $
rememberTreeishLocked treeish graftpoint
rememberTreeishLocked :: Git.Ref -> TopFilePath -> JournalLocked -> Annex Git.Sha
rememberTreeishLocked treeish graftpoint jl = do
rememberTreeish treeish graftpoint = lockJournal $ \jl -> do
branchref <- getBranch
updateIndex jl branchref
c <- prepRememberTreeish treeish graftpoint branchref
inRepo $ Git.Branch.update' fullname c
-- The tree in c is the same as the tree in branchref,
-- and the index was updated to that above, so it's safe to
-- say that the index contains c.
setIndexSha c
return c
{- Create a series of commits that graft a tree onto the parent commit,
- and then remove it. -}
prepRememberTreeish :: Git.Ref -> TopFilePath -> Git.Ref -> Annex Git.Sha
prepRememberTreeish treeish graftpoint parent = do
origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$>
inRepo (Git.Ref.tree branchref)
inRepo (Git.Ref.tree parent)
addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree
cmode <- annexCommitMode <$> Annex.getGitConfig
c <- inRepo $ Git.Branch.commitTree cmode
["graft"] [branchref] addedt
c' <- inRepo $ Git.Branch.commitTree cmode
["graft"] [parent] addedt
inRepo $ Git.Branch.commitTree cmode
["graft cleanup"] [c] origtree
inRepo $ Git.Branch.update' fullname c'
-- The tree in c' is the same as the tree in branchref,
-- and the index was updated to that above, so it's safe to
-- say that the index contains c'.
setIndexSha c'
return c'
{- Runs an action on the content of selected files from the branch.
- This is much faster than reading the content of each file in turn,

View file

@ -1,5 +1,8 @@
git-annex (10.20240532) UNRELEASED; urgency=medium
* Fix a bug where interrupting git-annex while it is updating the
git-annex branch for an export could later lead to git fsck
complaining about missing tree objects.
* Fix Windows build with Win32 2.13.4+
Thanks, Oleg Tolmatcev

View file

@ -178,13 +178,25 @@ commitCommand' runner commitmode commitquiet ps =
- in any way, or output a summary.
-}
commit :: CommitMode -> Bool -> String -> Branch -> [Ref] -> Repo -> IO (Maybe Sha)
commit commitmode allowempty message branch parentrefs repo = do
tree <- writeTree repo
ifM (cancommit tree)
( do
sha <- commitTree commitmode [message] parentrefs tree repo
commit commitmode allowempty message branch parentrefs repo =
commitSha commitmode allowempty message parentrefs repo >>= \case
Just sha -> do
update' branch sha repo
return $ Just sha
Nothing -> return Nothing
where
cancommit tree
| allowempty = return True
| otherwise = case parentrefs of
[p] -> maybe False (tree /=) <$> Git.Ref.tree p repo
_ -> return True
{- Same as commit but without updating any branch. -}
commitSha :: CommitMode -> Bool -> String -> [Ref] -> Repo -> IO (Maybe Sha)
commitSha commitmode allowempty message parentrefs repo = do
tree <- writeTree repo
ifM (cancommit tree)
( Just <$> commitTree commitmode [message] parentrefs tree repo
, return Nothing
)
where
@ -198,6 +210,10 @@ commitAlways :: CommitMode -> String -> Branch -> [Ref] -> Repo -> IO Sha
commitAlways commitmode message branch parentrefs repo = fromJust
<$> commit commitmode True message branch parentrefs repo
commitShaAlways :: CommitMode -> String -> [Ref] -> Repo -> IO Sha
commitShaAlways commitmode message parentrefs repo = fromJust
<$> commitSha commitmode True message parentrefs repo
-- Throws exception if the index is locked, with an error message output by
-- git on stderr.
writeTree :: Repo -> IO Sha

View file

@ -0,0 +1,15 @@
[[!comment format=mdwn
username="joey"
subject="""comment 8"""
date="2024-06-07T17:59:43Z"
content="""
Fixed performTransitionsLocked to create the new git-annex branch
atomically.
Found another way this could happen, interrupting `git-annex export` after
it writes export.log but before it grafts the tree into the git-annex
branch. Fixed that one too.
So hopefully this won't happen to any more repositories with these fixes.
Still leaves the question of how to recover from the problem.
"""]]