disable journal read optimisation when alwayscommit=false

The journal read optimisation in aeca7c220 later got fixed in eedd73b84
to stage and commit any files that were left in the journal by a
previous git-annex run. That's necessary for the optimisation to work
correctly. But it also meant that alwayscommit=false started committing
the previous git-annex processes journalled changes, which defeated the
purpose of the config setting entirely.

So, disable the optimisation when alwayscommit=false, leaving the
files in the journal and not committing them. See my comments on the bug
report for why this seemed the best approach.

Also fixes a problem when annex.merge-annex-branches=false and there
are changes in the journal. That config indirectly prevents committing
the journal. (Which seems a bit odd given its name, but it always has..)
So, when there were changes in the journal, perhaps left there due to
alwayscommit=false being set before, the optimisation would prevent
git-annex from reading the journal files, and it would operate with out
of date information.
This commit is contained in:
Joey Hess 2020-04-15 13:04:34 -04:00
parent 0e4c92503e
commit 43a9808292
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 46 additions and 26 deletions

View file

@ -1,6 +1,6 @@
{- management of the git-annex branch {- management of the git-annex branch
- -
- Copyright 2011-2019 Joey Hess <id@joeyh.name> - Copyright 2011-2020 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -14,6 +14,7 @@ module Annex.Branch (
hasSibling, hasSibling,
siblingBranches, siblingBranches,
create, create,
UpdateMade(..),
update, update,
forceUpdate, forceUpdate,
updateTo, updateTo,
@ -125,12 +126,17 @@ getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha
{- Ensures that the branch and index are up-to-date; should be {- Ensures that the branch and index are up-to-date; should be
- called before data is read from it. Runs only once per git-annex run. -} - called before data is read from it. Runs only once per git-annex run. -}
update :: Annex BranchState update :: Annex BranchState
update = runUpdateOnce $ void $ updateTo =<< siblingBranches update = runUpdateOnce $ journalClean <$$> updateTo =<< siblingBranches
{- Forces an update even if one has already been run. -} {- Forces an update even if one has already been run. -}
forceUpdate :: Annex Bool forceUpdate :: Annex UpdateMade
forceUpdate = updateTo =<< siblingBranches forceUpdate = updateTo =<< siblingBranches
data UpdateMade = UpdateMade
{ refsWereMerged :: Bool
, journalClean :: Bool
}
{- Merges the specified Refs into the index, if they have any changes not {- Merges the specified Refs into the index, if they have any changes not
- already in it. The Branch names are only used in the commit message; - already in it. The Branch names are only used in the commit message;
- it's even possible that the provided Branches have not been updated to - it's even possible that the provided Branches have not been updated to
@ -149,13 +155,13 @@ forceUpdate = updateTo =<< siblingBranches
- -
- Returns True if any refs were merged in, False otherwise. - Returns True if any refs were merged in, False otherwise.
-} -}
updateTo :: [(Git.Sha, Git.Branch)] -> Annex Bool updateTo :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
updateTo pairs = ifM (annexMergeAnnexBranches <$> Annex.getGitConfig) updateTo pairs = ifM (annexMergeAnnexBranches <$> Annex.getGitConfig)
( updateTo' pairs ( updateTo' pairs
, return False , return (UpdateMade False False)
) )
updateTo' :: [(Git.Sha, Git.Branch)] -> Annex Bool updateTo' :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
updateTo' pairs = do updateTo' pairs = do
-- ensure branch exists, and get its current ref -- ensure branch exists, and get its current ref
branchref <- getBranch branchref <- getBranch
@ -167,7 +173,7 @@ updateTo' pairs = do
else do else do
mergedrefs <- getMergedRefs mergedrefs <- getMergedRefs
filterM isnewer (excludeset mergedrefs unignoredrefs) filterM isnewer (excludeset mergedrefs unignoredrefs)
if null tomerge journalclean <- if null tomerge
{- Even when no refs need to be merged, the index {- Even when no refs need to be merged, the index
- may still be updated if the branch has gotten ahead - may still be updated if the branch has gotten ahead
- of the index, or just if the journal is dirty. -} - of the index, or just if the journal is dirty. -}
@ -179,11 +185,23 @@ updateTo' pairs = do
- a commit needs to be done. -} - a commit needs to be done. -}
when dirty $ when dirty $
go branchref dirty [] jl go branchref dirty [] jl
, when dirty $ return True
lockJournal $ go branchref dirty [] , if dirty
then ifM (annexAlwaysCommit <$> Annex.getGitConfig)
( do
lockJournal $ go branchref dirty []
return True
, return False
)
else return True
) )
else lockJournal $ go branchref dirty tomerge else do
return $ not $ null tomerge lockJournal $ go branchref dirty tomerge
return True
return $ UpdateMade
{ refsWereMerged = not (null tomerge)
, journalClean = journalclean
}
where where
excludeset s = filter (\(r, _) -> S.notMember r s) excludeset s = filter (\(r, _) -> S.notMember r s)
isnewer (r, _) = inRepo $ Git.Branch.changed fullname r isnewer (r, _) = inRepo $ Git.Branch.changed fullname r

View file

@ -28,26 +28,24 @@ checkIndexOnce a = unlessM (indexChecked <$> getState) $ do
changeState $ \s -> s { indexChecked = True } changeState $ \s -> s { indexChecked = True }
{- Runs an action to update the branch, if it's not been updated before {- Runs an action to update the branch, if it's not been updated before
- in this run of git-annex. -} - in this run of git-annex.
runUpdateOnce :: Annex () -> Annex BranchState -
- The action should return True if anything that was in the journal
- before got staged (or if the journal was empty). That lets an opmisation
- be done: The journal then does not need to be checked going forward,
- until new information gets written to it.
-}
runUpdateOnce :: Annex Bool -> Annex BranchState
runUpdateOnce a = do runUpdateOnce a = do
st <- getState st <- getState
if branchUpdated st if branchUpdated st
then return st then return st
else do else do
a journalstaged <- a
let stf = \st' -> st' let stf = \st' -> st'
{ branchUpdated = True { branchUpdated = True
-- The update staged anything that was , journalIgnorable = journalstaged
-- journalled before, so the journal && not (journalNeverIgnorable st')
-- does not need to be checked going
-- forward, unless new information
-- gets written to it, or unless
-- this run of git-annex needs to notice
-- changes journalled by other processes
-- while it's running.
, journalIgnorable = not $
journalNeverIgnorable st'
} }
changeState stf changeState stf
return (stf st) return (stf st)

View file

@ -216,7 +216,8 @@ manualPull currentbranch remotes = do
, return $ Just r , return $ Just r
) )
else return Nothing else return Nothing
haddiverged <- liftAnnex Annex.Branch.forceUpdate haddiverged <- Annex.Branch.refsWereMerged
<$> liftAnnex Annex.Branch.forceUpdate
forM_ remotes $ \r -> forM_ remotes $ \r ->
liftAnnex $ Command.Sync.mergeRemote r liftAnnex $ Command.Sync.mergeRemote r
currentbranch Command.Sync.mergeConfig def currentbranch Command.Sync.mergeConfig def

View file

@ -65,7 +65,8 @@ onChange file
| ".lock" `isSuffixOf` file = noop | ".lock" `isSuffixOf` file = noop
| isAnnexBranch file = do | isAnnexBranch file = do
branchChanged branchChanged
diverged <- liftAnnex Annex.Branch.forceUpdate diverged <- Annex.Branch.refsWereMerged
<$> liftAnnex Annex.Branch.forceUpdate
when diverged $ do when diverged $ do
updateExportTreeFromLogAll updateExportTreeFromLogAll
queueDeferredDownloads "retrying deferred download" Later queueDeferredDownloads "retrying deferred download" Later

View file

@ -91,3 +91,5 @@ index 712c5c10f..f248b3eb4 100644
[[!tag projects/datalad]] [[!tag projects/datalad]]
[0]: https://github.com/datalad/datalad-extensions/pull/9 [0]: https://github.com/datalad/datalad-extensions/pull/9
> [[fixed|done]] --[[Joey]]