merge git-annex branch in memory in read-only repository
Improved support for using git-annex in a read-only repository, git-annex branch information from remotes that cannot be merged into the git-annex branch will now not crash it, but will be merged in memory. To avoid this making git-annex behave one way in a read-only repository, and another way when it can write, it's important that Annex.Branch.get return the same thing (modulo log file compaction) in both cases. This manages that mostly. There are some exceptions: - When there is a transition in one of the remote git-annex branches that has not yet been applied to the local or other git-annex branches. Transitions are not handled. - `git-annex log` runs git log on the git-annex branch, and so it will not be able to show information coming from the other, not yet merged branches. - Annex.Branch.files only looks at files in the git-annex branch and not unmerged branches. This affects git-annex info output. - Annex.Branch.hs.overBranchFileContents ditto. Affects --all and also importfeed (but importfeed cannot work in a read-only repo anyway). - CmdLine.Seek.seekFilteredKeys when precaching location logs. Note use of Annex.Branch.fullname - Database.ContentIdentifier.needsUpdateFromLog and updateFromLog These warts make this not suitable to be merged yet. This readonly code path is more expensive, since it has to query several branches. The value does get cached, but still large queries will be slower in a read-only repository when there are unmerged git-annex branches. When annex.merge-annex-branches=false, updateTo skips doing anything, and so the read-only repository code does not get triggered. So a user who is bothered by the extra work can set that. Other writes to the repository can still result in permissions errors. This includes the initial creation of the git-annex branch, and of course any writes to the git-annex branch. Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
da6aa6e944
commit
6d7ecd9e5d
6 changed files with 124 additions and 82 deletions
|
@ -141,17 +141,12 @@ getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha
|
|||
{- Ensures that the branch and index are up-to-date; should be
|
||||
- called before data is read from it. Runs only once per git-annex run. -}
|
||||
update :: Annex BranchState
|
||||
update = runUpdateOnce $ journalClean <$$> updateTo =<< siblingBranches
|
||||
update = runUpdateOnce $ updateTo =<< siblingBranches
|
||||
|
||||
{- Forces an update even if one has already been run. -}
|
||||
forceUpdate :: Annex UpdateMade
|
||||
forceUpdate = updateTo =<< siblingBranches
|
||||
|
||||
data UpdateMade = UpdateMade
|
||||
{ refsWereMerged :: Bool
|
||||
, journalClean :: Bool
|
||||
}
|
||||
|
||||
{- Merges the specified Refs into the index, if they have any changes not
|
||||
- already in it. The Branch names are only used in the commit message;
|
||||
- it's even possible that the provided Branches have not been updated to
|
||||
|
@ -167,8 +162,6 @@ data UpdateMade = UpdateMade
|
|||
-
|
||||
- Also handles performing any Transitions that have not yet been
|
||||
- performed, in either the local branch, or the Refs.
|
||||
-
|
||||
- Returns True if any refs were merged in, False otherwise.
|
||||
-}
|
||||
updateTo :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
|
||||
updateTo pairs = ifM (annexMergeAnnexBranches <$> Annex.getGitConfig)
|
||||
|
@ -180,7 +173,6 @@ updateTo' :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
|
|||
updateTo' pairs = do
|
||||
-- ensure branch exists, and get its current ref
|
||||
branchref <- getBranch
|
||||
dirty <- journalDirty gitAnnexJournalDir
|
||||
ignoredrefs <- getIgnoredRefs
|
||||
let unignoredrefs = excludeset ignoredrefs pairs
|
||||
tomerge <- if null unignoredrefs
|
||||
|
@ -188,6 +180,17 @@ updateTo' pairs = do
|
|||
else do
|
||||
mergedrefs <- getMergedRefs
|
||||
filterM isnewer (excludeset mergedrefs unignoredrefs)
|
||||
{- In a read-only repository, catching permission denied lets
|
||||
- query operations still work, although they will need to do
|
||||
- additional work since the refs are not merged. -}
|
||||
catchPermissionDenied
|
||||
(const (return (UpdateFailedPermissions (branchref : map fst tomerge))))
|
||||
(go branchref tomerge)
|
||||
where
|
||||
excludeset s = filter (\(r, _) -> S.notMember r s)
|
||||
isnewer (r, _) = inRepo $ Git.Branch.changed fullname r
|
||||
go branchref tomerge = do
|
||||
dirty <- journalDirty gitAnnexJournalDir
|
||||
journalcleaned <- if null tomerge
|
||||
{- Even when no refs need to be merged, the index
|
||||
- may still be updated if the branch has gotten ahead
|
||||
|
@ -199,19 +202,19 @@ updateTo' pairs = do
|
|||
- as well as the branch being updated,
|
||||
- a commit needs to be done. -}
|
||||
when dirty $
|
||||
go branchref dirty [] jl
|
||||
go' branchref dirty [] jl
|
||||
return True
|
||||
, if dirty
|
||||
then ifM (annexAlwaysCommit <$> Annex.getGitConfig)
|
||||
( do
|
||||
lockJournal $ go branchref dirty []
|
||||
( lockJournal $ \jl -> do
|
||||
go' branchref dirty [] jl
|
||||
return True
|
||||
, return False
|
||||
)
|
||||
else return True
|
||||
)
|
||||
else do
|
||||
lockJournal $ go branchref dirty tomerge
|
||||
else lockJournal $ \jl -> do
|
||||
go' branchref dirty tomerge jl
|
||||
return True
|
||||
journalclean <- if journalcleaned
|
||||
then not <$> privateUUIDsKnown
|
||||
|
@ -220,10 +223,7 @@ updateTo' pairs = do
|
|||
{ refsWereMerged = not (null tomerge)
|
||||
, journalClean = journalclean
|
||||
}
|
||||
where
|
||||
excludeset s = filter (\(r, _) -> S.notMember r s)
|
||||
isnewer (r, _) = inRepo $ Git.Branch.changed fullname r
|
||||
go branchref dirty tomerge jl = stagejournalwhen dirty jl $ do
|
||||
go' branchref dirty tomerge jl = stagejournalwhen dirty jl $ do
|
||||
let (refs, branches) = unzip tomerge
|
||||
merge_desc <- if null tomerge
|
||||
then commitMessage
|
||||
|
@ -256,20 +256,31 @@ updateTo' pairs = do
|
|||
{- Gets the content of a file, which may be in the journal, or in the index
|
||||
- (and committed to the branch).
|
||||
-
|
||||
- Updates the branch if necessary, to ensure the most up-to-date available
|
||||
- content is returned.
|
||||
- Returns an empty string if the file doesn't exist yet.
|
||||
-
|
||||
- Returns an empty string if the file doesn't exist yet. -}
|
||||
- Updates the branch if necessary, to ensure the most up-to-date available
|
||||
- content is returned. When permissions prevent updating the branch,
|
||||
- reads the content from the journal, plus the branch, plus all unmerged
|
||||
- refs.
|
||||
-}
|
||||
get :: RawFilePath -> Annex L.ByteString
|
||||
get file = getCache file >>= \case
|
||||
get file = do
|
||||
st <- update
|
||||
case getCache file st of
|
||||
Just content -> return content
|
||||
Nothing -> do
|
||||
st <- update
|
||||
content <- if journalIgnorable st
|
||||
then getRef fullname file
|
||||
else getLocal file
|
||||
else if null (unmergedRefs st)
|
||||
then getLocal file
|
||||
else unmergedbranchfallback (unmergedRefs st)
|
||||
setCache file content
|
||||
return content
|
||||
where
|
||||
unmergedbranchfallback refs = do
|
||||
l <- getLocal file
|
||||
bs <- forM refs $ \ref -> getRef ref file
|
||||
return (l <> mconcat bs)
|
||||
|
||||
{- Used to cache the value of a file, which has been read from the branch
|
||||
- using some optimised method. The journal has to be checked, in case
|
||||
|
@ -285,7 +296,7 @@ precache file branchcontent = do
|
|||
JournalledContent journalcontent -> journalcontent
|
||||
PossiblyStaleJournalledContent journalcontent ->
|
||||
branchcontent <> journalcontent
|
||||
Annex.BranchState.setCache file content
|
||||
setCache file content
|
||||
|
||||
{- Like get, but does not merge the branch, so the info returned may not
|
||||
- reflect changes in remotes.
|
||||
|
@ -452,7 +463,7 @@ commitIndex' jl branchref message basemessage retrynum parents = do
|
|||
|
||||
{- Lists all files on the branch. including ones in the journal
|
||||
- that have not been committed yet. There may be duplicates in the list. -}
|
||||
files :: Annex ([RawFilePath], IO Bool)
|
||||
tfiles :: Annex ([RawFilePath], IO Bool)
|
||||
files = do
|
||||
_ <- update
|
||||
(bfs, cleanup) <- branchFiles
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
-
|
||||
- Runtime state about the git-annex branch, and a small cache.
|
||||
-
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -13,6 +13,7 @@ import Annex.Common
|
|||
import Types.BranchState
|
||||
import qualified Annex
|
||||
import Logs
|
||||
import qualified Git
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
|
@ -30,28 +31,46 @@ checkIndexOnce a = unlessM (indexChecked <$> getState) $ do
|
|||
a
|
||||
changeState $ \s -> s { indexChecked = True }
|
||||
|
||||
data UpdateMade
|
||||
= UpdateMade
|
||||
{ refsWereMerged :: Bool
|
||||
, journalClean :: Bool
|
||||
}
|
||||
| UpdateFailedPermissions
|
||||
{ refsUnmerged :: [Git.Sha]
|
||||
}
|
||||
|
||||
{- Runs an action to update the branch, if it's not been updated before
|
||||
- in this run of git-annex.
|
||||
-
|
||||
- The action should return True if anything that was in the journal
|
||||
- before got staged (or if the journal was empty). That lets an opmisation
|
||||
- be done: The journal then does not need to be checked going forward,
|
||||
- until new information gets written to it.
|
||||
-
|
||||
- When interactive access is enabled, the journal is always checked when
|
||||
- reading values from the branch, and so this does not need to update
|
||||
- the branch.
|
||||
-
|
||||
- When the action leaves the journal clean, by staging anything that
|
||||
- was in it, an optimisation is enabled: The journal does not need to
|
||||
- be checked going forward, until new information gets written to it.
|
||||
-
|
||||
- When the action is unable to update the branch due to a permissions
|
||||
- problem,
|
||||
-}
|
||||
runUpdateOnce :: Annex Bool -> Annex BranchState
|
||||
runUpdateOnce a = do
|
||||
runUpdateOnce :: Annex UpdateMade -> Annex BranchState
|
||||
runUpdateOnce update = do
|
||||
st <- getState
|
||||
if branchUpdated st || needInteractiveAccess st
|
||||
then return st
|
||||
else do
|
||||
journalstaged <- a
|
||||
let stf = \st' -> st'
|
||||
um <- update
|
||||
let stf = case um of
|
||||
UpdateMade {} -> \st' -> st'
|
||||
{ branchUpdated = True
|
||||
, journalIgnorable = journalstaged
|
||||
, journalIgnorable = journalClean um
|
||||
}
|
||||
UpdateFailedPermissions {} -> \st' -> st'
|
||||
{ branchUpdated = True
|
||||
, journalIgnorable = False
|
||||
, unmergedRefs = refsUnmerged um
|
||||
, cachedFileContents = []
|
||||
}
|
||||
changeState stf
|
||||
return (stf st)
|
||||
|
@ -98,13 +117,13 @@ setCache file content = changeState $ \s -> s
|
|||
| length l < logFilesToCache = (file, content) : l
|
||||
| otherwise = (file, content) : Prelude.init l
|
||||
|
||||
getCache :: RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getCache file = (\st -> go (cachedFileContents st) st) <$> getState
|
||||
getCache :: RawFilePath -> BranchState -> Maybe L.ByteString
|
||||
getCache file state = go (cachedFileContents state)
|
||||
where
|
||||
go [] _ = Nothing
|
||||
go ((f,c):rest) state
|
||||
go [] = Nothing
|
||||
go ((f,c):rest)
|
||||
| f == file && not (needInteractiveAccess state) = Just c
|
||||
| otherwise = go rest state
|
||||
| otherwise = go rest
|
||||
|
||||
invalidateCache :: Annex ()
|
||||
invalidateCache = changeState $ \s -> s { cachedFileContents = [] }
|
||||
|
|
|
@ -225,8 +225,9 @@ manualPull currentbranch remotes = do
|
|||
, return $ Just r
|
||||
)
|
||||
else return Nothing
|
||||
haddiverged <- Annex.Branch.refsWereMerged
|
||||
<$> liftAnnex Annex.Branch.forceUpdate
|
||||
haddiverged <- liftAnnex Annex.Branch.forceUpdate >>= return . \case
|
||||
u@(Annex.Branch.UpdateMade {}) -> Annex.Branch.refsWereMerged u
|
||||
(Annex.Branch.UpdateFailedPermissions {}) -> True
|
||||
forM_ remotes $ \r ->
|
||||
liftAnnex $ Command.Sync.mergeRemote r
|
||||
currentbranch mc def
|
||||
|
|
|
@ -70,8 +70,9 @@ onChange file
|
|||
| ".lock" `isSuffixOf` file = noop
|
||||
| isAnnexBranch file = do
|
||||
branchChanged
|
||||
diverged <- Annex.Branch.refsWereMerged
|
||||
<$> liftAnnex Annex.Branch.forceUpdate
|
||||
diverged <- liftAnnex Annex.Branch.forceUpdate >>= return . \case
|
||||
u@(Annex.Branch.UpdateMade {}) -> Annex.Branch.refsWereMerged u
|
||||
(Annex.Branch.UpdateFailedPermissions {}) -> True
|
||||
when diverged $ do
|
||||
updateExportTreeFromLogAll
|
||||
queueDeferredDownloads "retrying deferred download" Later
|
||||
|
|
|
@ -17,6 +17,10 @@ git-annex (8.20211124) UNRELEASED; urgency=medium
|
|||
* sync: Better error message when unable to export to a remote because
|
||||
remote.name.annex-tracking-branch is configured to a ref that does not
|
||||
exist.
|
||||
* Improved support for using git-annex in a read-only repository,
|
||||
git-annex branch information from remotes that cannot be merged into
|
||||
the git-annex branch will now not crash it, but will be merged in
|
||||
memory.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 23 Nov 2021 15:58:27 -0400
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex BranchState data type
|
||||
-
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -8,17 +8,23 @@
|
|||
module Types.BranchState where
|
||||
|
||||
import Common
|
||||
import qualified Git
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
data BranchState = BranchState
|
||||
{ branchUpdated :: Bool
|
||||
-- ^ has the branch been updated this run?
|
||||
-- ^ has the branch been updated this run? (Or an update tried and
|
||||
-- failed due to permissions.)
|
||||
, indexChecked :: Bool
|
||||
-- ^ has the index file been checked to exist?
|
||||
, journalIgnorable :: Bool
|
||||
-- ^ can reading the journal be skipped, while still getting
|
||||
-- sufficiently up-to-date information from the branch?
|
||||
, unmergedRefs :: [Git.Sha]
|
||||
-- ^ when the branch was not able to be updated due to permissions,
|
||||
-- these other git refs contain unmerged information and need to be
|
||||
-- queried, along with the index and the journal.
|
||||
, cachedFileContents :: [(RawFilePath, L.ByteString)]
|
||||
-- ^ contents of a few files recently read from the branch
|
||||
, needInteractiveAccess :: Bool
|
||||
|
@ -29,4 +35,4 @@ data BranchState = BranchState
|
|||
}
|
||||
|
||||
startBranchState :: BranchState
|
||||
startBranchState = BranchState False False False [] False
|
||||
startBranchState = BranchState False False False [] [] False
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue