handle transitions with read-only unmerged git-annex branches

Capstone to this feature. Any transitions that have been performed on an
unmerged remote ref but not on the local git-annex branch, or vice-versa
have to be applied on the fly when reading files.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2021-12-28 13:23:32 -04:00
parent 1291a7d86c
commit b1d719f9d2
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 95 additions and 28 deletions

View file

@ -80,6 +80,7 @@ import Logs.Remote.Pure
import Logs.Export.Pure import Logs.Export.Pure
import Logs.Difference.Pure import Logs.Difference.Pure
import qualified Annex.Queue import qualified Annex.Queue
import Types.Transitions
import Annex.Branch.Transitions import Annex.Branch.Transitions
import qualified Annex import qualified Annex
import Annex.Hook import Annex.Hook
@ -184,11 +185,13 @@ updateTo' pairs = do
- query operations still work, although they will need to do - query operations still work, although they will need to do
- additional work since the refs are not merged. -} - additional work since the refs are not merged. -}
catchPermissionDenied catchPermissionDenied
(const (return (UpdateFailedPermissions (map fst tomerge)))) (const (updatefailedperms tomerge))
(go branchref tomerge) (go branchref tomerge)
where where
excludeset s = filter (\(r, _) -> S.notMember r s) excludeset s = filter (\(r, _) -> S.notMember r s)
isnewer (r, _) = inRepo $ Git.Branch.changed fullname r isnewer (r, _) = inRepo $ Git.Branch.changed fullname r
go branchref tomerge = do go branchref tomerge = do
dirty <- journalDirty gitAnnexJournalDir dirty <- journalDirty gitAnnexJournalDir
journalcleaned <- if null tomerge journalcleaned <- if null tomerge
@ -223,6 +226,7 @@ updateTo' pairs = do
{ refsWereMerged = not (null tomerge) { refsWereMerged = not (null tomerge)
, journalClean = journalclean , journalClean = journalclean
} }
go' branchref dirty tomerge jl = stagejournalwhen dirty jl $ do go' branchref dirty tomerge jl = stagejournalwhen dirty jl $ do
let (refs, branches) = unzip tomerge let (refs, branches) = unzip tomerge
merge_desc <- if null tomerge merge_desc <- if null tomerge
@ -248,19 +252,49 @@ updateTo' pairs = do
) )
addMergedRefs tomerge addMergedRefs tomerge
invalidateCache invalidateCache
stagejournalwhen dirty jl a stagejournalwhen dirty jl a
| dirty = stageJournal jl a | dirty = stageJournal jl a
| otherwise = withIndex a | otherwise = withIndex a
-- Preparing for read-only branch access with unmerged remote refs.
updatefailedperms tomerge = do
let refs = map fst tomerge
-- Gather any transitions that are new to either the
-- local branch or a remote ref, which will need to be
-- applied on the fly.
localts <- getLocalTransitions
remotets <- mapM getRefTransitions refs
ts <- if all (localts ==) remotets
then return []
else
let tcs = mapMaybe getTransitionCalculator $
knownTransitionList $
combineTransitions (localts:remotets)
in if null tcs
then return []
else do
config <- Annex.getGitConfig
trustmap <- calcTrustMap <$> getStaged trustLog
remoteconfigmap <- calcRemoteConfigMap <$> getStaged remoteLog
return $ map (\c -> c trustmap remoteconfigmap config) tcs
return $ UpdateFailedPermissions
{ refsUnmerged = refs
, newTransitions = ts
}
{- Gets the content of a file, which may be in the journal, or in the index {- Gets the content of a file, which may be in the journal, or in the index
- (and committed to the branch). - (and committed to the branch).
- -
- Returns an empty string if the file doesn't exist yet. - Returns an empty string if the file doesn't exist yet.
- -
- Updates the branch if necessary, to ensure the most up-to-date available - Updates the branch if necessary, to ensure the most up-to-date available
- content is returned. When permissions prevent updating the branch, - content is returned.
- reads the content from the journal, plus the branch, plus all unmerged -
- refs. - When permissions prevented updating the branch, reads the content from the
- journal, plus the branch, plus all unmerged refs. In this case, any
- transitions that have not been applied to all refs will be applied on
- the fly.
-} -}
get :: RawFilePath -> Annex L.ByteString get :: RawFilePath -> Annex L.ByteString
get file = do get file = do
@ -272,14 +306,25 @@ get file = do
then getRef fullname file then getRef fullname file
else if null (unmergedRefs st) else if null (unmergedRefs st)
then getLocal file then getLocal file
else unmergedbranchfallback (unmergedRefs st) else unmergedbranchfallback st
setCache file content setCache file content
return content return content
where where
unmergedbranchfallback refs = do unmergedbranchfallback st = do
l <- getLocal file l <- getLocal file
bs <- forM refs $ \ref -> getRef ref file bs <- forM (unmergedRefs st) $ \ref -> getRef ref file
return (l <> mconcat bs) let content = l <> mconcat bs
return $ applytransitions (unhandledTransitions st) content
applytransitions [] content = content
applytransitions (changer:rest) content = case changer file content of
PreserveFile -> applytransitions rest content
ChangeFile builder -> do
let content' = toLazyByteString builder
if L.null content'
-- File is deleted, can't run any other
-- transitions on it.
then content'
else applytransitions rest content'
{- When the git-annex branch is unable to be updated due to permissions, {- When the git-annex branch is unable to be updated due to permissions,
- and there are other git-annex branches that have not been merged into - and there are other git-annex branches that have not been merged into
@ -656,11 +701,11 @@ getLocalTransitions =
-} -}
handleTransitions :: JournalLocked -> Transitions -> [Git.Ref] -> Annex Bool handleTransitions :: JournalLocked -> Transitions -> [Git.Ref] -> Annex Bool
handleTransitions jl localts refs = do handleTransitions jl localts refs = do
m <- M.fromList <$> mapM getRefTransitions refs remotets <- mapM getRefTransitions refs
let remotets = M.elems m
if all (localts ==) remotets if all (localts ==) remotets
then return False then return False
else do else do
let m = M.fromList (zip refs remotets)
let allts = combineTransitions (localts:remotets) let allts = combineTransitions (localts:remotets)
let (transitionedrefs, untransitionedrefs) = let (transitionedrefs, untransitionedrefs) =
partition (\r -> M.lookup r m == Just allts) refs partition (\r -> M.lookup r m == Just allts) refs

View file

@ -6,7 +6,6 @@
-} -}
module Annex.Branch.Transitions ( module Annex.Branch.Transitions (
FileTransition(..),
getTransitionCalculator, getTransitionCalculator,
filterBranch, filterBranch,
) where ) where
@ -23,23 +22,17 @@ import Types.TrustLevel
import Types.UUID import Types.UUID
import Types.MetaData import Types.MetaData
import Types.Remote import Types.Remote
import Types.Transitions
import Types.GitConfig (GitConfig) import Types.GitConfig (GitConfig)
import Types.ProposedAccepted import Types.ProposedAccepted
import Annex.SpecialRemote.Config import Annex.SpecialRemote.Config
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder import Data.ByteString.Builder
data FileTransition getTransitionCalculator :: Transition -> Maybe (TrustMap -> M.Map UUID RemoteConfig -> GitConfig -> TransitionCalculator)
= ChangeFile Builder
| PreserveFile
type TransitionCalculator = GitConfig -> RawFilePath -> L.ByteString -> FileTransition
getTransitionCalculator :: Transition -> Maybe (TrustMap -> M.Map UUID RemoteConfig -> TransitionCalculator)
getTransitionCalculator ForgetGitHistory = Nothing getTransitionCalculator ForgetGitHistory = Nothing
getTransitionCalculator ForgetDeadRemotes = Just dropDead getTransitionCalculator ForgetDeadRemotes = Just dropDead
@ -55,7 +48,7 @@ getTransitionCalculator ForgetDeadRemotes = Just dropDead
-- the latter uuid, that also needs to be removed. The sameas-uuid -- the latter uuid, that also needs to be removed. The sameas-uuid
-- is not removed from the remote log, for the same reason the trust log -- is not removed from the remote log, for the same reason the trust log
-- is not changed. -- is not changed.
dropDead :: TrustMap -> M.Map UUID RemoteConfig -> TransitionCalculator dropDead :: TrustMap -> M.Map UUID RemoteConfig -> GitConfig -> TransitionCalculator
dropDead trustmap remoteconfigmap gc f content dropDead trustmap remoteconfigmap gc f content
| f == trustLog = PreserveFile | f == trustLog = PreserveFile
| f == remoteLog = ChangeFile $ | f == remoteLog = ChangeFile $
@ -78,7 +71,7 @@ dropDead trustmap remoteconfigmap gc f content
| otherwise = l | otherwise = l
minimizesameasdead' c = M.restrictKeys c (S.singleton sameasUUIDField) minimizesameasdead' c = M.restrictKeys c (S.singleton sameasUUIDField)
filterBranch :: (UUID -> Bool) -> TransitionCalculator filterBranch :: (UUID -> Bool) -> GitConfig -> TransitionCalculator
filterBranch wantuuid gc f content = case getLogVariety gc f of filterBranch wantuuid gc f content = case getLogVariety gc f of
Just OldUUIDBasedLog -> ChangeFile $ Just OldUUIDBasedLog -> ChangeFile $
UUIDBased.buildLogOld byteString $ UUIDBased.buildLogOld byteString $

View file

@ -11,6 +11,7 @@ module Annex.BranchState where
import Annex.Common import Annex.Common
import Types.BranchState import Types.BranchState
import Types.Transitions
import qualified Annex import qualified Annex
import Logs import Logs
import qualified Git import qualified Git
@ -38,6 +39,7 @@ data UpdateMade
} }
| UpdateFailedPermissions | UpdateFailedPermissions
{ refsUnmerged :: [Git.Sha] { refsUnmerged :: [Git.Sha]
, newTransitions :: [TransitionCalculator]
} }
{- Runs an action to update the branch, if it's not been updated before {- Runs an action to update the branch, if it's not been updated before
@ -70,6 +72,7 @@ runUpdateOnce update = do
{ branchUpdated = True { branchUpdated = True
, journalIgnorable = False , journalIgnorable = False
, unmergedRefs = refsUnmerged um , unmergedRefs = refsUnmerged um
, unhandledTransitions = newTransitions um
, cachedFileContents = [] , cachedFileContents = []
} }
changeState stf changeState stf

View file

@ -12,7 +12,8 @@ module Command.FilterBranch where
import Command import Command
import qualified Annex import qualified Annex
import qualified Annex.Branch import qualified Annex.Branch
import Annex.Branch.Transitions (filterBranch, FileTransition(..)) import Annex.Branch.Transitions
import Types.Transitions
import Annex.HashObject import Annex.HashObject
import Annex.Tmp import Annex.Tmp
import Annex.SpecialRemote.Config import Annex.SpecialRemote.Config

View file

@ -104,8 +104,7 @@ recordTransitions :: (RawFilePath -> (L.ByteString -> Builder) -> Annex ()) -> T
recordTransitions changer t = changer transitionsLog $ recordTransitions changer t = changer transitionsLog $
buildTransitions . S.union t . parseTransitionsStrictly "local" buildTransitions . S.union t . parseTransitionsStrictly "local"
getRefTransitions :: Git.Ref -> Annex (Git.Ref, Transitions) getRefTransitions :: Git.Ref -> Annex Transitions
getRefTransitions ref = do getRefTransitions ref =
ts <- parseTransitionsStrictly (fromRef ref) parseTransitionsStrictly (fromRef ref)
<$> catFile ref transitionsLog <$> catFile ref transitionsLog
return (ref, ts)

View file

@ -9,6 +9,7 @@ module Types.BranchState where
import Common import Common
import qualified Git import qualified Git
import Types.Transitions
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
@ -25,6 +26,9 @@ data BranchState = BranchState
-- ^ when the branch was not able to be updated due to permissions, -- ^ when the branch was not able to be updated due to permissions,
-- these other git refs contain unmerged information and need to be -- these other git refs contain unmerged information and need to be
-- queried, along with the index and the journal. -- queried, along with the index and the journal.
, unhandledTransitions :: [TransitionCalculator]
-- ^ when the branch was not able to be updated due to permissions,
-- this is transitions that need to be applied when making queries.
, cachedFileContents :: [(RawFilePath, L.ByteString)] , cachedFileContents :: [(RawFilePath, L.ByteString)]
-- ^ contents of a few files recently read from the branch -- ^ contents of a few files recently read from the branch
, needInteractiveAccess :: Bool , needInteractiveAccess :: Bool
@ -35,4 +39,4 @@ data BranchState = BranchState
} }
startBranchState :: BranchState startBranchState :: BranchState
startBranchState = BranchState False False False [] [] False startBranchState = BranchState False False False [] [] [] False

19
Types/Transitions.hs Normal file
View file

@ -0,0 +1,19 @@
{- git-annex transitions data types
-
- Copyright 2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.Transitions where
import Utility.RawFilePath
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Builder
data FileTransition
= ChangeFile Builder
| PreserveFile
type TransitionCalculator = RawFilePath -> L.ByteString -> FileTransition

View file

@ -3,9 +3,11 @@
subject="""comment 6""" subject="""comment 6"""
date="2021-12-27T17:38:49Z" date="2021-12-27T17:38:49Z"
content=""" content="""
Update: Completed and merged!
Current list of items that need to be fixed before this is mergeable: Current list of items that need to be fixed before this is mergeable:
- When there is a transition in one of the remote git-annex branches - (fixed) When there is a transition in one of the remote git-annex branches
that has not yet been applied to the local or other git-annex branches. that has not yet been applied to the local or other git-annex branches.
Transitions are not handled. Transitions are not handled.
- (fixed) `git-annex log` runs git log on the git-annex branch, and so - (fixed) `git-annex log` runs git log on the git-annex branch, and so

View file

@ -1052,6 +1052,7 @@ Executable git-annex
Types.Transfer Types.Transfer
Types.Transferrer Types.Transferrer
Types.TransferrerPool Types.TransferrerPool
Types.Transitions
Types.TrustLevel Types.TrustLevel
Types.UUID Types.UUID
Types.UrlContents Types.UrlContents