improve handling of unmerged git-annex branches in readonly repo

git-annex info was displaying a message that didn't make sense in
context.

In calcRepoSizes, it seems better to return the information from the
git-annex branch, rather than giving up. Especially since balanced
preferred content uses it, and we can't just give up evaluating a
preferred content expression if git-annex is to be usable in such a
readonly repo.

Commit 6d7ecd9e5d nobly wanted git-annex
to behave the same with such unmerged branches as it does when it can
merge them. But for the purposes of preferred content, it seems to me
there's a sense that such an unmerged branch is the same as a remote we
have not pulled from. The balanced preferred content will either way
operate under outdated information, and so make not the best choices.
This commit is contained in:
Joey Hess 2024-08-13 12:42:04 -04:00
parent 5c35b3d579
commit 467d80101a
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 46 additions and 27 deletions

View file

@ -1,6 +1,6 @@
{- management of the git-annex branch {- management of the git-annex branch
- -
- Copyright 2011-2023 Joey Hess <id@joeyh.name> - Copyright 2011-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -36,6 +36,7 @@ module Annex.Branch (
performTransitions, performTransitions,
withIndex, withIndex,
precache, precache,
UnmergedBranches(..),
overBranchFileContents, overBranchFileContents,
updatedFromTree, updatedFromTree,
) where ) where
@ -977,6 +978,15 @@ prepRememberTreeish treeish graftpoint parent = do
inRepo $ Git.Branch.commitTree cmode inRepo $ Git.Branch.commitTree cmode
["graft cleanup"] [c] origtree ["graft cleanup"] [c] origtree
{- UnmergedBranches is used to indicate when a value was calculated in a
- read-only repository that has other git-annex branches that have not
- been merged in. The value does not include information from those
- branches.
-}
data UnmergedBranches t
= UnmergedBranches t
| NoUnmergedBranches t
{- Runs an action on the content of selected files from the branch. {- Runs an action on the content of selected files from the branch.
- This is much faster than reading the content of each file in turn, - This is much faster than reading the content of each file in turn,
- because it lets git cat-file stream content without blocking. - because it lets git cat-file stream content without blocking.
@ -989,20 +999,17 @@ prepRememberTreeish treeish graftpoint parent = do
- with different content. This happens rarely, only when the journal - with different content. This happens rarely, only when the journal
- contains additional information, and the last version of the - contains additional information, and the last version of the
- file it returns is the most current one. - file it returns is the most current one.
-
- In a read-only repository that has other git-annex branches that have
- not been merged in, returns Nothing, because it's not possible to
- efficiently handle that.
-} -}
overBranchFileContents overBranchFileContents
:: (RawFilePath -> Maybe v) :: (RawFilePath -> Maybe v)
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a) -> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a)
-> Annex (Maybe a) -> Annex (UnmergedBranches a)
overBranchFileContents select go = do overBranchFileContents select go = do
st <- update st <- update
if not (null (unmergedRefs st)) v <- overBranchFileContents' select go st
then return Nothing return $ if not (null (unmergedRefs st))
else Just <$> overBranchFileContents' select go st then UnmergedBranches v
else NoUnmergedBranches v
overBranchFileContents' overBranchFileContents'
:: (RawFilePath -> Maybe v) :: (RawFilePath -> Maybe v)

View file

@ -8,6 +8,7 @@
module Annex.RepoSize where module Annex.RepoSize where
import Annex.Common import Annex.Common
import Annex.Branch (UnmergedBranches(..))
import Types.RepoSize import Types.RepoSize
import Logs.Location import Logs.Location
import Logs.UUID import Logs.UUID
@ -24,10 +25,12 @@ calcRepoSizes :: Annex (M.Map UUID RepoSize)
calcRepoSizes = do calcRepoSizes = do
knownuuids <- M.keys <$> uuidDescMap knownuuids <- M.keys <$> uuidDescMap
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
overLocationLogs startmap $ \k locs m -> overLocationLogs startmap accum >>= \case
return $ UnmergedBranches m -> return m
let sz = fromMaybe 0 $ fromKey keySize k NoUnmergedBranches m -> return m
in foldl' (flip $ M.alter $ addksz sz) m locs
where where
addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz
addksz ksz Nothing = Just $ RepoSize ksz addksz ksz Nothing = Just $ RepoSize ksz
accum k locs m = return $
let sz = fromMaybe 0 $ fromKey keySize k
in foldl' (flip $ M.alter $ addksz sz) m locs

View file

@ -287,6 +287,9 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
overLocationLogs' () overLocationLogs' ()
(\reader cont -> checktimelimit (discard reader) cont) (\reader cont -> checktimelimit (discard reader) cont)
(\k _ () -> keyaction Nothing (SeekInput [], k, mkActionItem k)) (\k _ () -> keyaction Nothing (SeekInput [], k, mkActionItem k))
>>= \case
Annex.Branch.NoUnmergedBranches () -> return ()
Annex.Branch.UnmergedBranches () -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
runkeyaction getks = do runkeyaction getks = do
keyaction <- mkkeyaction keyaction <- mkkeyaction

View file

@ -33,6 +33,7 @@ import Annex.WorkTree
import Logs.UUID import Logs.UUID
import Logs.Trust import Logs.Trust
import Logs.Location import Logs.Location
import Annex.Branch (UnmergedBranches(..))
import Annex.NumCopies import Annex.NumCopies
import Git.Config (boolConfig) import Git.Config (boolConfig)
import qualified Git.LsTree as LsTree import qualified Git.LsTree as LsTree
@ -640,7 +641,7 @@ cachedAllRepoData = do
Just _ -> return s Just _ -> return s
Nothing -> do Nothing -> do
matcher <- lift getKeyOnlyMatcher matcher <- lift getKeyOnlyMatcher
!(d, rd) <- lift $ overLocationLogs (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do r <- lift $ overLocationLogs (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
ifM (matchOnKey matcher k) ifM (matchOnKey matcher k)
( do ( do
alivelocs <- snd alivelocs <- snd
@ -650,9 +651,14 @@ cachedAllRepoData = do
return (d', rd') return (d', rd')
, return (d, rd) , return (d, rd)
) )
let s' = s { allRepoData = Just d, repoData = rd } case r of
put s' NoUnmergedBranches (!(d, rd)) -> do
return s' let s' = s { allRepoData = Just d, repoData = rd }
put s'
return s'
UnmergedBranches _ -> do
lift $ warning "This repository is read-only, and there are unmerged git-annex branches. Information from those branches is not included here."
return s
where where
accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo) accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo)

View file

@ -188,8 +188,8 @@ updateFromLog db@(ImportFeedDbHandle h) (oldtree, currtree)
-- is faster than diffing from the empty tree -- is faster than diffing from the empty tree
-- and looking up every log file. -- and looking up every log file.
scanbranch = Annex.Branch.overBranchFileContents toscan goscan >>= \case scanbranch = Annex.Branch.overBranchFileContents toscan goscan >>= \case
Just () -> return () Annex.Branch.NoUnmergedBranches () -> return ()
Nothing -> scandiff Annex.Branch.UnmergedBranches () -> scandiff
toscan f toscan f
| isUrlLog f = Just () | isUrlLog f = Just ()

View file

@ -600,10 +600,12 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
maxsizes <- getMaxSizes maxsizes <- getMaxSizes
-- XXX do not calc this every time! -- XXX do not calc this every time!
sizemap <- calcRepoSizes sizemap <- calcRepoSizes
let keysize = fromMaybe 0 (fromKey keySize key)
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
(Just (MaxSize maxsize), Just (RepoSize reposize)) -> (Just (MaxSize maxsize), Just (RepoSize reposize)) ->
reposize + fromMaybe 0 (fromKey keySize key) if maybe False (`S.member` notpresent) mu
<= maxsize then reposize <= maxsize
else reposize + keysize <= maxsize
_ -> True _ -> True
let candidates = S.filter hasspace groupmembers let candidates = S.filter hasspace groupmembers
return $ if S.null candidates return $ if S.null candidates

View file

@ -219,18 +219,18 @@ loggedKeysFor' u = loggedKeys' isthere
return there return there
{- This is much faster than loggedKeys. -} {- This is much faster than loggedKeys. -}
overLocationLogs :: v -> (Key -> [UUID] -> v -> Annex v) -> Annex v overLocationLogs :: v -> (Key -> [UUID] -> v -> Annex v) -> Annex (Annex.Branch.UnmergedBranches v)
overLocationLogs v = overLocationLogs' v (flip const) overLocationLogs v = overLocationLogs' v (flip const)
overLocationLogs' overLocationLogs'
:: v :: v
-> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v) -> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v)
-> (Key -> [UUID] -> v -> Annex v) -> (Key -> [UUID] -> v -> Annex v)
-> Annex v -> Annex (Annex.Branch.UnmergedBranches v)
overLocationLogs' iv discarder keyaction = do overLocationLogs' iv discarder keyaction = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
clusters <- getClusters clusters <- getClusters
let getk = locationLogFileKey config let getk = locationLogFileKey config
let go v reader = reader >>= \case let go v reader = reader >>= \case
Just (k, f, content) -> discarder reader $ do Just (k, f, content) -> discarder reader $ do
@ -245,9 +245,7 @@ overLocationLogs' iv discarder keyaction = do
) )
Nothing -> return v Nothing -> return v
Annex.Branch.overBranchFileContents getk (go iv) >>= \case Annex.Branch.overBranchFileContents getk (go iv)
Just r -> return r
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
-- Cannot import Logs.Cluster due to a cycle. -- Cannot import Logs.Cluster due to a cycle.
-- Annex.clusters gets populated when starting up git-annex. -- Annex.clusters gets populated when starting up git-annex.