improve handling of unmerged git-annex branches in readonly repo

git-annex info was displaying a message that didn't make sense in
context.

In calcRepoSizes, it seems better to return the information from the
git-annex branch, rather than giving up. Especially since balanced
preferred content uses it, and we can't just give up evaluating a
preferred content expression if git-annex is to be usable in such a
readonly repo.

Commit 6d7ecd9e5d nobly wanted git-annex
to behave the same with such unmerged branches as it does when it can
merge them. But for the purposes of preferred content, it seems to me
there's a sense that such an unmerged branch is the same as a remote we
have not pulled from. The balanced preferred content will either way
operate under outdated information, and so make not the best choices.
This commit is contained in:
Joey Hess 2024-08-13 12:42:04 -04:00
parent 5c35b3d579
commit 467d80101a
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 46 additions and 27 deletions

View file

@ -1,6 +1,6 @@
{- management of the git-annex branch
-
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -36,6 +36,7 @@ module Annex.Branch (
performTransitions,
withIndex,
precache,
UnmergedBranches(..),
overBranchFileContents,
updatedFromTree,
) where
@ -977,6 +978,15 @@ prepRememberTreeish treeish graftpoint parent = do
inRepo $ Git.Branch.commitTree cmode
["graft cleanup"] [c] origtree
{- UnmergedBranches is used to indicate when a value was calculated in a
- read-only repository that has other git-annex branches that have not
- been merged in. The value does not include information from those
- branches.
-}
data UnmergedBranches t
= UnmergedBranches t
| NoUnmergedBranches t
{- Runs an action on the content of selected files from the branch.
- This is much faster than reading the content of each file in turn,
- because it lets git cat-file stream content without blocking.
@ -989,20 +999,17 @@ prepRememberTreeish treeish graftpoint parent = do
- with different content. This happens rarely, only when the journal
- contains additional information, and the last version of the
- file it returns is the most current one.
-
- In a read-only repository that has other git-annex branches that have
- not been merged in, returns Nothing, because it's not possible to
- efficiently handle that.
-}
overBranchFileContents
:: (RawFilePath -> Maybe v)
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a)
-> Annex (Maybe a)
-> Annex (UnmergedBranches a)
overBranchFileContents select go = do
st <- update
if not (null (unmergedRefs st))
then return Nothing
else Just <$> overBranchFileContents' select go st
v <- overBranchFileContents' select go st
return $ if not (null (unmergedRefs st))
then UnmergedBranches v
else NoUnmergedBranches v
overBranchFileContents'
:: (RawFilePath -> Maybe v)

View file

@ -8,6 +8,7 @@
module Annex.RepoSize where
import Annex.Common
import Annex.Branch (UnmergedBranches(..))
import Types.RepoSize
import Logs.Location
import Logs.UUID
@ -24,10 +25,12 @@ calcRepoSizes :: Annex (M.Map UUID RepoSize)
calcRepoSizes = do
knownuuids <- M.keys <$> uuidDescMap
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
overLocationLogs startmap $ \k locs m ->
return $
let sz = fromMaybe 0 $ fromKey keySize k
in foldl' (flip $ M.alter $ addksz sz) m locs
overLocationLogs startmap accum >>= \case
UnmergedBranches m -> return m
NoUnmergedBranches m -> return m
where
addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz
addksz ksz Nothing = Just $ RepoSize ksz
accum k locs m = return $
let sz = fromMaybe 0 $ fromKey keySize k
in foldl' (flip $ M.alter $ addksz sz) m locs

View file

@ -287,6 +287,9 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
overLocationLogs' ()
(\reader cont -> checktimelimit (discard reader) cont)
(\k _ () -> keyaction Nothing (SeekInput [], k, mkActionItem k))
>>= \case
Annex.Branch.NoUnmergedBranches () -> return ()
Annex.Branch.UnmergedBranches () -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
runkeyaction getks = do
keyaction <- mkkeyaction

View file

@ -33,6 +33,7 @@ import Annex.WorkTree
import Logs.UUID
import Logs.Trust
import Logs.Location
import Annex.Branch (UnmergedBranches(..))
import Annex.NumCopies
import Git.Config (boolConfig)
import qualified Git.LsTree as LsTree
@ -640,7 +641,7 @@ cachedAllRepoData = do
Just _ -> return s
Nothing -> do
matcher <- lift getKeyOnlyMatcher
!(d, rd) <- lift $ overLocationLogs (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
r <- lift $ overLocationLogs (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
ifM (matchOnKey matcher k)
( do
alivelocs <- snd
@ -650,9 +651,14 @@ cachedAllRepoData = do
return (d', rd')
, return (d, rd)
)
let s' = s { allRepoData = Just d, repoData = rd }
put s'
return s'
case r of
NoUnmergedBranches (!(d, rd)) -> do
let s' = s { allRepoData = Just d, repoData = rd }
put s'
return s'
UnmergedBranches _ -> do
lift $ warning "This repository is read-only, and there are unmerged git-annex branches. Information from those branches is not included here."
return s
where
accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo)

View file

@ -188,8 +188,8 @@ updateFromLog db@(ImportFeedDbHandle h) (oldtree, currtree)
-- is faster than diffing from the empty tree
-- and looking up every log file.
scanbranch = Annex.Branch.overBranchFileContents toscan goscan >>= \case
Just () -> return ()
Nothing -> scandiff
Annex.Branch.NoUnmergedBranches () -> return ()
Annex.Branch.UnmergedBranches () -> scandiff
toscan f
| isUrlLog f = Just ()

View file

@ -600,10 +600,12 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
maxsizes <- getMaxSizes
-- XXX do not calc this every time!
sizemap <- calcRepoSizes
let keysize = fromMaybe 0 (fromKey keySize key)
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
(Just (MaxSize maxsize), Just (RepoSize reposize)) ->
reposize + fromMaybe 0 (fromKey keySize key)
<= maxsize
if maybe False (`S.member` notpresent) mu
then reposize <= maxsize
else reposize + keysize <= maxsize
_ -> True
let candidates = S.filter hasspace groupmembers
return $ if S.null candidates

View file

@ -219,18 +219,18 @@ loggedKeysFor' u = loggedKeys' isthere
return there
{- This is much faster than loggedKeys. -}
overLocationLogs :: v -> (Key -> [UUID] -> v -> Annex v) -> Annex v
overLocationLogs :: v -> (Key -> [UUID] -> v -> Annex v) -> Annex (Annex.Branch.UnmergedBranches v)
overLocationLogs v = overLocationLogs' v (flip const)
overLocationLogs'
:: v
-> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v)
-> (Key -> [UUID] -> v -> Annex v)
-> Annex v
-> Annex (Annex.Branch.UnmergedBranches v)
overLocationLogs' iv discarder keyaction = do
config <- Annex.getGitConfig
clusters <- getClusters
let getk = locationLogFileKey config
let go v reader = reader >>= \case
Just (k, f, content) -> discarder reader $ do
@ -245,9 +245,7 @@ overLocationLogs' iv discarder keyaction = do
)
Nothing -> return v
Annex.Branch.overBranchFileContents getk (go iv) >>= \case
Just r -> return r
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
Annex.Branch.overBranchFileContents getk (go iv)
-- Cannot import Logs.Cluster due to a cycle.
-- Annex.clusters gets populated when starting up git-annex.