From 745bc5c547480e666aefc19c983c606fb2fb16ac Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 13 Aug 2024 11:00:20 -0400 Subject: [PATCH] take maxsize into account for balanced preferred content This is very inefficient, it will need to be optimised not to calculate the sizes of repos every time. Also, fixed a bug in balancedPicker that caused it to pick a too high index when some repos were excluded due to being full. --- Annex/Balanced.hs | 4 ++-- Annex/RepoSize.hs | 33 +++++++++++++++++++++++++++++++++ Limit.hs | 23 ++++++++++++++++++----- doc/todo/git-annex_proxies.mdwn | 10 ++++++---- git-annex.cabal | 1 + 5 files changed, 60 insertions(+), 11 deletions(-) create mode 100644 Annex/RepoSize.hs diff --git a/Annex/Balanced.hs b/Annex/Balanced.hs index 46089ea0be..ad917ef1e5 100644 --- a/Annex/Balanced.hs +++ b/Annex/Balanced.hs @@ -21,14 +21,14 @@ type BalancedPicker = S.Set UUID -> Key -> UUID -- The set of UUIDs provided here are all the UUIDs that are ever -- expected to be picked amoung. A subset of that can be provided --- when later using the BalancedPicker. +-- when later using the BalancedPicker. Neither set can be empty. balancedPicker :: S.Set UUID -> BalancedPicker balancedPicker s = \s' key -> let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key) + m = fromIntegral (S.size s') in S.elemAt (fromIntegral (n `mod` m)) s' where combineduuids = mconcat (map fromUUID (S.toAscList s)) - m = fromIntegral (S.size s) tointeger :: Digest a -> Integer tointeger = foldl' (\i b -> (i `shiftL` 8) + fromIntegral b) 0 diff --git a/Annex/RepoSize.hs b/Annex/RepoSize.hs new file mode 100644 index 0000000000..e2f1702254 --- /dev/null +++ b/Annex/RepoSize.hs @@ -0,0 +1,33 @@ +{- git-annex repo sizes + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +module Annex.RepoSize where + +import Annex.Common +import Types.RepoSize +import Logs.Location +import Logs.UUID + +import qualified Data.Map.Strict as M + +{- Sum up the sizes of all keys in all repositories, from the information + - in the git-annex branch. Can be slow. + - + - The map includes the UUIDs of all known repositories, including + - repositories that are empty. + -} +calcRepoSizes :: Annex (M.Map UUID RepoSize) +calcRepoSizes = do + knownuuids <- M.keys <$> uuidDescMap + let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids + overLocationLogs startmap $ \k locs m -> + return $ + let sz = fromMaybe 0 $ fromKey keySize k + in foldl' (flip $ M.alter $ addksz sz) m locs + where + addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz + addksz ksz Nothing = Just $ RepoSize ksz diff --git a/Limit.hs b/Limit.hs index 850abfaaef..4f558a7fa9 100644 --- a/Limit.hs +++ b/Limit.hs @@ -17,6 +17,9 @@ import Annex.Content import Annex.WorkTree import Annex.UUID import Annex.Magic +import Annex.RepoSize +import Types.RepoSize +import Logs.MaxSize import Annex.Link import Types.Link import Logs.Trust @@ -590,14 +593,24 @@ limitBalanced mu getgroupmap groupname = do limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles - { matchAction = const $ checkKey $ \key -> do + { matchAction = \notpresent -> checkKey $ \key -> do gm <- getgroupmap let groupmembers = fromMaybe S.empty $ M.lookup g (uuidsByGroup gm) - -- TODO free space checking - return $ case (mu, M.lookup g (balancedPickerByGroup gm)) of - (Just u, Just picker) -> u == picker groupmembers key - _ -> False + maxsizes <- getMaxSizes + -- XXX do not calc this every time! 
+ sizemap <- calcRepoSizes + let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of + (Just (MaxSize maxsize), Just (RepoSize reposize)) -> + reposize + fromMaybe 0 (fromKey keySize key) + <= maxsize + _ -> True + let candidates = S.filter hasspace groupmembers + return $ if S.null candidates + then False + else case (mu, M.lookup g (balancedPickerByGroup gm)) of + (Just u, Just picker) -> u == picker candidates key + _ -> False , matchNeedsFileName = False , matchNeedsFileContent = False , matchNeedsKey = True diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 145f3cf27e..1b9607c713 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -45,13 +45,15 @@ Planned schedule of work: Also note that "fullybalanced=foo:2" is not currently actually implemented! -* implement size-based balancing, either as the default or as another - preferred content expression. +* implement size-based balancing, so all balanced repositories are around + the same percent full, either as the default or as another preferred + content expression. * `git-annex info` can use maxsize to display how full repositories are -* balanced= and fullybalanced= need to limit the set of repositories to - ones with enough free space to contain a key. +* --rebalance is not stable. It will drop a key that was just stored in a + repo. Seems that limitFullyBalanced needs to take AssumeNotPresent + into account to handle dropping correctly. * Implement [[track_free_space_in_repos_via_git-annex_branch]]: diff --git a/git-annex.cabal b/git-annex.cabal index 713c9e658c..b54a55c7a6 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -574,6 +574,7 @@ Executable git-annex Annex.Queue Annex.ReplaceFile Annex.RemoteTrackingBranch + Annex.RepoSize Annex.SafeDropProof Annex.SpecialRemote Annex.SpecialRemote.Config