From 476d223bce969d8be95dda9e0da3da1c302f4c80 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 20 Aug 2024 13:50:47 -0400 Subject: [PATCH] implement fullybalanced=group:N Rebalancing this when it gets into a suboptimal situation will need further work. --- Annex/Balanced.hs | 13 +++++++++---- Limit.hs | 20 +++++++++++++++----- doc/todo/git-annex_proxies.mdwn | 10 ++++------ 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/Annex/Balanced.hs b/Annex/Balanced.hs index ad917ef1e5..ab643287d6 100644 --- a/Annex/Balanced.hs +++ b/Annex/Balanced.hs @@ -17,16 +17,18 @@ import Data.Bits (shiftL) import qualified Data.Set as S import qualified Data.ByteArray as BA -type BalancedPicker = S.Set UUID -> Key -> UUID +-- The Int is how many UUIDs to pick. +type BalancedPicker = S.Set UUID -> Key -> Int -> [UUID] -- The set of UUIDs provided here are all the UUIDs that are ever -- expected to be picked amoung. A subset of that can be provided -- when later using the BalancedPicker. Neither set can be empty. balancedPicker :: S.Set UUID -> BalancedPicker -balancedPicker s = \s' key -> +balancedPicker s = \s' key num -> let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key) m = fromIntegral (S.size s') - in S.elemAt (fromIntegral (n `mod` m)) s' + in map (\i -> S.elemAt (fromIntegral ((n + i) `mod` m)) s') + [0..fromIntegral (num - 1)] where combineduuids = mconcat (map fromUUID (S.toAscList s)) @@ -36,7 +38,10 @@ balancedPicker s = \s' key -> {- The selection for a given key never changes. -} prop_balanced_stable :: Bool -prop_balanced_stable = balancedPicker us us k == toUUID "332" +prop_balanced_stable = and + [ balancedPicker us us k 1 == [toUUID "332"] + , balancedPicker us us k 3 == [toUUID "332", toUUID "333", toUUID "334"] + ] where us = S.fromList $ map (toUUID . 
show) [1..500 :: Int] k = fromJust $ deserializeKey "WORM--test" diff --git a/Limit.hs b/Limit.hs index d74befbc31..d1b222f456 100644 --- a/Limit.hs +++ b/Limit.hs @@ -592,7 +592,19 @@ limitBalanced mu getgroupmap groupname = do } limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex -limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles +limitFullyBalanced mu getgroupmap want = + case splitc ':' want of + [g] -> go g 1 + [g, n] -> maybe + (Left "bad number for fullybalanced") + (go g) + (readish n) + _ -> Left "bad value for fullybalanced" + where + go s n = limitFullyBalanced' mu getgroupmap (toGroup s) n want + +limitFullyBalanced' :: Maybe UUID -> Annex GroupMap -> Group -> Int -> MkLimit Annex +limitFullyBalanced' mu getgroupmap g n want = Right $ MatchFiles { matchAction = const $ checkKey $ \key -> do gm <- getgroupmap let groupmembers = fromMaybe S.empty $ @@ -611,16 +623,14 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles return $ if S.null candidates then False else case (mu, M.lookup g (balancedPickerByGroup gm)) of - (Just u, Just picker) -> u == picker candidates key + (Just u, Just picker) -> u `elem` picker candidates key n _ -> False , matchNeedsFileName = False , matchNeedsFileContent = False , matchNeedsKey = True , matchNeedsLocationLog = False - , matchDesc = "fullybalanced" =? groupname + , matchDesc = "fullybalanced" =? want } - where - g = toGroup groupname {- Adds a limit to skip files not using a specified key-value backend. -} addInBackend :: String -> Annex () diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 2ab97d85e6..cfd3530e7f 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -66,6 +66,10 @@ Planned schedule of work: command behave non-ideally, the same as the thread concurrency problems. 
+* implement size-based balancing, so all balanced repositories are around + the same percent full, perhaps as another preferred + content expression. + * `fullybalanced=foo:2` can get stuck in suboptimal situations. Eg, when 2 out of 3 repositories are full, and the 3rd is mostly empty, it is no longer possible to add new files to 2 repositories. @@ -80,17 +84,11 @@ Planned schedule of work: Size based rebalancing may offer a solution; see design. -* "fullybalanced=foo:2" is not currently actually implemented! - * `git-annex info` in the limitedcalc path in cachedAllRepoData double-counts redundant information from the journal due to using overLocationLogs. In the other path it does not, and this should be fixed for consistency and correctness. -* implement size-based balancing, so all balanced repositories are around - the same percent full, either as the default or as another preferred - content expression. - ## completed items for August's work on balanced preferred content * Balanced preferred content basic implementation, including --rebalance