implement fullybalanced=group:N

Rebalancing this when it gets into a suboptimal situation will need
further work.
This commit is contained in:
Joey Hess 2024-08-20 13:50:47 -04:00
parent d4b2f8201d
commit 476d223bce
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 28 additions and 15 deletions

View file

@ -17,16 +17,18 @@ import Data.Bits (shiftL)
import qualified Data.Set as S
import qualified Data.ByteArray as BA
type BalancedPicker = S.Set UUID -> Key -> UUID
-- The Int is how many UUIDs to pick.
type BalancedPicker = S.Set UUID -> Key -> Int -> [UUID]
-- The set of UUIDs provided here are all the UUIDs that are ever
-- expected to be picked among. A subset of that can be provided
-- when later using the BalancedPicker. Neither set can be empty.
balancedPicker :: S.Set UUID -> BalancedPicker
balancedPicker s = \s' key ->
balancedPicker s = \s' key num ->
let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key)
m = fromIntegral (S.size s')
in S.elemAt (fromIntegral (n `mod` m)) s'
in map (\i -> S.elemAt (fromIntegral ((n + i) `mod` m)) s')
[0..fromIntegral (num - 1)]
where
combineduuids = mconcat (map fromUUID (S.toAscList s))
@ -36,7 +38,10 @@ balancedPicker s = \s' key ->
{- The selection for a given key never changes. -}
prop_balanced_stable :: Bool
prop_balanced_stable = balancedPicker us us k == toUUID "332"
prop_balanced_stable = and
[ balancedPicker us us k 1 == [toUUID "332"]
, balancedPicker us us k 3 == [toUUID "332", toUUID "333", toUUID "334"]
]
where
us = S.fromList $ map (toUUID . show) [1..500 :: Int]
k = fromJust $ deserializeKey "WORM--test"

View file

@ -592,7 +592,19 @@ limitBalanced mu getgroupmap groupname = do
}
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
limitFullyBalanced mu getgroupmap want =
case splitc ':' want of
[g] -> go g 1
[g, n] -> maybe
(Left "bad number for fullybalanced")
(go g)
(readish n)
_ -> Left "bad value for fullybalanced"
where
go s n = limitFullyBalanced' mu getgroupmap (toGroup s) n want
limitFullyBalanced' :: Maybe UUID -> Annex GroupMap -> Group -> Int -> MkLimit Annex
limitFullyBalanced' mu getgroupmap g n want = Right $ MatchFiles
{ matchAction = const $ checkKey $ \key -> do
gm <- getgroupmap
let groupmembers = fromMaybe S.empty $
@ -611,16 +623,14 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
return $ if S.null candidates
then False
else case (mu, M.lookup g (balancedPickerByGroup gm)) of
(Just u, Just picker) -> u == picker candidates key
-- The picker returns the list of n UUIDs chosen for this key, so
-- the limit matches when this repository's UUID is among them.
(Just u, Just picker) -> u `elem` picker candidates key n
_ -> False
, matchNeedsFileName = False
, matchNeedsFileContent = False
, matchNeedsKey = True
, matchNeedsLocationLog = False
, matchDesc = "fullybalanced" =? groupname
, matchDesc = "fullybalanced" =? want
}
where
g = toGroup groupname
{- Adds a limit to skip files not using a specified key-value backend. -}
addInBackend :: String -> Annex ()

View file

@ -66,6 +66,10 @@ Planned schedule of work:
command behave non-ideally, the same as the thread concurrency
problems.
* implement size-based balancing, so all balanced repositories are around
the same percent full, perhaps as another preferred
content expression.
* `fullybalanced=foo:2` can get stuck in suboptimal situations. E.g.,
when 2 out of 3 repositories are full, and the 3rd is mostly empty,
it is no longer possible to add new files to 2 repositories.
@ -80,17 +84,11 @@ Planned schedule of work:
Size based rebalancing may offer a solution; see design.
* "fullybalanced=foo:2" is not currently actually implemented!
* `git-annex info` in the limitedcalc path in cachedAllRepoData
double-counts redundant information from the journal due to using
overLocationLogs. In the other path it does not, and this should be fixed
for consistency and correctness.
* implement size-based balancing, so all balanced repositories are around
the same percent full, either as the default or as another preferred
content expression.
## completed items for August's work on balanced preferred content
* Balanced preferred content basic implementation, including --rebalance