implement fullybalanced=group:N

Rebalancing this when it gets into a suboptimal situation will need
further work.
This commit is contained in:
Joey Hess 2024-08-20 13:50:47 -04:00
parent d4b2f8201d
commit 476d223bce
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 28 additions and 15 deletions

View file

@ -17,16 +17,18 @@ import Data.Bits (shiftL)
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.ByteArray as BA import qualified Data.ByteArray as BA
type BalancedPicker = S.Set UUID -> Key -> UUID -- The Int is how many UUIDs to pick.
type BalancedPicker = S.Set UUID -> Key -> Int -> [UUID]
-- The set of UUIDs provided here are all the UUIDs that are ever -- The set of UUIDs provided here are all the UUIDs that are ever
-- expected to be picked among. A subset of that can be provided -- expected to be picked among. A subset of that can be provided
-- when later using the BalancedPicker. Neither set can be empty. -- when later using the BalancedPicker. Neither set can be empty.
balancedPicker :: S.Set UUID -> BalancedPicker balancedPicker :: S.Set UUID -> BalancedPicker
balancedPicker s = \s' key -> balancedPicker s = \s' key num ->
let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key) let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key)
m = fromIntegral (S.size s') m = fromIntegral (S.size s')
in S.elemAt (fromIntegral (n `mod` m)) s' in map (\i -> S.elemAt (fromIntegral ((n + i) `mod` m)) s')
[0..fromIntegral (num - 1)]
where where
combineduuids = mconcat (map fromUUID (S.toAscList s)) combineduuids = mconcat (map fromUUID (S.toAscList s))
@ -36,7 +38,10 @@ balancedPicker s = \s' key ->
{- The selection for a given key never changes. -} {- The selection for a given key never changes. -}
prop_balanced_stable :: Bool prop_balanced_stable :: Bool
prop_balanced_stable = balancedPicker us us k == toUUID "332" prop_balanced_stable = and
[ balancedPicker us us k 1 == [toUUID "332"]
, balancedPicker us us k 3 == [toUUID "332", toUUID "333", toUUID "334"]
]
where where
us = S.fromList $ map (toUUID . show) [1..500 :: Int] us = S.fromList $ map (toUUID . show) [1..500 :: Int]
k = fromJust $ deserializeKey "WORM--test" k = fromJust $ deserializeKey "WORM--test"

View file

@ -592,7 +592,19 @@ limitBalanced mu getgroupmap groupname = do
} }
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles limitFullyBalanced mu getgroupmap want =
case splitc ':' want of
[g] -> go g 1
[g, n] -> maybe
(Left "bad number for fullybalanced")
(go g)
(readish n)
_ -> Left "bad value for fullybalanced"
where
go s n = limitFullyBalanced' mu getgroupmap (toGroup s) n want
limitFullyBalanced' :: Maybe UUID -> Annex GroupMap -> Group -> Int -> MkLimit Annex
limitFullyBalanced' mu getgroupmap g n want = Right $ MatchFiles
{ matchAction = const $ checkKey $ \key -> do { matchAction = const $ checkKey $ \key -> do
gm <- getgroupmap gm <- getgroupmap
let groupmembers = fromMaybe S.empty $ let groupmembers = fromMaybe S.empty $
@ -611,16 +623,14 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
return $ if S.null candidates return $ if S.null candidates
then False then False
else case (mu, M.lookup g (balancedPickerByGroup gm)) of else case (mu, M.lookup g (balancedPickerByGroup gm)) of
-- The picker now returns the list of UUIDs chosen for the key, so the new side must test membership rather than equality.
(Just u, Just picker) -> u == picker candidates key (Just u, Just picker) -> u `elem` picker candidates key n
_ -> False _ -> False
, matchNeedsFileName = False , matchNeedsFileName = False
, matchNeedsFileContent = False , matchNeedsFileContent = False
, matchNeedsKey = True , matchNeedsKey = True
, matchNeedsLocationLog = False , matchNeedsLocationLog = False
, matchDesc = "fullybalanced" =? groupname , matchDesc = "fullybalanced" =? want
} }
where
g = toGroup groupname
{- Adds a limit to skip files not using a specified key-value backend. -} {- Adds a limit to skip files not using a specified key-value backend. -}
addInBackend :: String -> Annex () addInBackend :: String -> Annex ()

View file

@ -66,6 +66,10 @@ Planned schedule of work:
command behave non-ideally, the same as the thread concurrency command behave non-ideally, the same as the thread concurrency
problems. problems.
* implement size-based balancing, so all balanced repositories are around
the same percent full, perhaps as another preferred
content expression.
* `fullybalanced=foo:2` can get stuck in suboptimal situations. Eg, * `fullybalanced=foo:2` can get stuck in suboptimal situations. Eg,
when 2 out of 3 repositories are full, and the 3rd is mostly empty, when 2 out of 3 repositories are full, and the 3rd is mostly empty,
it is no longer possible to add new files to 2 repositories. it is no longer possible to add new files to 2 repositories.
@ -80,17 +84,11 @@ Planned schedule of work:
Size based rebalancing may offer a solution; see design. Size based rebalancing may offer a solution; see design.
* "fullybalanced=foo:2" is not currently actually implemented!
* `git-annex info` in the limitedcalc path in cachedAllRepoData * `git-annex info` in the limitedcalc path in cachedAllRepoData
double-counts redundant information from the journal due to using double-counts redundant information from the journal due to using
overLocationLogs. In the other path it does not, and this should be fixed overLocationLogs. In the other path it does not, and this should be fixed
for consistency and correctness. for consistency and correctness.
* implement size-based balancing, so all balanced repositories are around
the same percent full, either as the default or as another preferred
content expression.
## completed items for August's work on balanced preferred content ## completed items for August's work on balanced preferred content
* Balanced preferred content basic implementation, including --rebalance * Balanced preferred content basic implementation, including --rebalance