take maxsize into account for balanced preferred content

This is very innefficient, it will need to be optimised not to
calculate the sizes of repos every time.

Also, fixed a bug in balancedPicker that caused it to pick a too high
index when some repos were excluded due to being full.
This commit is contained in:
Joey Hess 2024-08-13 11:00:20 -04:00
parent b201792391
commit 745bc5c547
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 60 additions and 11 deletions

View file

@ -21,14 +21,14 @@ type BalancedPicker = S.Set UUID -> Key -> UUID
-- The set of UUIDs provided here are all the UUIDs that are ever -- The set of UUIDs provided here are all the UUIDs that are ever
-- expected to be picked amoung. A subset of that can be provided -- expected to be picked amoung. A subset of that can be provided
-- when later using the BalancedPicker. -- when later using the BalancedPicker. Neither set can be empty.
balancedPicker :: S.Set UUID -> BalancedPicker balancedPicker :: S.Set UUID -> BalancedPicker
balancedPicker s = \s' key -> balancedPicker s = \s' key ->
let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key) let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key)
m = fromIntegral (S.size s')
in S.elemAt (fromIntegral (n `mod` m)) s' in S.elemAt (fromIntegral (n `mod` m)) s'
where where
combineduuids = mconcat (map fromUUID (S.toAscList s)) combineduuids = mconcat (map fromUUID (S.toAscList s))
m = fromIntegral (S.size s)
tointeger :: Digest a -> Integer tointeger :: Digest a -> Integer
tointeger = foldl' (\i b -> (i `shiftL` 8) + fromIntegral b) 0 tointeger = foldl' (\i b -> (i `shiftL` 8) + fromIntegral b) 0

33
Annex/RepoSize.hs Normal file
View file

@ -0,0 +1,33 @@
{- git-annex repo sizes
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.RepoSize where
import Annex.Common
import Types.RepoSize
import Logs.Location
import Logs.UUID
import qualified Data.Map.Strict as M
{- Sum up the sizes of all keys in all repositories, from the information
- in the git-annex branch. Can be slow.
-
- The map includes the UUIDs of all known repositories, including
- repositories that are empty.
-}
calcRepoSizes :: Annex (M.Map UUID RepoSize)
calcRepoSizes = do
knownuuids <- M.keys <$> uuidDescMap
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
overLocationLogs startmap $ \k locs m ->
return $
let sz = fromMaybe 0 $ fromKey keySize k
in foldl' (flip $ M.alter $ addksz sz) m locs
where
addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz
addksz ksz Nothing = Just $ RepoSize ksz

View file

@ -17,6 +17,9 @@ import Annex.Content
import Annex.WorkTree import Annex.WorkTree
import Annex.UUID import Annex.UUID
import Annex.Magic import Annex.Magic
import Annex.RepoSize
import Types.RepoSize
import Logs.MaxSize
import Annex.Link import Annex.Link
import Types.Link import Types.Link
import Logs.Trust import Logs.Trust
@ -590,13 +593,23 @@ limitBalanced mu getgroupmap groupname = do
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
{ matchAction = const $ checkKey $ \key -> do { matchAction = \notpresent -> checkKey $ \key -> do
gm <- getgroupmap gm <- getgroupmap
let groupmembers = fromMaybe S.empty $ let groupmembers = fromMaybe S.empty $
M.lookup g (uuidsByGroup gm) M.lookup g (uuidsByGroup gm)
-- TODO free space checking maxsizes <- getMaxSizes
return $ case (mu, M.lookup g (balancedPickerByGroup gm)) of -- XXX do not calc this every time!
(Just u, Just picker) -> u == picker groupmembers key sizemap <- calcRepoSizes
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
(Just (MaxSize maxsize), Just (RepoSize reposize)) ->
reposize + fromMaybe 0 (fromKey keySize key)
<= maxsize
_ -> True
let candidates = S.filter hasspace groupmembers
return $ if S.null candidates
then False
else case (mu, M.lookup g (balancedPickerByGroup gm)) of
(Just u, Just picker) -> u == picker candidates key
_ -> False _ -> False
, matchNeedsFileName = False , matchNeedsFileName = False
, matchNeedsFileContent = False , matchNeedsFileContent = False

View file

@ -45,13 +45,15 @@ Planned schedule of work:
Also note that "fullybalanced=foo:2" is not currently actually Also note that "fullybalanced=foo:2" is not currently actually
implemented! implemented!
* implement size-based balancing, either as the default or as another * implement size-based balancing, so all balanced repositories are around
preferred content expression. the same percent full, either as the default or as another preferred
content expression.
* `git-annex info` can use maxsize to display how full repositories are * `git-annex info` can use maxsize to display how full repositories are
* balanced= and fullybalanced= need to limit the set of repositories to * --rebalance is not stable. It will drop a key that was just stored in a
ones with enough free space to contain a key. repo. Seems that limitFullyBalanced needs to take AssumeNotPresent
into account to handle dropping correctly.
* Implement [[track_free_space_in_repos_via_git-annex_branch]]: * Implement [[track_free_space_in_repos_via_git-annex_branch]]:

View file

@ -574,6 +574,7 @@ Executable git-annex
Annex.Queue Annex.Queue
Annex.ReplaceFile Annex.ReplaceFile
Annex.RemoteTrackingBranch Annex.RemoteTrackingBranch
Annex.RepoSize
Annex.SafeDropProof Annex.SafeDropProof
Annex.SpecialRemote Annex.SpecialRemote
Annex.SpecialRemote.Config Annex.SpecialRemote.Config