take maxsize into account for balanced preferred content
This is very innefficient, it will need to be optimised not to calculate the sizes of repos every time. Also, fixed a bug in balancedPicker that caused it to pick a too high index when some repos were excluded due to being full.
This commit is contained in:
parent
b201792391
commit
745bc5c547
5 changed files with 60 additions and 11 deletions
|
@ -21,14 +21,14 @@ type BalancedPicker = S.Set UUID -> Key -> UUID
|
|||
|
||||
-- The set of UUIDs provided here are all the UUIDs that are ever
|
||||
-- expected to be picked amoung. A subset of that can be provided
|
||||
-- when later using the BalancedPicker.
|
||||
-- when later using the BalancedPicker. Neither set can be empty.
|
||||
balancedPicker :: S.Set UUID -> BalancedPicker
|
||||
balancedPicker s = \s' key ->
|
||||
let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key)
|
||||
m = fromIntegral (S.size s')
|
||||
in S.elemAt (fromIntegral (n `mod` m)) s'
|
||||
where
|
||||
combineduuids = mconcat (map fromUUID (S.toAscList s))
|
||||
m = fromIntegral (S.size s)
|
||||
|
||||
tointeger :: Digest a -> Integer
|
||||
tointeger = foldl' (\i b -> (i `shiftL` 8) + fromIntegral b) 0
|
||||
|
|
33
Annex/RepoSize.hs
Normal file
33
Annex/RepoSize.hs
Normal file
|
@ -0,0 +1,33 @@
|
|||
{- git-annex repo sizes
|
||||
-
|
||||
- Copyright 2024 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.RepoSize where
|
||||
|
||||
import Annex.Common
|
||||
import Types.RepoSize
|
||||
import Logs.Location
|
||||
import Logs.UUID
|
||||
|
||||
import qualified Data.Map.Strict as M
|
||||
|
||||
{- Sum up the sizes of all keys in all repositories, from the information
|
||||
- in the git-annex branch. Can be slow.
|
||||
-
|
||||
- The map includes the UUIDs of all known repositories, including
|
||||
- repositories that are empty.
|
||||
-}
|
||||
calcRepoSizes :: Annex (M.Map UUID RepoSize)
|
||||
calcRepoSizes = do
|
||||
knownuuids <- M.keys <$> uuidDescMap
|
||||
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
|
||||
overLocationLogs startmap $ \k locs m ->
|
||||
return $
|
||||
let sz = fromMaybe 0 $ fromKey keySize k
|
||||
in foldl' (flip $ M.alter $ addksz sz) m locs
|
||||
where
|
||||
addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz
|
||||
addksz ksz Nothing = Just $ RepoSize ksz
|
23
Limit.hs
23
Limit.hs
|
@ -17,6 +17,9 @@ import Annex.Content
|
|||
import Annex.WorkTree
|
||||
import Annex.UUID
|
||||
import Annex.Magic
|
||||
import Annex.RepoSize
|
||||
import Types.RepoSize
|
||||
import Logs.MaxSize
|
||||
import Annex.Link
|
||||
import Types.Link
|
||||
import Logs.Trust
|
||||
|
@ -590,14 +593,24 @@ limitBalanced mu getgroupmap groupname = do
|
|||
|
||||
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||
limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
|
||||
{ matchAction = const $ checkKey $ \key -> do
|
||||
{ matchAction = \notpresent -> checkKey $ \key -> do
|
||||
gm <- getgroupmap
|
||||
let groupmembers = fromMaybe S.empty $
|
||||
M.lookup g (uuidsByGroup gm)
|
||||
-- TODO free space checking
|
||||
return $ case (mu, M.lookup g (balancedPickerByGroup gm)) of
|
||||
(Just u, Just picker) -> u == picker groupmembers key
|
||||
_ -> False
|
||||
maxsizes <- getMaxSizes
|
||||
-- XXX do not calc this every time!
|
||||
sizemap <- calcRepoSizes
|
||||
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
||||
(Just (MaxSize maxsize), Just (RepoSize reposize)) ->
|
||||
reposize + fromMaybe 0 (fromKey keySize key)
|
||||
<= maxsize
|
||||
_ -> True
|
||||
let candidates = S.filter hasspace groupmembers
|
||||
return $ if S.null candidates
|
||||
then False
|
||||
else case (mu, M.lookup g (balancedPickerByGroup gm)) of
|
||||
(Just u, Just picker) -> u == picker candidates key
|
||||
_ -> False
|
||||
, matchNeedsFileName = False
|
||||
, matchNeedsFileContent = False
|
||||
, matchNeedsKey = True
|
||||
|
|
|
@ -45,13 +45,15 @@ Planned schedule of work:
|
|||
Also note that "fullybalanced=foo:2" is not currently actually
|
||||
implemented!
|
||||
|
||||
* implement size-based balancing, either as the default or as another
|
||||
preferred content expression.
|
||||
* implement size-based balancing, so all balanced repositories are around
|
||||
the same percent full, either as the default or as another preferred
|
||||
content expression.
|
||||
|
||||
* `git-annex info` can use maxsize to display how full repositories are
|
||||
|
||||
* balanced= and fullybalanced= need to limit the set of repositories to
|
||||
ones with enough free space to contain a key.
|
||||
* --rebalance is not stable. It will drop a key that was just stored in a
|
||||
repo. Seems that limitFullyBalanced needs to take AssumeNotPresent
|
||||
into account to handle dropping correctly.
|
||||
|
||||
* Implement [[track_free_space_in_repos_via_git-annex_branch]]:
|
||||
|
||||
|
|
|
@ -574,6 +574,7 @@ Executable git-annex
|
|||
Annex.Queue
|
||||
Annex.ReplaceFile
|
||||
Annex.RemoteTrackingBranch
|
||||
Annex.RepoSize
|
||||
Annex.SafeDropProof
|
||||
Annex.SpecialRemote
|
||||
Annex.SpecialRemote.Config
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue