take maxsize into account for balanced preferred content
This is very innefficient, it will need to be optimised not to calculate the sizes of repos every time. Also, fixed a bug in balancedPicker that caused it to pick a too high index when some repos were excluded due to being full.
This commit is contained in:
parent
b201792391
commit
745bc5c547
5 changed files with 60 additions and 11 deletions
|
@ -21,14 +21,14 @@ type BalancedPicker = S.Set UUID -> Key -> UUID
|
||||||
|
|
||||||
-- The set of UUIDs provided here are all the UUIDs that are ever
|
-- The set of UUIDs provided here are all the UUIDs that are ever
|
||||||
-- expected to be picked amoung. A subset of that can be provided
|
-- expected to be picked amoung. A subset of that can be provided
|
||||||
-- when later using the BalancedPicker.
|
-- when later using the BalancedPicker. Neither set can be empty.
|
||||||
balancedPicker :: S.Set UUID -> BalancedPicker
|
balancedPicker :: S.Set UUID -> BalancedPicker
|
||||||
balancedPicker s = \s' key ->
|
balancedPicker s = \s' key ->
|
||||||
let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key)
|
let n = calcMac tointeger HmacSha256 combineduuids (serializeKey' key)
|
||||||
|
m = fromIntegral (S.size s')
|
||||||
in S.elemAt (fromIntegral (n `mod` m)) s'
|
in S.elemAt (fromIntegral (n `mod` m)) s'
|
||||||
where
|
where
|
||||||
combineduuids = mconcat (map fromUUID (S.toAscList s))
|
combineduuids = mconcat (map fromUUID (S.toAscList s))
|
||||||
m = fromIntegral (S.size s)
|
|
||||||
|
|
||||||
tointeger :: Digest a -> Integer
|
tointeger :: Digest a -> Integer
|
||||||
tointeger = foldl' (\i b -> (i `shiftL` 8) + fromIntegral b) 0
|
tointeger = foldl' (\i b -> (i `shiftL` 8) + fromIntegral b) 0
|
||||||
|
|
33
Annex/RepoSize.hs
Normal file
33
Annex/RepoSize.hs
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
{- git-annex repo sizes
|
||||||
|
-
|
||||||
|
- Copyright 2024 Joey Hess <id@joeyh.name>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Annex.RepoSize where
|
||||||
|
|
||||||
|
import Annex.Common
|
||||||
|
import Types.RepoSize
|
||||||
|
import Logs.Location
|
||||||
|
import Logs.UUID
|
||||||
|
|
||||||
|
import qualified Data.Map.Strict as M
|
||||||
|
|
||||||
|
{- Sum up the sizes of all keys in all repositories, from the information
|
||||||
|
- in the git-annex branch. Can be slow.
|
||||||
|
-
|
||||||
|
- The map includes the UUIDs of all known repositories, including
|
||||||
|
- repositories that are empty.
|
||||||
|
-}
|
||||||
|
calcRepoSizes :: Annex (M.Map UUID RepoSize)
|
||||||
|
calcRepoSizes = do
|
||||||
|
knownuuids <- M.keys <$> uuidDescMap
|
||||||
|
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
|
||||||
|
overLocationLogs startmap $ \k locs m ->
|
||||||
|
return $
|
||||||
|
let sz = fromMaybe 0 $ fromKey keySize k
|
||||||
|
in foldl' (flip $ M.alter $ addksz sz) m locs
|
||||||
|
where
|
||||||
|
addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz
|
||||||
|
addksz ksz Nothing = Just $ RepoSize ksz
|
21
Limit.hs
21
Limit.hs
|
@ -17,6 +17,9 @@ import Annex.Content
|
||||||
import Annex.WorkTree
|
import Annex.WorkTree
|
||||||
import Annex.UUID
|
import Annex.UUID
|
||||||
import Annex.Magic
|
import Annex.Magic
|
||||||
|
import Annex.RepoSize
|
||||||
|
import Types.RepoSize
|
||||||
|
import Logs.MaxSize
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
import Types.Link
|
import Types.Link
|
||||||
import Logs.Trust
|
import Logs.Trust
|
||||||
|
@ -590,13 +593,23 @@ limitBalanced mu getgroupmap groupname = do
|
||||||
|
|
||||||
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||||
limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
|
limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
|
||||||
{ matchAction = const $ checkKey $ \key -> do
|
{ matchAction = \notpresent -> checkKey $ \key -> do
|
||||||
gm <- getgroupmap
|
gm <- getgroupmap
|
||||||
let groupmembers = fromMaybe S.empty $
|
let groupmembers = fromMaybe S.empty $
|
||||||
M.lookup g (uuidsByGroup gm)
|
M.lookup g (uuidsByGroup gm)
|
||||||
-- TODO free space checking
|
maxsizes <- getMaxSizes
|
||||||
return $ case (mu, M.lookup g (balancedPickerByGroup gm)) of
|
-- XXX do not calc this every time!
|
||||||
(Just u, Just picker) -> u == picker groupmembers key
|
sizemap <- calcRepoSizes
|
||||||
|
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
||||||
|
(Just (MaxSize maxsize), Just (RepoSize reposize)) ->
|
||||||
|
reposize + fromMaybe 0 (fromKey keySize key)
|
||||||
|
<= maxsize
|
||||||
|
_ -> True
|
||||||
|
let candidates = S.filter hasspace groupmembers
|
||||||
|
return $ if S.null candidates
|
||||||
|
then False
|
||||||
|
else case (mu, M.lookup g (balancedPickerByGroup gm)) of
|
||||||
|
(Just u, Just picker) -> u == picker candidates key
|
||||||
_ -> False
|
_ -> False
|
||||||
, matchNeedsFileName = False
|
, matchNeedsFileName = False
|
||||||
, matchNeedsFileContent = False
|
, matchNeedsFileContent = False
|
||||||
|
|
|
@ -45,13 +45,15 @@ Planned schedule of work:
|
||||||
Also note that "fullybalanced=foo:2" is not currently actually
|
Also note that "fullybalanced=foo:2" is not currently actually
|
||||||
implemented!
|
implemented!
|
||||||
|
|
||||||
* implement size-based balancing, either as the default or as another
|
* implement size-based balancing, so all balanced repositories are around
|
||||||
preferred content expression.
|
the same percent full, either as the default or as another preferred
|
||||||
|
content expression.
|
||||||
|
|
||||||
* `git-annex info` can use maxsize to display how full repositories are
|
* `git-annex info` can use maxsize to display how full repositories are
|
||||||
|
|
||||||
* balanced= and fullybalanced= need to limit the set of repositories to
|
* --rebalance is not stable. It will drop a key that was just stored in a
|
||||||
ones with enough free space to contain a key.
|
repo. Seems that limitFullyBalanced needs to take AssumeNotPresent
|
||||||
|
into account to handle dropping correctly.
|
||||||
|
|
||||||
* Implement [[track_free_space_in_repos_via_git-annex_branch]]:
|
* Implement [[track_free_space_in_repos_via_git-annex_branch]]:
|
||||||
|
|
||||||
|
|
|
@ -574,6 +574,7 @@ Executable git-annex
|
||||||
Annex.Queue
|
Annex.Queue
|
||||||
Annex.ReplaceFile
|
Annex.ReplaceFile
|
||||||
Annex.RemoteTrackingBranch
|
Annex.RemoteTrackingBranch
|
||||||
|
Annex.RepoSize
|
||||||
Annex.SafeDropProof
|
Annex.SafeDropProof
|
||||||
Annex.SpecialRemote
|
Annex.SpecialRemote
|
||||||
Annex.SpecialRemote.Config
|
Annex.SpecialRemote.Config
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue