Support "sizebalanced=" and "fullysizebalanced=" too
Might want to make --rebalance turn balanced=group:N where N > 1 to fullysizebalanced=group:N. Have not yet determined if that will improve situations enough to be worth the extra work.
This commit is contained in:
parent
4e1dcc0372
commit
9e87061de2
4 changed files with 157 additions and 36 deletions
|
@ -173,6 +173,8 @@ preferredContentTokens pcd =
|
|||
, ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd)
|
||||
, ValueToken "balanced" (usev $ limitBalanced (repoUUID pcd) (getGroupMap pcd))
|
||||
, ValueToken "fullybalanced" (usev $ limitFullyBalanced (repoUUID pcd) (getGroupMap pcd))
|
||||
, ValueToken "sizebalanced" (usev $ limitSizeBalanced (repoUUID pcd) (getGroupMap pcd))
|
||||
, ValueToken "fullysizebalanced" (usev $ limitFullySizeBalanced (repoUUID pcd) (getGroupMap pcd))
|
||||
] ++ commonTokens LimitAnnexFiles
|
||||
where
|
||||
preferreddir = maybe "public" fromProposedAccepted $
|
||||
|
|
|
@ -19,6 +19,7 @@ git-annex (10.20240831) UNRELEASED; urgency=medium
|
|||
remotes. External special remotes should not use that config for their
|
||||
own purposes.
|
||||
* Support "balanced=" and "fullybalanced=" in preferred content expressions.
|
||||
* Support "sizebalanced=" and "fullysizebalanced=" too.
|
||||
* Added --rebalance option.
|
||||
* maxsize: New command to tell git-annex how large the expected maximum
|
||||
size of a repository is, and to display repository sizes.
|
||||
|
|
108
Limit.hs
108
Limit.hs
|
@ -558,7 +558,11 @@ limitOnlyInGroup getgroupmap groupname = Right $ MatchFiles
|
|||
|
||||
{- Builds the limit for the "balanced" preferred content term.
 -
 - First builds the fully balanced matcher for the group (a failure to
 - parse groupname short-circuits here), and then hands that matcher to
 - limitBalanced' along with the term name "balanced". -}
limitBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||
limitBalanced mu getgroupmap groupname = do
|
||||
fullybalanced <- limitFullyBalanced mu getgroupmap groupname
|
||||
fullybalanced <- limitFullyBalanced' "balanced" mu getgroupmap groupname
|
||||
limitBalanced' "balanced" fullybalanced mu groupname
|
||||
|
||||
limitBalanced' :: String -> MatchFiles Annex -> Maybe UUID -> MkLimit Annex
|
||||
limitBalanced' termname fullybalanced mu groupname = do
|
||||
copies <- limitCopies $ if ':' `elem` groupname
|
||||
then groupname
|
||||
else groupname ++ ":1"
|
||||
|
@ -588,38 +592,65 @@ limitBalanced mu getgroupmap groupname = do
|
|||
matchNeedsLocationLog present ||
|
||||
matchNeedsLocationLog fullybalanced ||
|
||||
matchNeedsLocationLog copies
|
||||
, matchDesc = "balanced" =? groupname
|
||||
, matchDesc = termname =? groupname
|
||||
}
|
||||
|
||||
{- Limit for the "fullybalanced" preferred content term. This is
 - limitFullyBalanced' with the term name fixed to "fullybalanced". -}
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||
limitFullyBalanced mu getgroupmap want =
|
||||
limitFullyBalanced = limitFullyBalanced' "fullybalanced"
|
||||
|
||||
{- Like limitFullyBalanced, but the term name is a parameter.
 -
 - Supplies limitFullyBalanced'' with a candidate filter that keeps only
 - the repositories that have space for the key. The Int argument of the
 - filter is ignored here. -}
limitFullyBalanced' :: String -> Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||
limitFullyBalanced' = limitFullyBalanced'' filtercandidates
|
||||
where
|
||||
filtercandidates _ key candidates = do
|
||||
maxsizes <- getMaxSizes
|
||||
sizemap <- getRepoSizes False
|
||||
currentlocs <- S.fromList <$> loggedLocations key
|
||||
-- A key with no recorded size is treated as having size 0.
let keysize = fromMaybe 0 (fromKey keySize key)
|
||||
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
||||
(Just maxsize, Just reposize) ->
|
||||
repoHasSpace keysize (u `S.member` currentlocs) reposize maxsize
|
||||
-- When either the max size or the current size of a repository is
-- not in the maps, the repository is kept as a candidate.
_ -> True
|
||||
return $ S.filter hasspace candidates
|
||||
|
||||
{- Checks if a repository's size stays within its maximum size.
 -
 - keysize is the size of the key being considered, and inrepo says
 - whether the repository is already counted as containing that key.
 - When it is, only the current size is compared against the maximum;
 - otherwise the key's size is added to the current size first. -}
repoHasSpace :: Integer -> Bool -> RepoSize -> MaxSize -> Bool
|
||||
repoHasSpace keysize inrepo (RepoSize reposize) (MaxSize maxsize)
|
||||
| inrepo =
|
||||
reposize <= maxsize
|
||||
| otherwise =
|
||||
reposize + keysize <= maxsize
|
||||
|
||||
{- Parses the value of a balanced-style term and builds its matcher.
 -
 - The value is either "group" (the number defaults to 1) or
 - "group:number". Any other shape, or a number that cannot be read,
 - yields a Left with an error message that mentions termname.
 -
 - filtercandidates is passed through unchanged to
 - limitFullyBalanced''', along with the parsed group and number. -}
limitFullyBalanced''
|
||||
:: (Int -> Key -> S.Set UUID -> Annex (S.Set UUID))
|
||||
-> String
|
||||
-> Maybe UUID
|
||||
-> Annex GroupMap
|
||||
-> MkLimit Annex
|
||||
limitFullyBalanced'' filtercandidates termname mu getgroupmap want =
|
||||
case splitc ':' want of
|
||||
[g] -> go g 1
|
||||
[g, n] -> maybe
|
||||
(Left "bad number for fullybalanced")
|
||||
(Left $ "bad number for " ++ termname)
|
||||
(go g)
|
||||
(readish n)
|
||||
_ -> Left "bad value for fullybalanced"
|
||||
_ -> Left $ "bad value for " ++ termname
|
||||
where
|
||||
go s n = limitFullyBalanced' mu getgroupmap (toGroup s) n want
|
||||
go s n = limitFullyBalanced''' filtercandidates termname mu
|
||||
getgroupmap (toGroup s) n want
|
||||
|
||||
limitFullyBalanced' :: Maybe UUID -> Annex GroupMap -> Group -> Int -> MkLimit Annex
|
||||
limitFullyBalanced' mu getgroupmap g n want = Right $ MatchFiles
|
||||
limitFullyBalanced'''
|
||||
:: (Int -> Key -> S.Set UUID -> Annex (S.Set UUID))
|
||||
-> String
|
||||
-> Maybe UUID
|
||||
-> Annex GroupMap
|
||||
-> Group
|
||||
-> Int
|
||||
-> MkLimit Annex
|
||||
limitFullyBalanced''' filtercandidates termname mu getgroupmap g n want = Right $ MatchFiles
|
||||
{ matchAction = const $ checkKey $ \key -> do
|
||||
gm <- getgroupmap
|
||||
let groupmembers = fromMaybe S.empty $
|
||||
M.lookup g (uuidsByGroup gm)
|
||||
maxsizes <- getMaxSizes
|
||||
sizemap <- getRepoSizes False
|
||||
let keysize = fromMaybe 0 (fromKey keySize key)
|
||||
currentlocs <- S.fromList <$> loggedLocations key
|
||||
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
||||
(Just (MaxSize maxsize), Just (RepoSize reposize)) ->
|
||||
if u `S.member` currentlocs
|
||||
then reposize <= maxsize
|
||||
else reposize + keysize <= maxsize
|
||||
_ -> True
|
||||
let candidates = S.filter hasspace groupmembers
|
||||
candidates <- filtercandidates n key groupmembers
|
||||
return $ if S.null candidates
|
||||
then False
|
||||
else case (mu, M.lookup g (balancedPickerByGroup gm)) of
|
||||
|
@ -630,9 +661,46 @@ limitFullyBalanced' mu getgroupmap g n want = Right $ MatchFiles
|
|||
, matchNeedsFileContent = False
|
||||
, matchNeedsKey = True
|
||||
, matchNeedsLocationLog = False
|
||||
, matchDesc = "fullybalanced" =? want
|
||||
, matchDesc = termname =? want
|
||||
}
|
||||
|
||||
{- Builds the limit for the "sizebalanced" preferred content term.
 -
 - Mirrors limitBalanced: the fully size balanced matcher is built
 - first, and is then handed to limitBalanced' along with the term
 - name "sizebalanced". -}
limitSizeBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||
limitSizeBalanced mu getgroupmap groupname = do
|
||||
fullysizebalanced <- limitFullySizeBalanced' "sizebalanced" mu getgroupmap groupname
|
||||
limitBalanced' "sizebalanced" fullysizebalanced mu groupname
|
||||
|
||||
{- Limit for the "fullysizebalanced" preferred content term. This is
 - limitFullySizeBalanced' with the term name fixed to
 - "fullysizebalanced". -}
limitFullySizeBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||
limitFullySizeBalanced = limitFullySizeBalanced' "fullysizebalanced"
|
||||
|
||||
{- Like limitFullySizeBalanced, but the term name is a parameter.
 -
 - Supplies limitFullyBalanced'' with a candidate filter that narrows
 - the candidates down to at most n repositories: those that have space
 - for the key and the largest proportion of their maximum size free. -}
limitFullySizeBalanced' :: String -> Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||
limitFullySizeBalanced' = limitFullyBalanced'' filtercandidates
|
||||
where
|
||||
filtercandidates n key candidates = do
|
||||
maxsizes <- getMaxSizes
|
||||
sizemap <- getRepoSizes False
|
||||
currentlocs <- S.fromList <$> loggedLocations key
|
||||
-- A key with no recorded size is treated as having size 0.
let keysize = fromMaybe 0 (fromKey keySize key)
|
||||
let go u = case (M.lookup u maxsizes, M.lookup u sizemap, u `S.member` currentlocs) of
|
||||
(Just maxsize, Just reposize, inrepo)
|
||||
| repoHasSpace keysize inrepo reposize maxsize ->
|
||||
proportionfree keysize inrepo u reposize maxsize
|
||||
| otherwise -> Nothing
|
||||
-- Unlike the filter used by limitFullyBalanced', a repository whose
-- max size or current size is not in the maps is dropped here.
_ -> Nothing
|
||||
-- Sort ascending by proportion free, reverse so the most free come
-- first, and keep the first n.
return $ S.fromList $
|
||||
map fst $ take n $ reverse $ sortOn snd $
|
||||
mapMaybe go $ S.toList candidates
|
||||
|
||||
-- Pairs the repository with the fraction of its maximum size that is
-- free. The guard on maxsize keeps the division from dividing by zero.
proportionfree keysize inrepo u (RepoSize reposize) (MaxSize maxsize)
|
||||
| maxsize > 0 = Just
|
||||
( u
|
||||
, fromIntegral freespacesanskey / fromIntegral maxsize
|
||||
:: Double
|
||||
)
|
||||
| otherwise = Nothing
|
||||
where
|
||||
-- Free space not counting the key: when the key is already in the
-- repository, its size is added back to the free space.
freespacesanskey = maxsize - reposize +
|
||||
if inrepo then keysize else 0
|
||||
|
||||
{- Adds a limit to skip files not using a specified key-value backend. -}
|
||||
addInBackend :: String -> Annex ()
|
||||
addInBackend = addLimit . limitInBackend
|
||||
|
|
|
@ -268,38 +268,40 @@ elsewhere to allow removing it).
|
|||
|
||||
The number is the number of repositories in the group that will
|
||||
want each file. When not specified, the default is 1.
|
||||
|
||||
For example, "balanced=backup:2", when there are 3 members of the backup
|
||||
group, will make each backup repository want 2/3rds of the files.
|
||||
|
||||
For this to work, each repository in the group should have its preferred
|
||||
content set to the same expression. Using `groupwanted` is a good
|
||||
way to do that.
|
||||
|
||||
For example, "balanced=backup:2", when there are 3 members of the backup
|
||||
group, will make each backup repository want 2/3rds of the files.
|
||||
|
||||
The sizes of files are not taken into account, so it's possible for
|
||||
one repository to get larger than usual files and so fill up before
|
||||
the other repositories. But files are only wanted by repositories that
|
||||
have enough free space to hold them. So once a repository is full,
|
||||
the remaining repositories will have any additional files balanced
|
||||
amoung them. In order for this to work, you must use
|
||||
[[git-annex-maxsize]](1) to specify the size of each repository in the
|
||||
group.
|
||||
among them. For git-annex to know when a repository is full,
|
||||
you must use [[git-annex-maxsize]](1) to specify the size of each
|
||||
repository in the group.
|
||||
|
||||
This usually avoids moving files between repositories of the group, even
|
||||
This usually avoids moving files between repositories, even
|
||||
if that means that things are not optimally balanced. Some of the ways
|
||||
that it can get out of balance include adding a new repository to the
|
||||
group, or a file getting copied into more repositories in the group than
|
||||
the specified number. Running git-annex commands with the `--rebalance`
|
||||
option will make this expression instead behave like the `fullybalanced`
|
||||
expression, which will make repositories want to move files around as
|
||||
necessary in order to get fully balanced.
|
||||
group, or a file getting copied into more repositories in the
|
||||
group than the specified number, or some of the repositories filling up.
|
||||
|
||||
Using this in a perferred content expression makes git-annex need to do
|
||||
Running git-annex commands with the `--rebalance` option will make this
|
||||
expression instead behave like the `fullybalanced` expression, which will
|
||||
make repositories want to move files around as necessary in order to get
|
||||
fully balanced.
|
||||
|
||||
Using this in a preferred content expression makes git-annex need to do
|
||||
some additional work to keep track of how full repositories are. Usually
|
||||
that won't affect performance much. However, the first time git-annex
|
||||
processes this in a given git repository, it will need to examine
|
||||
all the locations of all files, which can be slow when there are a lot of
|
||||
them. When this causes git-annex to do a lot of work, it will
|
||||
processes this expression in a given git repository, it will need to
|
||||
calculate the sizes of all repositories, which can be slow when there are
|
||||
a lot of files. When this causes git-annex to do a lot of work, it will
|
||||
display "(calculating repository sizes)".
|
||||
|
||||
Note that `not balanced` is a bad thing to put in a preferred content
|
||||
|
@ -316,6 +318,54 @@ elsewhere to allow removing it).
|
|||
When the `--rebalance` option is used, `balanced` is the same as
|
||||
`fullybalanced`.
|
||||
|
||||
* `sizebalanced=groupname:number`
|
||||
|
||||
Distributes content among repositories in the group, keeping
|
||||
repositories proportionally full.
|
||||
|
||||
The number is the number of repositories in the group that will
|
||||
want each file. When not specified, the default is 1.
|
||||
|
||||
For this to work, you must use [[git-annex-maxsize]](1) to specify
|
||||
the size of each repository in the group. When a repository's
|
||||
maxsize has not been specified, it will not want any files.
|
||||
|
||||
For example, if one repository in the group has a maximum size of
|
||||
100 gb with 60 gb used, and another has a maximum size of 50 gb with
|
||||
25 gb used, the smaller one will want files (that fit in it),
|
||||
and the larger one won't want any files
|
||||
(that would fit in the smaller one)
|
||||
until the smaller one gets equally full.
|
||||
|
||||
Note that, once a repository contains a file, it will continue to want
|
||||
it, even if it's more full than other repositories. This is to avoid
|
||||
churn in moving files around.
|
||||
|
||||
This is more likely to get out of balance than the `balanced=` expression
|
||||
is, because git-annex does not always have a consistent knowledge of
|
||||
how full repositories are. Consider for example if a laptop and a desktop
|
||||
are each sending a new file to the group. They will both pick whichever
|
||||
repository was least full, but that means both files go to the same
|
||||
repository, when a better solution might have been to send the smaller
|
||||
file to a different repository. When using `balanced=` in the same
|
||||
situation, it's less likely that a repository will want both files.
|
||||
|
||||
Running git-annex commands with the `--rebalance` option will make this
|
||||
expression instead behave like the `fullysizebalanced` expression, which
|
||||
will make repositories want to move files around as necessary in order to
|
||||
get fully balanced.
|
||||
|
||||
* `fullysizebalanced=groupname:number`
|
||||
|
||||
This is like `sizebalanced`, but allows moving content between repositories
|
||||
in the group at any time to keep it fully balanced.
|
||||
|
||||
Normally "sizebalanced=groupname:number" is the same as
|
||||
"(fullysizebalanced=groupname:number and not copies=groupname:number) or present"
|
||||
|
||||
When the `--rebalance` option is used, `sizebalanced` is the same as
|
||||
`fullysizebalanced`.
|
||||
|
||||
* `anything`
|
||||
|
||||
Always matches.
|
||||
|
|
Loading…
Reference in a new issue