Support "sizebalanced=" and "fullysizebalanced=" too
Might want to make --rebalance turn balanced=group:N where N > 1 into fullysizebalanced=group:N. Have not yet determined if that will improve situations enough to be worth the extra work.
This commit is contained in:
parent
4e1dcc0372
commit
9e87061de2
4 changed files with 157 additions and 36 deletions
|
@ -173,6 +173,8 @@ preferredContentTokens pcd =
|
||||||
, ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd)
|
, ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd)
|
||||||
, ValueToken "balanced" (usev $ limitBalanced (repoUUID pcd) (getGroupMap pcd))
|
, ValueToken "balanced" (usev $ limitBalanced (repoUUID pcd) (getGroupMap pcd))
|
||||||
, ValueToken "fullybalanced" (usev $ limitFullyBalanced (repoUUID pcd) (getGroupMap pcd))
|
, ValueToken "fullybalanced" (usev $ limitFullyBalanced (repoUUID pcd) (getGroupMap pcd))
|
||||||
|
, ValueToken "sizebalanced" (usev $ limitSizeBalanced (repoUUID pcd) (getGroupMap pcd))
|
||||||
|
, ValueToken "fullysizebalanced" (usev $ limitFullySizeBalanced (repoUUID pcd) (getGroupMap pcd))
|
||||||
] ++ commonTokens LimitAnnexFiles
|
] ++ commonTokens LimitAnnexFiles
|
||||||
where
|
where
|
||||||
preferreddir = maybe "public" fromProposedAccepted $
|
preferreddir = maybe "public" fromProposedAccepted $
|
||||||
|
|
|
@ -19,6 +19,7 @@ git-annex (10.20240831) UNRELEASED; urgency=medium
|
||||||
remotes. External special remotes should not use that config for their
|
remotes. External special remotes should not use that config for their
|
||||||
own purposes.
|
own purposes.
|
||||||
* Support "balanced=" and "fullybalanced=" in preferred content expressions.
|
* Support "balanced=" and "fullybalanced=" in preferred content expressions.
|
||||||
|
* Support "sizebalanced=" and "fullysizebalanced=" too.
|
||||||
* Added --rebalance option.
|
* Added --rebalance option.
|
||||||
* maxsize: New command to tell git-annex how large the expected maximum
|
* maxsize: New command to tell git-annex how large the expected maximum
|
||||||
size of a repository is, and to display repository sizes.
|
size of a repository is, and to display repository sizes.
|
||||||
|
|
108
Limit.hs
108
Limit.hs
|
@ -558,7 +558,11 @@ limitOnlyInGroup getgroupmap groupname = Right $ MatchFiles
|
||||||
|
|
||||||
limitBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
limitBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||||
limitBalanced mu getgroupmap groupname = do
|
limitBalanced mu getgroupmap groupname = do
|
||||||
fullybalanced <- limitFullyBalanced mu getgroupmap groupname
|
fullybalanced <- limitFullyBalanced' "balanced" mu getgroupmap groupname
|
||||||
|
limitBalanced' "balanced" fullybalanced mu groupname
|
||||||
|
|
||||||
|
limitBalanced' :: String -> MatchFiles Annex -> Maybe UUID -> MkLimit Annex
|
||||||
|
limitBalanced' termname fullybalanced mu groupname = do
|
||||||
copies <- limitCopies $ if ':' `elem` groupname
|
copies <- limitCopies $ if ':' `elem` groupname
|
||||||
then groupname
|
then groupname
|
||||||
else groupname ++ ":1"
|
else groupname ++ ":1"
|
||||||
|
@ -588,38 +592,65 @@ limitBalanced mu getgroupmap groupname = do
|
||||||
matchNeedsLocationLog present ||
|
matchNeedsLocationLog present ||
|
||||||
matchNeedsLocationLog fullybalanced ||
|
matchNeedsLocationLog fullybalanced ||
|
||||||
matchNeedsLocationLog copies
|
matchNeedsLocationLog copies
|
||||||
, matchDesc = "balanced" =? groupname
|
, matchDesc = termname =? groupname
|
||||||
}
|
}
|
||||||
|
|
||||||
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||||
limitFullyBalanced mu getgroupmap want =
|
limitFullyBalanced = limitFullyBalanced' "fullybalanced"
|
||||||
|
|
||||||
|
limitFullyBalanced' :: String -> Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||||
|
limitFullyBalanced' = limitFullyBalanced'' filtercandidates
|
||||||
|
where
|
||||||
|
filtercandidates _ key candidates = do
|
||||||
|
maxsizes <- getMaxSizes
|
||||||
|
sizemap <- getRepoSizes False
|
||||||
|
currentlocs <- S.fromList <$> loggedLocations key
|
||||||
|
let keysize = fromMaybe 0 (fromKey keySize key)
|
||||||
|
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
||||||
|
(Just maxsize, Just reposize) ->
|
||||||
|
repoHasSpace keysize (u `S.member` currentlocs) reposize maxsize
|
||||||
|
_ -> True
|
||||||
|
return $ S.filter hasspace candidates
|
||||||
|
|
||||||
|
repoHasSpace :: Integer -> Bool -> RepoSize -> MaxSize -> Bool
|
||||||
|
repoHasSpace keysize inrepo (RepoSize reposize) (MaxSize maxsize)
|
||||||
|
| inrepo =
|
||||||
|
reposize <= maxsize
|
||||||
|
| otherwise =
|
||||||
|
reposize + keysize <= maxsize
|
||||||
|
|
||||||
|
limitFullyBalanced''
|
||||||
|
:: (Int -> Key -> S.Set UUID -> Annex (S.Set UUID))
|
||||||
|
-> String
|
||||||
|
-> Maybe UUID
|
||||||
|
-> Annex GroupMap
|
||||||
|
-> MkLimit Annex
|
||||||
|
limitFullyBalanced'' filtercandidates termname mu getgroupmap want =
|
||||||
case splitc ':' want of
|
case splitc ':' want of
|
||||||
[g] -> go g 1
|
[g] -> go g 1
|
||||||
[g, n] -> maybe
|
[g, n] -> maybe
|
||||||
(Left "bad number for fullybalanced")
|
(Left $ "bad number for " ++ termname)
|
||||||
(go g)
|
(go g)
|
||||||
(readish n)
|
(readish n)
|
||||||
_ -> Left "bad value for fullybalanced"
|
_ -> Left $ "bad value for " ++ termname
|
||||||
where
|
where
|
||||||
go s n = limitFullyBalanced' mu getgroupmap (toGroup s) n want
|
go s n = limitFullyBalanced''' filtercandidates termname mu
|
||||||
|
getgroupmap (toGroup s) n want
|
||||||
|
|
||||||
limitFullyBalanced' :: Maybe UUID -> Annex GroupMap -> Group -> Int -> MkLimit Annex
|
limitFullyBalanced'''
|
||||||
limitFullyBalanced' mu getgroupmap g n want = Right $ MatchFiles
|
:: (Int -> Key -> S.Set UUID -> Annex (S.Set UUID))
|
||||||
|
-> String
|
||||||
|
-> Maybe UUID
|
||||||
|
-> Annex GroupMap
|
||||||
|
-> Group
|
||||||
|
-> Int
|
||||||
|
-> MkLimit Annex
|
||||||
|
limitFullyBalanced''' filtercandidates termname mu getgroupmap g n want = Right $ MatchFiles
|
||||||
{ matchAction = const $ checkKey $ \key -> do
|
{ matchAction = const $ checkKey $ \key -> do
|
||||||
gm <- getgroupmap
|
gm <- getgroupmap
|
||||||
let groupmembers = fromMaybe S.empty $
|
let groupmembers = fromMaybe S.empty $
|
||||||
M.lookup g (uuidsByGroup gm)
|
M.lookup g (uuidsByGroup gm)
|
||||||
maxsizes <- getMaxSizes
|
candidates <- filtercandidates n key groupmembers
|
||||||
sizemap <- getRepoSizes False
|
|
||||||
let keysize = fromMaybe 0 (fromKey keySize key)
|
|
||||||
currentlocs <- S.fromList <$> loggedLocations key
|
|
||||||
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
|
||||||
(Just (MaxSize maxsize), Just (RepoSize reposize)) ->
|
|
||||||
if u `S.member` currentlocs
|
|
||||||
then reposize <= maxsize
|
|
||||||
else reposize + keysize <= maxsize
|
|
||||||
_ -> True
|
|
||||||
let candidates = S.filter hasspace groupmembers
|
|
||||||
return $ if S.null candidates
|
return $ if S.null candidates
|
||||||
then False
|
then False
|
||||||
else case (mu, M.lookup g (balancedPickerByGroup gm)) of
|
else case (mu, M.lookup g (balancedPickerByGroup gm)) of
|
||||||
|
@ -630,9 +661,46 @@ limitFullyBalanced' mu getgroupmap g n want = Right $ MatchFiles
|
||||||
, matchNeedsFileContent = False
|
, matchNeedsFileContent = False
|
||||||
, matchNeedsKey = True
|
, matchNeedsKey = True
|
||||||
, matchNeedsLocationLog = False
|
, matchNeedsLocationLog = False
|
||||||
, matchDesc = "fullybalanced" =? want
|
, matchDesc = termname =? want
|
||||||
}
|
}
|
||||||
|
|
||||||
|
limitSizeBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||||
|
limitSizeBalanced mu getgroupmap groupname = do
|
||||||
|
fullysizebalanced <- limitFullySizeBalanced' "sizebalanced" mu getgroupmap groupname
|
||||||
|
limitBalanced' "sizebalanced" fullysizebalanced mu groupname
|
||||||
|
|
||||||
|
limitFullySizeBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||||
|
limitFullySizeBalanced = limitFullySizeBalanced' "fullysizebalanced"
|
||||||
|
|
||||||
|
limitFullySizeBalanced' :: String -> Maybe UUID -> Annex GroupMap -> MkLimit Annex
|
||||||
|
limitFullySizeBalanced' = limitFullyBalanced'' filtercandidates
|
||||||
|
where
|
||||||
|
filtercandidates n key candidates = do
|
||||||
|
maxsizes <- getMaxSizes
|
||||||
|
sizemap <- getRepoSizes False
|
||||||
|
currentlocs <- S.fromList <$> loggedLocations key
|
||||||
|
let keysize = fromMaybe 0 (fromKey keySize key)
|
||||||
|
let go u = case (M.lookup u maxsizes, M.lookup u sizemap, u `S.member` currentlocs) of
|
||||||
|
(Just maxsize, Just reposize, inrepo)
|
||||||
|
| repoHasSpace keysize inrepo reposize maxsize ->
|
||||||
|
proportionfree keysize inrepo u reposize maxsize
|
||||||
|
| otherwise -> Nothing
|
||||||
|
_ -> Nothing
|
||||||
|
return $ S.fromList $
|
||||||
|
map fst $ take n $ reverse $ sortOn snd $
|
||||||
|
mapMaybe go $ S.toList candidates
|
||||||
|
|
||||||
|
proportionfree keysize inrepo u (RepoSize reposize) (MaxSize maxsize)
|
||||||
|
| maxsize > 0 = Just
|
||||||
|
( u
|
||||||
|
, fromIntegral freespacesanskey / fromIntegral maxsize
|
||||||
|
:: Double
|
||||||
|
)
|
||||||
|
| otherwise = Nothing
|
||||||
|
where
|
||||||
|
freespacesanskey = maxsize - reposize +
|
||||||
|
if inrepo then keysize else 0
|
||||||
|
|
||||||
{- Adds a limit to skip files not using a specified key-value backend. -}
|
{- Adds a limit to skip files not using a specified key-value backend. -}
|
||||||
addInBackend :: String -> Annex ()
|
addInBackend :: String -> Annex ()
|
||||||
addInBackend = addLimit . limitInBackend
|
addInBackend = addLimit . limitInBackend
|
||||||
|
|
|
@ -269,37 +269,39 @@ elsewhere to allow removing it).
|
||||||
The number is the number of repositories in the group that will
|
The number is the number of repositories in the group that will
|
||||||
want each file. When not specified, the default is 1.
|
want each file. When not specified, the default is 1.
|
||||||
|
|
||||||
|
For example, "balanced=backup:2", when there are 3 members of the backup
|
||||||
|
group, will make each backup repository want 2/3rds of the files.
|
||||||
|
|
||||||
For this to work, each repository in the group should have its preferred
|
For this to work, each repository in the group should have its preferred
|
||||||
content set to the same expression. Using `groupwanted` is a good
|
content set to the same expression. Using `groupwanted` is a good
|
||||||
way to do that.
|
way to do that.
|
||||||
|
|
||||||
For example, "balanced=backup:2", when there are 3 members of the backup
|
|
||||||
group, will make each backup repository want 2/3rds of the files.
|
|
||||||
|
|
||||||
The sizes of files are not taken into account, so it's possible for
|
The sizes of files are not taken into account, so it's possible for
|
||||||
one repository to get larger than usual files and so fill up before
|
one repository to get larger than usual files and so fill up before
|
||||||
the other repositories. But files are only wanted by repositories that
|
the other repositories. But files are only wanted by repositories that
|
||||||
have enough free space to hold them. So once a repository is full,
|
have enough free space to hold them. So once a repository is full,
|
||||||
the remaining repositories will have any additional files balanced
|
the remaining repositories will have any additional files balanced
|
||||||
among them. In order for this to work, you must use
|
among them. For git-annex to know when a repository is full,
|
||||||
[[git-annex-maxsize]](1) to specify the size of each repository in the
|
you must use [[git-annex-maxsize]](1) to specify the size of each
|
||||||
group.
|
repository in the group.
|
||||||
|
|
||||||
This usually avoids moving files between repositories of the group, even
|
This usually avoids moving files between repositories, even
|
||||||
if that means that things are not optimally balanced. Some of the ways
|
if that means that things are not optimally balanced. Some of the ways
|
||||||
that it can get out of balance include adding a new repository to the
|
that it can get out of balance include adding a new repository to the
|
||||||
group, or a file getting copied into more repositories in the group than
|
group, or a file getting copied into more repositories in the
|
||||||
the specified number. Running git-annex commands with the `--rebalance`
|
group than the specified number, or some of the repositories filling up.
|
||||||
option will make this expression instead behave like the `fullybalanced`
|
|
||||||
expression, which will make repositories want to move files around as
|
|
||||||
necessary in order to get fully balanced.
|
|
||||||
|
|
||||||
Using this in a perferred content expression makes git-annex need to do
|
Running git-annex commands with the `--rebalance` option will make this
|
||||||
|
expression instead behave like the `fullybalanced` expression, which will
|
||||||
|
make repositories want to move files around as necessary in order to get
|
||||||
|
fully balanced.
|
||||||
|
|
||||||
|
Using this in a preferred content expression makes git-annex need to do
|
||||||
some additional work to keep track of how full repositories are. Usually
|
some additional work to keep track of how full repositories are. Usually
|
||||||
that won't affect performance much. However, the first time git-annex
|
that won't affect performance much. However, the first time git-annex
|
||||||
processes this in a given git repository, it will need to examine
|
processes this expression in a given git repository, it will need to
|
||||||
all the locations of all files, which can be slow when there are a lot of
|
calculate the sizes of all repositories, which can be slow when there are
|
||||||
them. When this causes git-annex to do a lot of work, it will
|
a lot of files. When this causes git-annex to do a lot of work, it will
|
||||||
display "(calculating repository sizes)".
|
display "(calculating repository sizes)".
|
||||||
|
|
||||||
Note that `not balanced` is a bad thing to put in a preferred content
|
Note that `not balanced` is a bad thing to put in a preferred content
|
||||||
|
@ -316,6 +318,54 @@ elsewhere to allow removing it).
|
||||||
When the `--rebalance` option is used, `balanced` is the same as
|
When the `--rebalance` option is used, `balanced` is the same as
|
||||||
`fullybalanced`.
|
`fullybalanced`.
|
||||||
|
|
||||||
|
* `sizebalanced=groupname:number`
|
||||||
|
|
||||||
|
Distributes content among repositories in the group, keeping
|
||||||
|
repositories proportionally full.
|
||||||
|
|
||||||
|
The number is the number of repositories in the group that will
|
||||||
|
want each file. When not specified, the default is 1.
|
||||||
|
|
||||||
|
For this to work, you must use [[git-annex-maxsize]](1) to specify
|
||||||
|
the size of each repository in the group. When a repository's
|
||||||
|
maxsize has not been specified, it will not want any files.
|
||||||
|
|
||||||
|
For example, if one repository in the group has a maximum size of
|
||||||
|
100 gb with 60 gb used, and another has a maximum size of 50 gb with
|
||||||
|
25 gb used, the smaller one will want files (that fit in it),
|
||||||
|
and the larger one won't want any files
|
||||||
|
(that would fit in the smaller one)
|
||||||
|
until the smaller one gets equally full.
|
||||||
|
|
||||||
|
Note that, once a repository contains a file, it will continue to want
|
||||||
|
it, even if it's more full than other repositories. This is to avoid
|
||||||
|
churn in moving files around.
|
||||||
|
|
||||||
|
This is more likely to get out of balance than the `balanced=` expression
|
||||||
|
is, because git-annex does not always have consistent knowledge of
|
||||||
|
how full repositories are. Consider for example if a laptop and a desktop
|
||||||
|
are each sending a new file to the group. They will both pick whichever
|
||||||
|
repository was least full, but that means both files go to the same
|
||||||
|
repository, when a better solution might have been to send the smaller
|
||||||
|
file to a different repository. When using `balanced=` in the same
|
||||||
|
situation, it's less likely that a repository will want both files.
|
||||||
|
|
||||||
|
Running git-annex commands with the `--rebalance` option will make this
|
||||||
|
expression instead behave like the `fullysizebalanced` expression, which
|
||||||
|
will make repositories want to move files around as necessary in order to
|
||||||
|
get fully balanced.
|
||||||
|
|
||||||
|
* `fullysizebalanced=groupname:number`
|
||||||
|
|
||||||
|
This is like `sizebalanced`, but allows moving content between repositories
|
||||||
|
in the group at any time to keep it fully balanced.
|
||||||
|
|
||||||
|
Normally "sizebalanced=groupname:number" is the same as
|
||||||
|
"(fullysizebalanced=groupname:number and not copies=groupname:number) or present"
|
||||||
|
|
||||||
|
When the `--rebalance` option is used, `sizebalanced` is the same as
|
||||||
|
`fullysizebalanced`.
|
||||||
|
|
||||||
* `anything`
|
* `anything`
|
||||||
|
|
||||||
Always matches.
|
Always matches.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue