implement maxsize log and command

* maxsize: New command to tell git-annex how large the expected maximum
  size of a repository is.
* vicfg: Include maxsize configuration.
This commit is contained in:
Joey Hess 2024-08-11 15:41:26 -04:00
parent d33ab4bbe4
commit 1265d7e5df
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
12 changed files with 119 additions and 6 deletions

View file

@ -75,6 +75,7 @@ import Types.RemoteConfig
import Types.TransferrerPool import Types.TransferrerPool
import Types.VectorClock import Types.VectorClock
import Types.Cluster import Types.Cluster
import Types.MaxSize
import Annex.VectorClock.Utility import Annex.VectorClock.Utility
import Annex.Debug.Utility import Annex.Debug.Utility
import qualified Database.Keys.Handle as Keys import qualified Database.Keys.Handle as Keys
@ -200,6 +201,7 @@ data AnnexState = AnnexState
, requiredcontentmap :: Maybe (FileMatcherMap Annex) , requiredcontentmap :: Maybe (FileMatcherMap Annex)
, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig) , remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
, clusters :: Maybe (Annex Clusters) , clusters :: Maybe (Annex Clusters)
, maxsizes :: Maybe (M.Map UUID MaxSize)
, forcetrust :: TrustMap , forcetrust :: TrustMap
, trustmap :: Maybe TrustMap , trustmap :: Maybe TrustMap
, groupmap :: Maybe GroupMap , groupmap :: Maybe GroupMap
@ -254,6 +256,7 @@ newAnnexState c r = do
, requiredcontentmap = Nothing , requiredcontentmap = Nothing
, remoteconfigmap = Nothing , remoteconfigmap = Nothing
, clusters = Nothing , clusters = Nothing
, maxsizes = Nothing
, forcetrust = M.empty , forcetrust = M.empty
, trustmap = Nothing , trustmap = Nothing
, groupmap = Nothing , groupmap = Nothing

View file

@ -20,6 +20,9 @@ git-annex (10.20240831) UNRELEASED; urgency=medium
own purposes. own purposes.
* Support "balanced=" and "fullybalanced=" in preferred content expressions. * Support "balanced=" and "fullybalanced=" in preferred content expressions.
* Added --rebalance option. * Added --rebalance option.
* maxsize: New command to tell git-annex how large the expected maximum
size of a repository is.
* vicfg: Include maxsize configuration.
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400 -- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400

View file

@ -131,6 +131,7 @@ import qualified Command.InitCluster
import qualified Command.UpdateCluster import qualified Command.UpdateCluster
import qualified Command.ExtendCluster import qualified Command.ExtendCluster
import qualified Command.UpdateProxy import qualified Command.UpdateProxy
import qualified Command.MaxSize
import qualified Command.Version import qualified Command.Version
import qualified Command.RemoteDaemon import qualified Command.RemoteDaemon
#ifdef WITH_ASSISTANT #ifdef WITH_ASSISTANT
@ -261,6 +262,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.UpdateCluster.cmd , Command.UpdateCluster.cmd
, Command.ExtendCluster.cmd , Command.ExtendCluster.cmd
, Command.UpdateProxy.cmd , Command.UpdateProxy.cmd
, Command.MaxSize.cmd
, Command.Version.cmd , Command.Version.cmd
, Command.RemoteDaemon.cmd , Command.RemoteDaemon.cmd
#ifdef WITH_ASSISTANT #ifdef WITH_ASSISTANT

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2012-2022 Joey Hess <id@joeyh.name> - Copyright 2012-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -27,11 +27,13 @@ import Logs.PreferredContent
import Logs.Schedule import Logs.Schedule
import Logs.Config import Logs.Config
import Logs.NumCopies import Logs.NumCopies
import Logs.MaxSize
import Types.StandardGroups import Types.StandardGroups
import Types.ScheduledActivity import Types.ScheduledActivity
import Types.NumCopies import Types.NumCopies
import Remote import Remote
import Git.Types (fromConfigKey, fromConfigValue) import Git.Types (fromConfigKey, fromConfigValue)
import Utility.DataUnits
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
cmd :: Command cmd :: Command
@ -76,6 +78,7 @@ data Cfg = Cfg
, cfgGlobalConfigs :: M.Map ConfigKey ConfigValue , cfgGlobalConfigs :: M.Map ConfigKey ConfigValue
, cfgNumCopies :: Maybe NumCopies , cfgNumCopies :: Maybe NumCopies
, cfgMinCopies :: Maybe MinCopies , cfgMinCopies :: Maybe MinCopies
, cfgMaxSizeMap :: M.Map UUID (Maybe MaxSize)
} }
getCfg :: Annex Cfg getCfg :: Annex Cfg
@ -89,6 +92,7 @@ getCfg = Cfg
<*> loadGlobalConfig <*> loadGlobalConfig
<*> getGlobalNumCopies <*> getGlobalNumCopies
<*> getGlobalMinCopies <*> getGlobalMinCopies
<*> (M.map Just <$> getMaxSizes)
setCfg :: Cfg -> Cfg -> Annex () setCfg :: Cfg -> Cfg -> Annex ()
setCfg curcfg newcfg = do setCfg curcfg newcfg = do
@ -102,6 +106,10 @@ setCfg curcfg newcfg = do
mapM_ (uncurry setGlobalConfig) $ M.toList $ cfgGlobalConfigs diff mapM_ (uncurry setGlobalConfig) $ M.toList $ cfgGlobalConfigs diff
maybe noop setGlobalNumCopies $ cfgNumCopies diff maybe noop setGlobalNumCopies $ cfgNumCopies diff
maybe noop setGlobalMinCopies $ cfgMinCopies diff maybe noop setGlobalMinCopies $ cfgMinCopies diff
mapM_ (uncurry setmaxsize) $ M.toList $ cfgMaxSizeMap diff
where
setmaxsize _u Nothing = noop
setmaxsize u (Just sz) = recordMaxSize u sz
{- Default config has all the keys from the input config, but with their {- Default config has all the keys from the input config, but with their
- default values. -} - default values. -}
@ -116,6 +124,7 @@ defCfg curcfg = Cfg
, cfgGlobalConfigs = mapdef $ cfgGlobalConfigs curcfg , cfgGlobalConfigs = mapdef $ cfgGlobalConfigs curcfg
, cfgNumCopies = Nothing , cfgNumCopies = Nothing
, cfgMinCopies = Nothing , cfgMinCopies = Nothing
, cfgMaxSizeMap = mapdef $ cfgMaxSizeMap curcfg
} }
where where
mapdef :: forall k v. Default v => M.Map k v -> M.Map k v mapdef :: forall k v. Default v => M.Map k v -> M.Map k v
@ -132,6 +141,7 @@ diffCfg curcfg newcfg = Cfg
, cfgGlobalConfigs = diff cfgGlobalConfigs , cfgGlobalConfigs = diff cfgGlobalConfigs
, cfgNumCopies = cfgNumCopies newcfg , cfgNumCopies = cfgNumCopies newcfg
, cfgMinCopies = cfgMinCopies newcfg , cfgMinCopies = cfgMinCopies newcfg
, cfgMaxSizeMap = diff cfgMaxSizeMap
} }
where where
diff f = M.differenceWith (\x y -> if x == y then Nothing else Just x) diff f = M.differenceWith (\x y -> if x == y then Nothing else Just x)
@ -146,6 +156,7 @@ genCfg cfg descs = unlines $ intercalate [""]
, grouppreferredcontent , grouppreferredcontent
, standardgroups , standardgroups
, requiredcontent , requiredcontent
, maxsizes
, schedule , schedule
, numcopies , numcopies
, globalconfigs , globalconfigs
@ -215,6 +226,12 @@ genCfg cfg descs = unlines $ intercalate [""]
, fromGroup (fromStandardGroup g), "=", standardPreferredContent g , fromGroup (fromStandardGroup g), "=", standardPreferredContent g
] ]
maxsizes = settings cfg descs cfgMaxSizeMap
[ com "Maximum repository sizes"
]
(\(sz, u) -> line "maxsize" u $ maybe "" (\(MaxSize n) -> preciseSize storageUnits False n) sz)
(\u -> line "maxsize" u "")
schedule = settings cfg descs cfgScheduleMap schedule = settings cfg descs cfgScheduleMap
[ com "Scheduled activities" [ com "Scheduled activities"
, com "(Separate multiple activities with \"; \")" , com "(Separate multiple activities with \"; \")"
@ -311,6 +328,11 @@ parseCfg defcfg = go [] defcfg . lines
Nothing -> Nothing ->
let m = M.insert (toGroup f) val (cfgGroupPreferredContentMap cfg) let m = M.insert (toGroup f) val (cfgGroupPreferredContentMap cfg)
in Right $ cfg { cfgGroupPreferredContentMap = m } in Right $ cfg { cfgGroupPreferredContentMap = m }
| setting == "maxsize" = case readSize dataUnits val of
Nothing -> Left "parse error (expected a size such as \"100 gb\")"
Just n ->
let m = M.insert u (Just (MaxSize n)) (cfgMaxSizeMap cfg)
in Right $ cfg { cfgMaxSizeMap = m }
| setting == "schedule" = case parseScheduledActivities val of | setting == "schedule" = case parseScheduledActivities val of
Left e -> Left e Left e -> Left e
Right l -> Right l ->

View file

@ -100,6 +100,7 @@ topLevelNewUUIDBasedLogs =
[ exportLog [ exportLog
, proxyLog , proxyLog
, clusterLog , clusterLog
, maxSizeLog
] ]
{- Other top-level logs. -} {- Other top-level logs. -}
@ -162,6 +163,9 @@ proxyLog = "proxy.log"
clusterLog :: RawFilePath clusterLog :: RawFilePath
clusterLog = "cluster.log" clusterLog = "cluster.log"
{- Log recording the maximum size configured for each repository. -}
maxSizeLog :: RawFilePath
maxSizeLog = "maxsize.log"
{- This is not a log file, it's where exported treeishes get grafted into {- This is not a log file, it's where exported treeishes get grafted into
- the git-annex branch. -} - the git-annex branch. -}
exportTreeGraftPoint :: RawFilePath exportTreeGraftPoint :: RawFilePath

49
Logs/MaxSize.hs Normal file
View file

@ -0,0 +1,49 @@
{- git-annex maxsize log
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Logs.MaxSize (
MaxSize(..),
getMaxSizes,
recordMaxSize,
) where
import qualified Annex
import Annex.Common
import Types.MaxSize
import Logs
import Logs.UUIDBased
import Logs.MapLog
import qualified Annex.Branch
import qualified Data.Map as M
import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString as A
{- Gets the map of configured maximum sizes, keyed by repository UUID.
 -
 - The map cached in the Annex state is used when present; otherwise
 - it is loaded (and cached) by loadMaxSizes. -}
getMaxSizes :: Annex (M.Map UUID MaxSize)
getMaxSizes = do
    cached <- Annex.getState Annex.maxsizes
    case cached of
        Just m -> return m
        Nothing -> loadMaxSizes
{- Reads the maxsize log from the git-annex branch, parses it into a
 - map from UUID to MaxSize, and stores that map in the Annex state
 - before returning it. -}
loadMaxSizes :: Annex (M.Map UUID MaxSize)
loadMaxSizes = do
    logcontent <- Annex.Branch.get maxSizeLog
    let m = M.map value $ fromMapLog $ parseLogNew parseMaxSize logcontent
    Annex.changeState $ \st -> st { Annex.maxsizes = Just m }
    return m
{- Records the maximum size of a repository in the maxsize log on the
 - git-annex branch, stamped with the current vector clock.
 -
 - The map cached in the Annex state is discarded afterwards, so that a
 - later call to getMaxSizes in the same process reloads the log and
 - sees the new value rather than a stale cached one. -}
recordMaxSize :: UUID -> MaxSize -> Annex ()
recordMaxSize uuid maxsize = do
    c <- currentVectorClock
    Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) maxSizeLog $
        buildLogNew buildMaxSize
            . changeLog c uuid maxsize
            . parseLogNew parseMaxSize
    -- Invalidate the cache populated by loadMaxSizes.
    Annex.changeState $ \s -> s { Annex.maxsizes = Nothing }
{- Serializes a MaxSize for the log as the decimal representation of
 - its Integer. -}
buildMaxSize :: MaxSize -> Builder
buildMaxSize (MaxSize n) = integerDec n
{- Parses a MaxSize log value. All remaining input is consumed and
 - handed to readish; the parser fails when readish yields nothing. -}
parseMaxSize :: A.Parser MaxSize
parseMaxSize = do
    raw <- A.takeByteString
    case readish (decodeBS raw) of
        Just n -> pure (MaxSize n)
        Nothing -> fail "maxsize parse failed"

11
Types/MaxSize.hs Normal file
View file

@ -0,0 +1,11 @@
{- git-annex maxsize type
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.MaxSize where
{- The maximum combined size of annexed files that a repository is
 - expected to hold. The maxsize log documentation describes the stored
 - value as a size in bytes. -}
newtype MaxSize = MaxSize Integer
    deriving (Eq, Ord, Show)

View file

@ -6,14 +6,12 @@ git-annex maxsize - configure maximum size of a repository
git annex maxsize repository size git annex maxsize repository size
git annex maxsize here --auto
git annex maxsize repository git annex maxsize repository
# DESCRIPTION # DESCRIPTION
This configures the maximum combined size of annexed files that can be This configures the maximum combined size of annexed files that can be
stored in a repository. When run without a size or the --auto option, stored in a repository. When run without a size,
it displays the currently configured maxsize. it displays the currently configured maxsize.
The repository can be specified by git remote name or The repository can be specified by git remote name or
@ -33,7 +31,11 @@ gigabyte, then it would make sense to run
# OPTIONS # OPTIONS
* The [[git-annex-common-options]](1) can be used. * `--bytes`
Displays the maximum size in bytes, disabling the default nicer units.
* The [[git-annex-common-options]](1) can also be used.
# SEE ALSO # SEE ALSO

View file

@ -333,6 +333,10 @@ content from the key-value store.
See [[git-annex-required]](1) for details. See [[git-annex-required]](1) for details.
* `maxsize repository [size]`
Gets or sets the maximum size of a repository.
* `initcluster` * `initcluster`
Initializes a new cluster. Initializes a new cluster.

View file

@ -176,6 +176,16 @@ The file format is one line per group, starting with a timestamp, then a
space, then the group name followed by a space and then the preferred space, then the group name followed by a space and then the preferred
content expression. content expression.
## `maxsize.log`
Records the maximum combined size of annexed files that can be stored in
a repository.
The file format is a timestamp, followed by the UUID of a repository,
followed by the size in bytes. For example:
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 100000000000
## `export.log` ## `export.log`
Tracks what trees have been exported to special remotes by Tracks what trees have been exported to special remotes by

View file

@ -45,7 +45,7 @@ Planned schedule of work:
Also note that "fullybalanced=foo:2" is not currently actually Also note that "fullybalanced=foo:2" is not currently actually
implemented! implemented!
* Add `git-annex maxsize` command. * `git-annex info` can use maxsize to display how full repositories are
* balanced= and fullybalanced= need to limit the set of repositories to * balanced= and fullybalanced= need to limit the set of repositories to
ones with enough free space to contain a key. ones with enough free space to contain a key.

View file

@ -692,6 +692,7 @@ Executable git-annex
Command.LookupKey Command.LookupKey
Command.Map Command.Map
Command.MatchExpression Command.MatchExpression
Command.MaxSize
Command.Merge Command.Merge
Command.MetaData Command.MetaData
Command.Migrate Command.Migrate
@ -859,6 +860,7 @@ Executable git-annex
Logs.Line Logs.Line
Logs.Location Logs.Location
Logs.MapLog Logs.MapLog
Logs.MaxSize
Logs.MetaData Logs.MetaData
Logs.MetaData.Pure Logs.MetaData.Pure
Logs.Migrate Logs.Migrate
@ -986,6 +988,7 @@ Executable git-annex
Types.KeySource Types.KeySource
Types.Link Types.Link
Types.LockCache Types.LockCache
Types.MaxSize
Types.Messages Types.Messages
Types.MetaData Types.MetaData
Types.Mime Types.Mime