implement maxsize log and command

* maxsize: New command to tell git-annex how large the expected maximum
  size of a repository is.
* vicfg: Include maxsize configuration.
This commit is contained in:
Joey Hess 2024-08-11 15:41:26 -04:00
parent d33ab4bbe4
commit 1265d7e5df
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
12 changed files with 119 additions and 6 deletions

View file

@ -75,6 +75,7 @@ import Types.RemoteConfig
import Types.TransferrerPool
import Types.VectorClock
import Types.Cluster
import Types.MaxSize
import Annex.VectorClock.Utility
import Annex.Debug.Utility
import qualified Database.Keys.Handle as Keys
@ -200,6 +201,7 @@ data AnnexState = AnnexState
, requiredcontentmap :: Maybe (FileMatcherMap Annex)
, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
, clusters :: Maybe (Annex Clusters)
, maxsizes :: Maybe (M.Map UUID MaxSize)
, forcetrust :: TrustMap
, trustmap :: Maybe TrustMap
, groupmap :: Maybe GroupMap
@ -254,6 +256,7 @@ newAnnexState c r = do
, requiredcontentmap = Nothing
, remoteconfigmap = Nothing
, clusters = Nothing
, maxsizes = Nothing
, forcetrust = M.empty
, trustmap = Nothing
, groupmap = Nothing

View file

@ -20,6 +20,9 @@ git-annex (10.20240831) UNRELEASED; urgency=medium
own purposes.
* Support "balanced=" and "fullybalanced=" in preferred content expressions.
* Added --rebalance option.
* maxsize: New command to tell git-annex how large the expected maximum
size of a repository is.
* vicfg: Include maxsize configuration.
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400

View file

@ -131,6 +131,7 @@ import qualified Command.InitCluster
import qualified Command.UpdateCluster
import qualified Command.ExtendCluster
import qualified Command.UpdateProxy
import qualified Command.MaxSize
import qualified Command.Version
import qualified Command.RemoteDaemon
#ifdef WITH_ASSISTANT
@ -261,6 +262,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.UpdateCluster.cmd
, Command.ExtendCluster.cmd
, Command.UpdateProxy.cmd
, Command.MaxSize.cmd
, Command.Version.cmd
, Command.RemoteDaemon.cmd
#ifdef WITH_ASSISTANT

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2012-2022 Joey Hess <id@joeyh.name>
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -27,11 +27,13 @@ import Logs.PreferredContent
import Logs.Schedule
import Logs.Config
import Logs.NumCopies
import Logs.MaxSize
import Types.StandardGroups
import Types.ScheduledActivity
import Types.NumCopies
import Remote
import Git.Types (fromConfigKey, fromConfigValue)
import Utility.DataUnits
import qualified Utility.RawFilePath as R
cmd :: Command
@ -76,6 +78,7 @@ data Cfg = Cfg
, cfgGlobalConfigs :: M.Map ConfigKey ConfigValue
, cfgNumCopies :: Maybe NumCopies
, cfgMinCopies :: Maybe MinCopies
, cfgMaxSizeMap :: M.Map UUID (Maybe MaxSize)
}
getCfg :: Annex Cfg
@ -89,6 +92,7 @@ getCfg = Cfg
<*> loadGlobalConfig
<*> getGlobalNumCopies
<*> getGlobalMinCopies
<*> (M.map Just <$> getMaxSizes)
setCfg :: Cfg -> Cfg -> Annex ()
setCfg curcfg newcfg = do
@ -102,6 +106,10 @@ setCfg curcfg newcfg = do
mapM_ (uncurry setGlobalConfig) $ M.toList $ cfgGlobalConfigs diff
maybe noop setGlobalNumCopies $ cfgNumCopies diff
maybe noop setGlobalMinCopies $ cfgMinCopies diff
mapM_ (uncurry setmaxsize) $ M.toList $ cfgMaxSizeMap diff
where
setmaxsize _u Nothing = noop
setmaxsize u (Just sz) = recordMaxSize u sz
{- Default config has all the keys from the input config, but with their
- default values. -}
@ -116,6 +124,7 @@ defCfg curcfg = Cfg
, cfgGlobalConfigs = mapdef $ cfgGlobalConfigs curcfg
, cfgNumCopies = Nothing
, cfgMinCopies = Nothing
, cfgMaxSizeMap = mapdef $ cfgMaxSizeMap curcfg
}
where
mapdef :: forall k v. Default v => M.Map k v -> M.Map k v
@ -132,6 +141,7 @@ diffCfg curcfg newcfg = Cfg
, cfgGlobalConfigs = diff cfgGlobalConfigs
, cfgNumCopies = cfgNumCopies newcfg
, cfgMinCopies = cfgMinCopies newcfg
, cfgMaxSizeMap = diff cfgMaxSizeMap
}
where
diff f = M.differenceWith (\x y -> if x == y then Nothing else Just x)
@ -146,6 +156,7 @@ genCfg cfg descs = unlines $ intercalate [""]
, grouppreferredcontent
, standardgroups
, requiredcontent
, maxsizes
, schedule
, numcopies
, globalconfigs
@ -215,6 +226,12 @@ genCfg cfg descs = unlines $ intercalate [""]
, fromGroup (fromStandardGroup g), "=", standardPreferredContent g
]
maxsizes = settings cfg descs cfgMaxSizeMap
[ com "Maximum repository sizes"
]
(\(sz, u) -> line "maxsize" u $ maybe "" (\(MaxSize n) -> preciseSize storageUnits False n) sz)
(\u -> line "maxsize" u "")
schedule = settings cfg descs cfgScheduleMap
[ com "Scheduled activities"
, com "(Separate multiple activities with \"; \")"
@ -311,6 +328,11 @@ parseCfg defcfg = go [] defcfg . lines
Nothing ->
let m = M.insert (toGroup f) val (cfgGroupPreferredContentMap cfg)
in Right $ cfg { cfgGroupPreferredContentMap = m }
| setting == "maxsize" = case readSize dataUnits val of
Nothing -> Left "parse error (expected a size such as \"100 gb\")"
Just n ->
let m = M.insert u (Just (MaxSize n)) (cfgMaxSizeMap cfg)
in Right $ cfg { cfgMaxSizeMap = m }
| setting == "schedule" = case parseScheduledActivities val of
Left e -> Left e
Right l ->

View file

@ -100,6 +100,7 @@ topLevelNewUUIDBasedLogs =
[ exportLog
, proxyLog
, clusterLog
, maxSizeLog
]
{- Other top-level logs. -}
@ -162,6 +163,9 @@ proxyLog = "proxy.log"
{- Name of the cluster log file. -}
clusterLog :: RawFilePath
clusterLog = "cluster.log"
{- Name of the log file that records the maximum combined size of
 - annexed files that can be stored in each repository. -}
maxSizeLog :: RawFilePath
maxSizeLog = "maxsize.log"
{- This is not a log file, it's where exported treeishes get grafted into
- the git-annex branch. -}
exportTreeGraftPoint :: RawFilePath

49
Logs/MaxSize.hs Normal file
View file

@ -0,0 +1,49 @@
{- git-annex maxsize log
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Logs.MaxSize (
MaxSize(..),
getMaxSizes,
recordMaxSize,
) where
import qualified Annex
import Annex.Common
import Types.MaxSize
import Logs
import Logs.UUIDBased
import Logs.MapLog
import qualified Annex.Branch
import qualified Data.Map as M
import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString as A
{- Gets the maxsize configured for each repository.
 -
 - The map cached in the Annex state is used when there is one;
 - otherwise it is loaded from the maxsize log. -}
getMaxSizes :: Annex (M.Map UUID MaxSize)
getMaxSizes = do
	cached <- Annex.getState Annex.maxsizes
	case cached of
		Just m -> return m
		Nothing -> loadMaxSizes
{- Reads the maxsize log, parses it into a map from repository UUID
 - to MaxSize, and stores that map in the Annex state before
 - returning it. -}
loadMaxSizes :: Annex (M.Map UUID MaxSize)
loadMaxSizes = do
	logcontent <- Annex.Branch.get maxSizeLog
	let m = M.map value $ fromMapLog $ parseLogNew parseMaxSize logcontent
	Annex.changeState $ \st -> st { Annex.maxsizes = Just m }
	return m
{- Records the maxsize of a repository in the maxsize log, stamped
 - with the current vector clock.
 -
 - The map cached in the Annex state is cleared afterwards, so that a
 - later getMaxSizes reloads the log, rather than returning the map as
 - it was before this change. -}
recordMaxSize :: UUID -> MaxSize -> Annex ()
recordMaxSize uuid maxsize = do
	c <- currentVectorClock
	Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) maxSizeLog $
		buildLogNew buildMaxSize
			. changeLog c uuid maxsize
			. parseLogNew parseMaxSize
	-- Invalidate the cache populated by loadMaxSizes.
	Annex.changeState $ \s -> s { Annex.maxsizes = Nothing }
{- Serializes a MaxSize for the log, as the decimal number of bytes. -}
buildMaxSize :: MaxSize -> Builder
buildMaxSize (MaxSize bytes) = integerDec bytes
{- Parses a MaxSize from the remaining input, which must read as an
 - Integer. Fails with "maxsize parse failed" otherwise. -}
parseMaxSize :: A.Parser MaxSize
parseMaxSize = do
	b <- A.takeByteString
	case readish (decodeBS b) of
		Just n -> pure (MaxSize n)
		Nothing -> fail "maxsize parse failed"

11
Types/MaxSize.hs Normal file
View file

@ -0,0 +1,11 @@
{- git-annex maxsize type
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.MaxSize where
{- The maximum combined size, in bytes, of annexed files that can be
 - stored in a repository. -}
newtype MaxSize = MaxSize Integer
	deriving (Eq, Ord, Show)

View file

@ -6,14 +6,12 @@ git-annex maxsize - configure maximum size of a repository
git annex maxsize repository size
git annex maxsize here --auto
git annex maxsize repository
# DESCRIPTION
This configures the maximum combined size of annexed files that can be
stored in a repository. When run without a size or the --auto option,
stored in a repository. When run without a size,
it displays the currently configured maxsize.
The repository can be specified by git remote name or
@ -33,7 +31,11 @@ gigabyte, then it would make sense to run
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
* `--bytes`
Displays the maximum size in bytes, disabling the default nicer units.
* The [[git-annex-common-options]](1) can also be used.
# SEE ALSO

View file

@ -333,6 +333,10 @@ content from the key-value store.
See [[git-annex-required]](1) for details.
* `maxsize repository [size]`
Gets or sets the maximum size of a repository.
* `initcluster`
Initializes a new cluster.

View file

@ -176,6 +176,16 @@ The file format is one line per group, starting with a timestamp, then a
space, then the group name followed by a space and then the preferred
content expression.
## `maxsize.log`
Records the maximum combined size of annexed files that can be stored in
a repository.
The file format is a timestamp, followed by the UUID of a repository,
followed by the size in bytes. For example:
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 100000000000
## `export.log`
Tracks what trees have been exported to special remotes by

View file

@ -45,7 +45,7 @@ Planned schedule of work:
Also note that "fullybalanced=foo:2" is not currently actually
implemented!
* Add `git-annex maxsize` command.
* `git-annex info` can use maxsize to display how full repositories are
* balanced= and fullybalanced= need to limit the set of repositories to
ones with enough free space to contain a key.

View file

@ -692,6 +692,7 @@ Executable git-annex
Command.LookupKey
Command.Map
Command.MatchExpression
Command.MaxSize
Command.Merge
Command.MetaData
Command.Migrate
@ -859,6 +860,7 @@ Executable git-annex
Logs.Line
Logs.Location
Logs.MapLog
Logs.MaxSize
Logs.MetaData
Logs.MetaData.Pure
Logs.Migrate
@ -986,6 +988,7 @@ Executable git-annex
Types.KeySource
Types.Link
Types.LockCache
Types.MaxSize
Types.Messages
Types.MetaData
Types.Mime