diff --git a/Annex.hs b/Annex.hs index 0c41ad95c1..102471998d 100644 --- a/Annex.hs +++ b/Annex.hs @@ -75,6 +75,7 @@ import Types.RemoteConfig import Types.TransferrerPool import Types.VectorClock import Types.Cluster +import Types.MaxSize import Annex.VectorClock.Utility import Annex.Debug.Utility import qualified Database.Keys.Handle as Keys @@ -200,6 +201,7 @@ data AnnexState = AnnexState , requiredcontentmap :: Maybe (FileMatcherMap Annex) , remoteconfigmap :: Maybe (M.Map UUID RemoteConfig) , clusters :: Maybe (Annex Clusters) + , maxsizes :: Maybe (M.Map UUID MaxSize) , forcetrust :: TrustMap , trustmap :: Maybe TrustMap , groupmap :: Maybe GroupMap @@ -254,6 +256,7 @@ newAnnexState c r = do , requiredcontentmap = Nothing , remoteconfigmap = Nothing , clusters = Nothing + , maxsizes = Nothing , forcetrust = M.empty , trustmap = Nothing , groupmap = Nothing diff --git a/CHANGELOG b/CHANGELOG index 13659b244c..542fd3d864 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -20,6 +20,9 @@ git-annex (10.20240831) UNRELEASED; urgency=medium own purposes. * Support "balanced=" and "fullybalanced=" in preferred content expressions. * Added --rebalance option. + * maxsize: New command to tell git-annex how large the expected maximum + size of a repository is. + * vicfg: Include maxsize configuration. -- Joey Hess Wed, 31 Jul 2024 15:52:03 -0400 diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index afa41f4dd0..9b2efc5efd 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -131,6 +131,7 @@ import qualified Command.InitCluster import qualified Command.UpdateCluster import qualified Command.ExtendCluster import qualified Command.UpdateProxy +import qualified Command.MaxSize import qualified Command.Version import qualified Command.RemoteDaemon #ifdef WITH_ASSISTANT @@ -261,6 +262,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption , Command.UpdateCluster.cmd , Command.ExtendCluster.cmd , Command.UpdateProxy.cmd + , Command.MaxSize.cmd , Command.Version.cmd , Command.RemoteDaemon.cmd #ifdef WITH_ASSISTANT diff --git a/Command/Vicfg.hs b/Command/Vicfg.hs index 599d067fd9..806b5e5df0 100644 --- a/Command/Vicfg.hs +++ b/Command/Vicfg.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2012-2022 Joey Hess + - Copyright 2012-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -27,11 +27,13 @@ import Logs.PreferredContent import Logs.Schedule import Logs.Config import Logs.NumCopies +import Logs.MaxSize import Types.StandardGroups import Types.ScheduledActivity import Types.NumCopies import Remote import Git.Types (fromConfigKey, fromConfigValue) +import Utility.DataUnits import qualified Utility.RawFilePath as R cmd :: Command @@ -76,6 +78,7 @@ data Cfg = Cfg , cfgGlobalConfigs :: M.Map ConfigKey ConfigValue , cfgNumCopies :: Maybe NumCopies , cfgMinCopies :: Maybe MinCopies + , cfgMaxSizeMap :: M.Map UUID (Maybe MaxSize) } getCfg :: Annex Cfg @@ -89,6 +92,7 @@ getCfg = Cfg <*> loadGlobalConfig <*> getGlobalNumCopies <*> getGlobalMinCopies + <*> (M.map Just <$> getMaxSizes) setCfg :: Cfg -> Cfg -> Annex () setCfg curcfg newcfg = do @@ -102,6 +106,10 @@ setCfg curcfg newcfg = do mapM_ (uncurry setGlobalConfig) $ M.toList $ cfgGlobalConfigs diff maybe noop setGlobalNumCopies $ cfgNumCopies diff maybe noop setGlobalMinCopies $ cfgMinCopies diff + mapM_ (uncurry setmaxsize) $ M.toList $ cfgMaxSizeMap diff + where + setmaxsize _u Nothing = noop + setmaxsize u (Just sz) = recordMaxSize u sz {- Default config has all the keys from the input config, but with their - default values. -} @@ -116,6 +124,7 @@ defCfg curcfg = Cfg , cfgGlobalConfigs = mapdef $ cfgGlobalConfigs curcfg , cfgNumCopies = Nothing , cfgMinCopies = Nothing + , cfgMaxSizeMap = mapdef $ cfgMaxSizeMap curcfg } where mapdef :: forall k v. Default v => M.Map k v -> M.Map k v @@ -132,6 +141,7 @@ diffCfg curcfg newcfg = Cfg , cfgGlobalConfigs = diff cfgGlobalConfigs , cfgNumCopies = cfgNumCopies newcfg , cfgMinCopies = cfgMinCopies newcfg + , cfgMaxSizeMap = diff cfgMaxSizeMap } where diff f = M.differenceWith (\x y -> if x == y then Nothing else Just x) @@ -146,6 +156,7 @@ genCfg cfg descs = unlines $ intercalate [""] , grouppreferredcontent , standardgroups , requiredcontent + , maxsizes , schedule , numcopies , globalconfigs @@ -215,6 +226,12 @@ genCfg cfg descs = unlines $ intercalate [""] , fromGroup (fromStandardGroup g), "=", standardPreferredContent g ] + maxsizes = settings cfg descs cfgMaxSizeMap + [ com "Maximum repository sizes" + ] + (\(sz, u) -> line "maxsize" u $ maybe "" (\(MaxSize n) -> preciseSize storageUnits False n) sz) + (\u -> line "maxsize" u "") + schedule = settings cfg descs cfgScheduleMap [ com "Scheduled activities" , com "(Separate multiple activities with \"; \")" @@ -311,6 +328,11 @@ parseCfg defcfg = go [] defcfg . lines Nothing -> let m = M.insert (toGroup f) val (cfgGroupPreferredContentMap cfg) in Right $ cfg { cfgGroupPreferredContentMap = m } + | setting == "maxsize" = case readSize dataUnits val of + Nothing -> Left "parse error (expected a size such as \"100 gb\")" + Just n -> + let m = M.insert u (Just (MaxSize n)) (cfgMaxSizeMap cfg) + in Right $ cfg { cfgMaxSizeMap = m } | setting == "schedule" = case parseScheduledActivities val of Left e -> Left e Right l -> diff --git a/Logs.hs b/Logs.hs index 2260c3899a..91d4566bdd 100644 --- a/Logs.hs +++ b/Logs.hs @@ -100,6 +100,7 @@ topLevelNewUUIDBasedLogs = [ exportLog , proxyLog , clusterLog + , maxSizeLog ] {- Other top-level logs. -} @@ -162,6 +163,9 @@ proxyLog = "proxy.log" clusterLog :: RawFilePath clusterLog = "cluster.log" +maxSizeLog :: RawFilePath +maxSizeLog = "maxsize.log" + {- This is not a log file, it's where exported treeishes get grafted into - the git-annex branch. -} exportTreeGraftPoint :: RawFilePath diff --git a/Logs/MaxSize.hs b/Logs/MaxSize.hs new file mode 100644 index 0000000000..097cf71514 --- /dev/null +++ b/Logs/MaxSize.hs @@ -0,0 +1,49 @@ +{- git-annex maxsize log + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Logs.MaxSize ( + MaxSize(..), + getMaxSizes, + recordMaxSize, +) where + +import qualified Annex +import Annex.Common +import Types.MaxSize +import Logs +import Logs.UUIDBased +import Logs.MapLog +import qualified Annex.Branch + +import qualified Data.Map as M +import Data.ByteString.Builder +import qualified Data.Attoparsec.ByteString as A + +getMaxSizes :: Annex (M.Map UUID MaxSize) +getMaxSizes = maybe loadMaxSizes return =<< Annex.getState Annex.maxsizes + +loadMaxSizes :: Annex (M.Map UUID MaxSize) +loadMaxSizes = do + maxsizes <- M.map value . fromMapLog . parseLogNew parseMaxSize + <$> Annex.Branch.get maxSizeLog + Annex.changeState $ \s -> s { Annex.maxsizes = Just maxsizes } + return maxsizes + +recordMaxSize :: UUID -> MaxSize -> Annex () +recordMaxSize uuid maxsize = do + c <- currentVectorClock + Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) maxSizeLog $ + (buildLogNew buildMaxSize) + . changeLog c uuid maxsize + . parseLogNew parseMaxSize + +buildMaxSize :: MaxSize -> Builder +buildMaxSize (MaxSize n) = byteString (encodeBS (show n)) + +parseMaxSize :: A.Parser MaxSize +parseMaxSize = maybe (fail "maxsize parse failed") (pure . MaxSize) + . readish . decodeBS =<< A.takeByteString diff --git a/Types/MaxSize.hs b/Types/MaxSize.hs new file mode 100644 index 0000000000..bddcce5251 --- /dev/null +++ b/Types/MaxSize.hs @@ -0,0 +1,11 @@ +{- git-annex maxsize type + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Types.MaxSize where + +newtype MaxSize = MaxSize Integer + deriving (Show, Eq, Ord) diff --git a/doc/git-annex-maxsize.mdwn b/doc/git-annex-maxsize.mdwn index 9c0909467c..3efc1d623e 100644 --- a/doc/git-annex-maxsize.mdwn +++ b/doc/git-annex-maxsize.mdwn @@ -6,14 +6,12 @@ git-annex maxsize - configure maximum size of a repository git annex maxsize repository size -git annex maxsize here --auto - git annex maxsize repository # DESCRIPTION This configures the maximum combined size of annexed files that can be -stored in a repository. When run without a size or the --auto option, +stored in a repository. When run without a size, it displays the currently configured maxsize. The repository can be specified by git remote name or @@ -33,7 +31,11 @@ gigabyte, then it would make sense to run # OPTIONS -* The [[git-annex-common-options]](1) can be used. +* `--bytes` + + Displays the maximum size in bytes, disabling the default nicer units. + +* The [[git-annex-common-options]](1) can also be used. # SEE ALSO diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 810be2d4a2..b46526e8dc 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -333,6 +333,10 @@ content from the key-value store. See [[git-annex-required]](1) for details. +* `maxsize repository [size]` + + Get or set maximum size of repository. + * `initcluster` Initializes a new cluster. diff --git a/doc/internals.mdwn b/doc/internals.mdwn index c2a0c7874b..06a98e9559 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -176,6 +176,16 @@ The file format is one line per group, starting with a timestamp, then a space, then the group name followed by a space and then the preferred content expression. +## `maxsize.log` + +Records the maximum combined size of annexed files that can be stored in +a repository. + +The file format is a timestamp, followed by the UUID of a repository, +followed by the size in bytes. For example: + + 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 100000000000 + ## `export.log` Tracks what trees have been exported to special remotes by diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 35a8ce5269..48e2df2189 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -45,7 +45,7 @@ Planned schedule of work: Also note that "fullybalanced=foo:2" is not currently actually implemented! -* Add `git-annex maxsize` command. +* `git-annex info` can use maxsize to display how full repositories are * balanced= and fullybalanced= need to limit the set of repositories to ones with enough free space to contain a key. diff --git a/git-annex.cabal b/git-annex.cabal index b7b147c0e9..bcfb668268 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -692,6 +692,7 @@ Executable git-annex Command.LookupKey Command.Map Command.MatchExpression + Command.MaxSize Command.Merge Command.MetaData Command.Migrate @@ -859,6 +860,7 @@ Executable git-annex Logs.Line Logs.Location Logs.MapLog + Logs.MaxSize Logs.MetaData Logs.MetaData.Pure Logs.Migrate @@ -986,6 +988,7 @@ Executable git-annex Types.KeySource Types.Link Types.LockCache + Types.MaxSize Types.Messages Types.MetaData Types.Mime