From 570ceffe8ddf922a78a608af59f98b9839f8f2d0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 14 Jun 2024 17:13:23 -0400 Subject: [PATCH] broke out initcluster One benefit of this is that a typo in annex-cluster-node config won't init a new cluster. Also it gets the cluster description set and is consistent with initremote. --- CHANGELOG | 4 +-- CmdLine/GitAnnex.hs | 2 ++ Command/InitCluster.hs | 50 +++++++++++++++++++++++++++ Command/UpdateCluster.hs | 24 +++---------- doc/git-annex-initcluster.mdwn | 58 ++++++++++++++++++++++++++++++++ doc/git-annex-updatecluster.mdwn | 44 ++++++------------------ doc/git-annex.mdwn | 8 ++++- git-annex.cabal | 1 + 8 files changed, 135 insertions(+), 56 deletions(-) create mode 100644 Command/InitCluster.hs create mode 100644 doc/git-annex-initcluster.mdwn diff --git a/CHANGELOG b/CHANGELOG index a021ca783f..bf59a1ded2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,8 +2,8 @@ git-annex (10.20240532) UNRELEASED; urgency=medium * Added git-annex updateproxy command and remote.name.annex-proxy configuration. - * Added git-annex updatecluster command and remote.name.annex-cluster-node - and annex.cluster.name configuration. + * Added git-annex initcluster and updatecluster commands, + and remote.name.annex-cluster-node and annex.cluster.name configuration. * Fix a bug where interrupting git-annex while it is updating the git-annex branch for an export could later lead to git fsck complaining about missing tree objects. diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 11e9c9ad57..9bacea855a 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -124,6 +124,7 @@ import qualified Command.Smudge import qualified Command.FilterProcess import qualified Command.Restage import qualified Command.Undo +import qualified Command.InitCluster import qualified Command.UpdateCluster import qualified Command.UpdateProxy import qualified Command.Version @@ -249,6 +250,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption , Command.FilterProcess.cmd , Command.Restage.cmd , Command.Undo.cmd + , Command.InitCluster.cmd , Command.UpdateCluster.cmd , Command.UpdateProxy.cmd , Command.Version.cmd diff --git a/Command/InitCluster.hs b/Command/InitCluster.hs new file mode 100644 index 0000000000..d131c7e5f9 --- /dev/null +++ b/Command/InitCluster.hs @@ -0,0 +1,50 @@ +{- git-annex command + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Command.InitCluster where + +import Command +import qualified Annex +import Types.Cluster +import Logs.UUID +import Config +import Annex.UUID +import Git.Types +import Git.Remote (isLegalName) + +import qualified Data.Map as M + +cmd :: Command +cmd = command "initcluster" SectionSetup "initialize a new cluster" + (paramPair paramName paramDesc) (withParams seek) + +seek :: CmdParams -> CommandSeek +seek (clustername:desc:[]) = commandAction $ + start clustername (toUUIDDesc desc) +seek (clustername:[]) = commandAction $ + start clustername $ toUUIDDesc ("cluster " ++ clustername) +seek _ = giveup "Expected two parameters, name and description." + +start :: RemoteName -> UUIDDesc -> CommandStart +start clustername desc = starting "initcluster" ai si $ do + unless (isLegalName clustername) $ + giveup "That cluster name is not a valid git remote name." + + myclusters <- annexClusters <$> Annex.getGitConfig + unless (M.member clustername myclusters) $ do + cu <- fromMaybe (giveup "unable to generate a cluster UUID") + <$> genClusterUUID <$> liftIO genUUID + setConfig (annexConfig ("cluster." <> encodeBS clustername)) + (fromUUID (fromClusterUUID cu)) + describeUUID (fromClusterUUID cu) desc + + next $ return True + where + ai = ActionItemOther (Just (UnquotedString clustername)) + si = SeekInput [clustername] diff --git a/Command/UpdateCluster.hs b/Command/UpdateCluster.hs index 5e1474d293..ac38729e0e 100644 --- a/Command/UpdateCluster.hs +++ b/Command/UpdateCluster.hs @@ -13,8 +13,6 @@ import Command import qualified Annex import Types.Cluster import Logs.Cluster -import Config -import Annex.UUID import qualified Remote as R import qualified Types.Remote as R import qualified Command.UpdateProxy @@ -25,7 +23,7 @@ import qualified Data.Set as S cmd :: Command cmd = noMessages $ command "updatecluster" SectionSetup - "update records with cluster configuration" + "update records of cluster nodes" paramNothing (withParams seek) seek :: CmdParams -> CommandSeek @@ -40,25 +38,13 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do clusternames <- remoteAnnexClusterNode (R.gitconfig r) return $ M.fromList $ zip clusternames (repeat (S.singleton r)) let myclusternodes = M.unionsWith S.union (mapMaybe getnode rs) - - -- Generate cluster UUIDs and store in git config for each new cluster. myclusters <- annexClusters <$> Annex.getGitConfig - forM_ (M.keys myclusternodes) $ \clustername -> - unless (M.member clustername myclusters) $ do - liftIO $ putStrLn $ safeOutput $ - "Configuring new cluster: " ++ clustername - cu <- fromMaybe (giveup "unable to generate a cluster UUID") - <$> genClusterUUID <$> liftIO genUUID - setConfig (annexConfig ("cluster." <> encodeBS clustername)) - (fromUUID (fromClusterUUID cu)) - reloadConfig - - -- Update the cluster log to list the currently configured nodes - -- of each configured cluster. - myclusters' <- annexClusters <$> Annex.getGitConfig recordedclusters <- getClusters descs <- R.uuidDescriptions - forM_ (M.toList myclusters') $ \(clustername, cu) -> do + + -- Update the cluster log to list the currently configured nodes + -- of each configured cluster. + forM_ (M.toList myclusters) $ \(clustername, cu) -> do let mynodesremotes = fromMaybe mempty $ M.lookup clustername myclusternodes let mynodes = S.map (ClusterNodeUUID . R.uuid) mynodesremotes diff --git a/doc/git-annex-initcluster.mdwn b/doc/git-annex-initcluster.mdwn new file mode 100644 index 0000000000..7b9c9cb7f4 --- /dev/null +++ b/doc/git-annex-initcluster.mdwn @@ -0,0 +1,58 @@ +# NAME + +git-annex initcluster - initialize a new cluster + +# SYNOPSIS + +git-annex initcluster name [description] + +# DESCRIPTION + +A git-annex repository can provide access to its remotes as a unified +cluster. This allows other repositories to access the cluster as a remote, +with uploads and downloads distributed amoung the nodes of the cluster, +according to their preferred content settings. + +This command initializes a new cluster with the specified name. If no +description is provided, one will be set automatically. + +Once a cluster is initialized, the next step is to add nodes to it. +To make a remote be a node of the cluster, configure +`git config remote.name.annex-cluster-node`, setting it to the +name of the cluster. + +Finally, run `git-annex updatecluster` to record the cluster configuration +in the git-annex branch. That tells other repositories about the cluster. + +Example: + + git-annex initcluster mycluster + git config remote.foo.annex-cluster-node mycluster + git config remote.bar.annex-cluster-node mycluster + git config remote.baz.annex-cluster-node mycluster + git-annex updatecluster + +Suppose, for example, that remote "bigserver" has had those commands run in +it. Then after pulling from "bigserver", git-annex will know about an +additional remote, "bigserver-mycluster", which can be used like any other +remote but is an interface to the cluster as a whole. The individual cluster +nodes will also be proxied as remotes, eg "bigserver-foo". + +Clusters can only be accessed via ssh. + +# OPTIONS + +* The [[git-annex-common-options]](1) can be used. + +# SEE ALSO + +[[git-annex]](1) +[[git-annex-updatecluster]](1) +[[git-annex-preferred-content]](1) +[[git-annex-updateproxy]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex-updatecluster.mdwn b/doc/git-annex-updatecluster.mdwn index 403dc12c41..75bc6f41cf 100644 --- a/doc/git-annex-updatecluster.mdwn +++ b/doc/git-annex-updatecluster.mdwn @@ -1,6 +1,6 @@ # NAME -git-annex updatecluster - update records with cluster configuration +git-annex updatecluster - update records of cluster nodes # SYNOPSIS @@ -8,40 +8,16 @@ git-annex updatecluster # DESCRIPTION -A git-annex repository can provide access to its remotes as a unified -cluster. This allows other repositories to access the cluster as a remote, -with uploads and downloads distributed amoung the nodes of the cluster, -according to their preferred content settings. +This command is used to record the nodes of a cluster in the git-annex +branch. -To configure a repository to serve as a proxy to a cluster, first add -remotes to the repository that will serve as nodes of the cluster. -These can be any kind of git-annex remote, including special remotes. +It looks at the git configs `git config remote.name.annex-cluster-node` of +each remote. When that is set to the name of a cluster that has been +initialized with `git-annex initcluster`, the node will be recorded in the +git-annex branch. -For each remote that will be a node of the cluster, -configure `git config remote.name.annex-cluster-node`, setting it to the -name of the cluster. - -Finally, run `git-annex updatecluster` to record the cluster configuration -in the git-annex branch. That tells other repositories about the cluster. - -To later add new nodes to the cluster, or remove existing nodes from the -cluster, set or unset `remote.name.annex-cluster-node` as desired, -and run `git-annex updatecluster` again. - -Example: - - git config remote.foo.annex-cluster-node mycluster - git config remote.bar.annex-cluster-node mycluster - git config remote.baz.annex-cluster-node mycluster - git-annex updatecluster - -Suppose, for example, that remote "bigserver" has had those command run in -it. Then after pulling from "bigserver", git-annex will know about an -additional remote, "bigserver-mycluster", which can be used like any other -remote but is an interface to the cluster as a whole. The individual cluster -nodes will also be proxied as remotes, eg "bigserver-foo". - -Clusters can only be accessed via ssh. +To remove a node from a cluster, unset `remote.name.annex-cluster-node` +and run this command. # OPTIONS @@ -50,7 +26,7 @@ Clusters can only be accessed via ssh. # SEE ALSO [[git-annex]](1) -[[git-annex-preferred-content]](1) +[[git-annex-initcluster]](1) [[git-annex-updateproxy]](1) # AUTHOR diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 7236a21eb1..06b2b63abd 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -326,9 +326,15 @@ content from the key-value store. See [[git-annex-required]](1) for details. +* `initcluster` + + Initializes a new cluster. + + See [[git-annex-initcluster](1) for details. + * `updatecluster` - Update records with cluster configuration. + Update records of cluster nodes. See [[git-annex-updatecluster](1) for details. diff --git a/git-annex.cabal b/git-annex.cabal index 516368a228..3b26ce1df9 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -658,6 +658,7 @@ Executable git-annex Command.Indirect Command.Info Command.Init + Command.InitCluster Command.InitRemote Command.Inprogress Command.List