broke out initcluster

One benefit of this is that a typo in annex-cluster-node config won't
init a new cluster.

Also it gets the cluster description set and is consistent with
initremote.
This commit is contained in:
Joey Hess 2024-06-14 17:13:23 -04:00
parent bfe7f488d9
commit 570ceffe8d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 135 additions and 56 deletions

View file

@ -2,8 +2,8 @@ git-annex (10.20240532) UNRELEASED; urgency=medium
* Added git-annex updateproxy command and remote.name.annex-proxy
configuration.
* Added git-annex updatecluster command and remote.name.annex-cluster-node
and annex.cluster.name configuration.
* Added git-annex initcluster and updatecluster commands,
and remote.name.annex-cluster-node and annex.cluster.name configuration.
* Fix a bug where interrupting git-annex while it is updating the
git-annex branch for an export could later lead to git fsck
complaining about missing tree objects.

View file

@ -124,6 +124,7 @@ import qualified Command.Smudge
import qualified Command.FilterProcess
import qualified Command.Restage
import qualified Command.Undo
import qualified Command.InitCluster
import qualified Command.UpdateCluster
import qualified Command.UpdateProxy
import qualified Command.Version
@ -249,6 +250,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.FilterProcess.cmd
, Command.Restage.cmd
, Command.Undo.cmd
, Command.InitCluster.cmd
, Command.UpdateCluster.cmd
, Command.UpdateProxy.cmd
, Command.Version.cmd

50
Command/InitCluster.hs Normal file
View file

@ -0,0 +1,50 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.InitCluster where
import Command
import qualified Annex
import Types.Cluster
import Logs.UUID
import Config
import Annex.UUID
import Git.Types
import Git.Remote (isLegalName)
import qualified Data.Map as M
cmd :: Command
cmd = command "initcluster" SectionSetup "initialize a new cluster"
(paramPair paramName paramDesc) (withParams seek)
seek :: CmdParams -> CommandSeek
seek (clustername:desc:[]) = commandAction $
start clustername (toUUIDDesc desc)
seek (clustername:[]) = commandAction $
start clustername $ toUUIDDesc ("cluster " ++ clustername)
seek _ = giveup "Expected two parameters, name and description."
start :: RemoteName -> UUIDDesc -> CommandStart
start clustername desc = starting "initcluster" ai si $ do
unless (isLegalName clustername) $
giveup "That cluster name is not a valid git remote name."
myclusters <- annexClusters <$> Annex.getGitConfig
unless (M.member clustername myclusters) $ do
cu <- fromMaybe (giveup "unable to generate a cluster UUID")
<$> genClusterUUID <$> liftIO genUUID
setConfig (annexConfig ("cluster." <> encodeBS clustername))
(fromUUID (fromClusterUUID cu))
describeUUID (fromClusterUUID cu) desc
next $ return True
where
ai = ActionItemOther (Just (UnquotedString clustername))
si = SeekInput [clustername]

View file

@ -13,8 +13,6 @@ import Command
import qualified Annex
import Types.Cluster
import Logs.Cluster
import Config
import Annex.UUID
import qualified Remote as R
import qualified Types.Remote as R
import qualified Command.UpdateProxy
@ -25,7 +23,7 @@ import qualified Data.Set as S
cmd :: Command
cmd = noMessages $ command "updatecluster" SectionSetup
"update records with cluster configuration"
"update records of cluster nodes"
paramNothing (withParams seek)
seek :: CmdParams -> CommandSeek
@ -40,25 +38,13 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do
clusternames <- remoteAnnexClusterNode (R.gitconfig r)
return $ M.fromList $ zip clusternames (repeat (S.singleton r))
let myclusternodes = M.unionsWith S.union (mapMaybe getnode rs)
-- Generate cluster UUIDs and store in git config for each new cluster.
myclusters <- annexClusters <$> Annex.getGitConfig
forM_ (M.keys myclusternodes) $ \clustername ->
unless (M.member clustername myclusters) $ do
liftIO $ putStrLn $ safeOutput $
"Configuring new cluster: " ++ clustername
cu <- fromMaybe (giveup "unable to generate a cluster UUID")
<$> genClusterUUID <$> liftIO genUUID
setConfig (annexConfig ("cluster." <> encodeBS clustername))
(fromUUID (fromClusterUUID cu))
reloadConfig
-- Update the cluster log to list the currently configured nodes
-- of each configured cluster.
myclusters' <- annexClusters <$> Annex.getGitConfig
recordedclusters <- getClusters
descs <- R.uuidDescriptions
forM_ (M.toList myclusters') $ \(clustername, cu) -> do
-- Update the cluster log to list the currently configured nodes
-- of each configured cluster.
forM_ (M.toList myclusters) $ \(clustername, cu) -> do
let mynodesremotes = fromMaybe mempty $
M.lookup clustername myclusternodes
let mynodes = S.map (ClusterNodeUUID . R.uuid) mynodesremotes

View file

@ -0,0 +1,58 @@
# NAME
git-annex initcluster - initialize a new cluster
# SYNOPSIS
git-annex initcluster name [description]
# DESCRIPTION
A git-annex repository can provide access to its remotes as a unified
cluster. This allows other repositories to access the cluster as a remote,
with uploads and downloads distributed amoung the nodes of the cluster,
according to their preferred content settings.
This command initializes a new cluster with the specified name. If no
description is provided, one will be set automatically.
Once a cluster is initialized, the next step is to add nodes to it.
To make a remote be a node of the cluster, configure
`git config remote.name.annex-cluster-node`, setting it to the
name of the cluster.
Finally, run `git-annex updatecluster` to record the cluster configuration
in the git-annex branch. That tells other repositories about the cluster.
Example:
git-annex initcluster mycluster
git config remote.foo.annex-cluster-node mycluster
git config remote.bar.annex-cluster-node mycluster
git config remote.baz.annex-cluster-node mycluster
git-annex updatecluster
Suppose, for example, that remote "bigserver" has had those commands run in
it. Then after pulling from "bigserver", git-annex will know about an
additional remote, "bigserver-mycluster", which can be used like any other
remote but is an interface to the cluster as a whole. The individual cluster
nodes will also be proxied as remotes, eg "bigserver-foo".
Clusters can only be accessed via ssh.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-updatecluster]](1)
[[git-annex-preferred-content]](1)
[[git-annex-updateproxy]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -1,6 +1,6 @@
# NAME
git-annex updatecluster - update records with cluster configuration
git-annex updatecluster - update records of cluster nodes
# SYNOPSIS
@ -8,40 +8,16 @@ git-annex updatecluster
# DESCRIPTION
A git-annex repository can provide access to its remotes as a unified
cluster. This allows other repositories to access the cluster as a remote,
with uploads and downloads distributed amoung the nodes of the cluster,
according to their preferred content settings.
This command is used to record the nodes of a cluster in the git-annex
branch.
To configure a repository to serve as a proxy to a cluster, first add
remotes to the repository that will serve as nodes of the cluster.
These can be any kind of git-annex remote, including special remotes.
It looks at the git configs `git config remote.name.annex-cluster-node` of
each remote. When that is set to the name of a cluster that has been
initialized with `git-annex initcluster`, the node will be recorded in the
git-annex branch.
For each remote that will be a node of the cluster,
configure `git config remote.name.annex-cluster-node`, setting it to the
name of the cluster.
Finally, run `git-annex updatecluster` to record the cluster configuration
in the git-annex branch. That tells other repositories about the cluster.
To later add new nodes to the cluster, or remove existing nodes from the
cluster, set or unset `remote.name.annex-cluster-node` as desired,
and run `git-annex updatecluster` again.
Example:
git config remote.foo.annex-cluster-node mycluster
git config remote.bar.annex-cluster-node mycluster
git config remote.baz.annex-cluster-node mycluster
git-annex updatecluster
Suppose, for example, that remote "bigserver" has had those command run in
it. Then after pulling from "bigserver", git-annex will know about an
additional remote, "bigserver-mycluster", which can be used like any other
remote but is an interface to the cluster as a whole. The individual cluster
nodes will also be proxied as remotes, eg "bigserver-foo".
Clusters can only be accessed via ssh.
To remove a node from a cluster, unset `remote.name.annex-cluster-node`
and run this command.
# OPTIONS
@ -50,7 +26,7 @@ Clusters can only be accessed via ssh.
# SEE ALSO
[[git-annex]](1)
[[git-annex-preferred-content]](1)
[[git-annex-initcluster]](1)
[[git-annex-updateproxy]](1)
# AUTHOR

View file

@ -326,9 +326,15 @@ content from the key-value store.
See [[git-annex-required]](1) for details.
* `initcluster`
Initializes a new cluster.
See [[git-annex-initcluster](1) for details.
* `updatecluster`
Update records with cluster configuration.
Update records of cluster nodes.
See [[git-annex-updatecluster](1) for details.

View file

@ -658,6 +658,7 @@ Executable git-annex
Command.Indirect
Command.Info
Command.Init
Command.InitCluster
Command.InitRemote
Command.Inprogress
Command.List