From bbf261487d72be665b3675f725798f034bff3eda Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 14 Jun 2024 14:21:50 -0400 Subject: [PATCH] add git-annex updatecluster command Seems to work fine, making the right changes to the git-annex branch. --- CHANGELOG | 2 +- CmdLine/GitAnnex.hs | 2 ++ Command/UpdateProxy.hs | 4 ++- Git/Remote.hs | 5 ++- Git/Types.hs | 3 ++ Logs/Cluster.hs | 4 +-- Logs/Proxy.hs | 3 +- Types/Cluster.hs | 23 +++++++----- Types/GitConfig.hs | 19 +++++----- doc/git-annex-updatecluster.mdwn | 60 ++++++++++++++++++++++++++++++++ doc/git-annex-updateproxy.mdwn | 6 ++-- doc/git-annex.mdwn | 10 ++++-- git-annex.cabal | 1 + 13 files changed, 114 insertions(+), 28 deletions(-) create mode 100644 doc/git-annex-updatecluster.mdwn diff --git a/CHANGELOG b/CHANGELOG index 89df0811c6..a021ca783f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,7 +2,7 @@ git-annex (10.20240532) UNRELEASED; urgency=medium * Added git-annex updateproxy command and remote.name.annex-proxy configuration. - * Added git-annex cluster command and remote.name.annex-cluster-node + * Added git-annex updatecluster command and remote.name.annex-cluster-node and annex.cluster.name configuration. * Fix a bug where interrupting git-annex while it is updating the git-annex branch for an export could later lead to git fsck diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 2f2bf7c86e..11e9c9ad57 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -124,6 +124,7 @@ import qualified Command.Smudge import qualified Command.FilterProcess import qualified Command.Restage import qualified Command.Undo +import qualified Command.UpdateCluster import qualified Command.UpdateProxy import qualified Command.Version import qualified Command.RemoteDaemon @@ -248,6 +249,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption , Command.FilterProcess.cmd , Command.Restage.cmd , Command.Undo.cmd + , Command.UpdateCluster.cmd , Command.UpdateProxy.cmd , Command.Version.cmd , Command.RemoteDaemon.cmd diff --git a/Command/UpdateProxy.hs b/Command/UpdateProxy.hs index da09c64a21..b1615eedb9 100644 --- a/Command/UpdateProxy.hs +++ b/Command/UpdateProxy.hs @@ -30,7 +30,7 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do rs <- R.remoteList let proxies = S.fromList $ map (\r -> Proxy (R.uuid r) (R.name r)) $ - filter (remoteAnnexProxy . R.gitconfig) rs + filter (isproxy . R.gitconfig) rs u <- getUUID oldproxies <- fromMaybe mempty . M.lookup u <$> getProxies if oldproxies == proxies @@ -50,3 +50,5 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do putStrLn $ safeOutput $ "Stopped proxying for " ++ proxyRemoteName p _ -> noop + + isproxy c = remoteAnnexProxy c || not (null (remoteAnnexClusterNode c)) diff --git a/Git/Remote.hs b/Git/Remote.hs index 8990509d60..3a8945c146 100644 --- a/Git/Remote.hs +++ b/Git/Remote.hs @@ -68,7 +68,10 @@ makeLegalName s = case filter legal $ replace "/" "_" s of legal '-' = True legal '.' = True legal c = isAlphaNum c - + +isLegalName :: String -> Bool +isLegalName s = s == makeLegalName s + data RemoteLocation = RemoteUrl String | RemotePath FilePath deriving (Eq, Show) diff --git a/Git/Types.hs b/Git/Types.hs index ef7b159cae..3f4410fc0d 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -84,6 +84,9 @@ instance Default ConfigValue where fromConfigKey :: ConfigKey -> String fromConfigKey (ConfigKey s) = decodeBS s +fromConfigKey' :: ConfigKey -> S.ByteString +fromConfigKey' (ConfigKey s) = s + instance Show ConfigKey where show = fromConfigKey diff --git a/Logs/Cluster.hs b/Logs/Cluster.hs index 12fe936c72..dbfe711b84 100644 --- a/Logs/Cluster.hs +++ b/Logs/Cluster.hs @@ -21,7 +21,6 @@ import Types.Cluster import Logs import Logs.UUIDBased import Logs.MapLog -import Annex.UUID import qualified Data.Set as S import qualified Data.Map as M @@ -61,10 +60,9 @@ recordCluster clusteruuid nodeuuids = do nodeuuids c <- currentVectorClock - u <- getUUID Annex.Branch.change (Annex.Branch.RegardingUUID [fromClusterUUID clusteruuid]) clusterLog $ (buildLogNew buildClusterNodeList) - . changeLog c u nodeuuids' + . changeLog c (fromClusterUUID clusteruuid) nodeuuids' . parseClusterLog buildClusterNodeList :: S.Set ClusterNodeUUID -> Builder diff --git a/Logs/Proxy.hs b/Logs/Proxy.hs index 4772ce0258..19bbf206ca 100644 --- a/Logs/Proxy.hs +++ b/Logs/Proxy.hs @@ -85,5 +85,4 @@ parseProxyList = S.fromList <$> many parseword -- characters in names, and ensures the name can be used anywhere a usual -- git remote name can be used without causing issues. validateProxies :: S.Set Proxy -> S.Set Proxy -validateProxies = S.filter $ \p -> - Git.Remote.makeLegalName (proxyRemoteName p) == proxyRemoteName p +validateProxies = S.filter $ Git.Remote.isLegalName . proxyRemoteName diff --git a/Types/Cluster.hs b/Types/Cluster.hs index 08a349f3ba..47595e6cfc 100644 --- a/Types/Cluster.hs +++ b/Types/Cluster.hs @@ -10,6 +10,7 @@ module Types.Cluster ( ClusterUUID, mkClusterUUID, + genClusterUUID, isClusterUUID, fromClusterUUID, ClusterNodeUUID(..), @@ -32,17 +33,13 @@ import Data.Char newtype ClusterUUID = ClusterUUID UUID deriving (Show, Eq, Ord) --- Smart constructor for a ClusterUUID. --- --- The input UUID can be any regular UUID (eg V4). It is converted to a valid --- cluster UUID. +-- Smart constructor for a ClusterUUID. Only allows valid cluster UUIDs. mkClusterUUID :: UUID -> Maybe ClusterUUID -mkClusterUUID (UUID b) - | B.length b > 14 = Just $ ClusterUUID $ UUID $ - "ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b +mkClusterUUID u + | isClusterUUID u = Just (ClusterUUID u) | otherwise = Nothing -mkClusterUUID NoUUID = Nothing +-- Check if it is a valid cluster UUID. isClusterUUID :: UUID -> Bool isClusterUUID (UUID b) | B.take 2 b == "ac" = @@ -55,6 +52,15 @@ isClusterUUID (UUID b) eight = fromIntegral (ord '8') isClusterUUID _ = False +-- Generates a ClusterUUID from any regular UUID (eg V4). +-- It is converted to a valid cluster UUID. +genClusterUUID :: UUID -> Maybe ClusterUUID +genClusterUUID (UUID b) + | B.length b > 14 = Just $ ClusterUUID $ UUID $ + "ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b + | otherwise = Nothing +genClusterUUID NoUUID = Nothing + fromClusterUUID :: ClusterUUID -> UUID fromClusterUUID (ClusterUUID u) = u @@ -69,3 +75,4 @@ data Clusters = Clusters { clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID) , clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID) } + deriving (Show) diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 016e51a68a..c4a739e454 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -31,7 +31,7 @@ import qualified Git.Config import qualified Git.Construct import Git.Types import Git.ConfigTypes -import Git.Remote (isRemoteKey, remoteKeyToRemoteName) +import Git.Remote (isRemoteKey, isLegalName, remoteKeyToRemoteName) import Git.Branch (CommitMode(..)) import Git.Quote (QuotePath(..)) import Utility.DataUnits @@ -156,7 +156,7 @@ data GitConfig = GitConfig , annexPrivateRepos :: S.Set UUID , annexAdviceNoSshCaching :: Bool , annexViewUnsetDirectory :: ViewUnset - , annexClusters :: M.Map String ClusterUUID + , annexClusters :: M.Map RemoteName ClusterUUID } extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig @@ -287,10 +287,8 @@ extractGitConfig configsource r = GitConfig getmaybe (annexConfig "viewunsetdirectory") , annexClusters = M.mapMaybe (mkClusterUUID . toUUID) $ - M.mapKeys (drop (B.length clusterprefix) . fromConfigKey) $ - M.filterWithKey - (\(ConfigKey k) _ -> clusterprefix `B.isPrefixOf` k) - (config r) + M.mapKeys removeclusterprefix $ + M.filterWithKey isclusternamekey (config r) } where getbool k d = fromMaybe d $ getmaybebool k @@ -316,6 +314,9 @@ extractGitConfig configsource r = GitConfig hereuuid = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid") clusterprefix = annexConfigPrefix <> "cluster." + isclusternamekey k _ = clusterprefix `B.isPrefixOf` (fromConfigKey' k) + && isLegalName (removeclusterprefix k) + removeclusterprefix k = drop (B.length clusterprefix) (fromConfigKey k) {- Merge a GitConfig that comes from git-config with one containing - repository-global defaults. -} @@ -387,7 +388,7 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexMaxGitBundles :: Int , remoteAnnexAllowEncryptedGitRepo :: Bool , remoteAnnexProxy :: Bool - , remoteAnnexClusterNode :: Maybe [String] + , remoteAnnexClusterNode :: Maybe [RemoteName] , remoteUrl :: Maybe String {- These settings are specific to particular types of remotes @@ -473,7 +474,9 @@ extractRemoteGitConfig r remotename = do , remoteAnnexAllowEncryptedGitRepo = getbool AllowEncryptedGitRepoField False , remoteAnnexProxy = getbool ProxyField False - , remoteAnnexClusterNode = words <$> getmaybe ClusterNodeField + , remoteAnnexClusterNode = + (filter isLegalName . words) + <$> getmaybe ClusterNodeField , remoteUrl = case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of Just (ConfigValue b) diff --git a/doc/git-annex-updatecluster.mdwn b/doc/git-annex-updatecluster.mdwn new file mode 100644 index 0000000000..403dc12c41 --- /dev/null +++ b/doc/git-annex-updatecluster.mdwn @@ -0,0 +1,60 @@ +# NAME + +git-annex updatecluster - update records with cluster configuration + +# SYNOPSIS + +git-annex updatecluster + +# DESCRIPTION + +A git-annex repository can provide access to its remotes as a unified +cluster. This allows other repositories to access the cluster as a remote, +with uploads and downloads distributed amoung the nodes of the cluster, +according to their preferred content settings. + +To configure a repository to serve as a proxy to a cluster, first add +remotes to the repository that will serve as nodes of the cluster. +These can be any kind of git-annex remote, including special remotes. + +For each remote that will be a node of the cluster, +configure `git config remote.name.annex-cluster-node`, setting it to the +name of the cluster. + +Finally, run `git-annex updatecluster` to record the cluster configuration +in the git-annex branch. That tells other repositories about the cluster. + +To later add new nodes to the cluster, or remove existing nodes from the +cluster, set or unset `remote.name.annex-cluster-node` as desired, +and run `git-annex updatecluster` again. + +Example: + + git config remote.foo.annex-cluster-node mycluster + git config remote.bar.annex-cluster-node mycluster + git config remote.baz.annex-cluster-node mycluster + git-annex updatecluster + +Suppose, for example, that remote "bigserver" has had those command run in +it. Then after pulling from "bigserver", git-annex will know about an +additional remote, "bigserver-mycluster", which can be used like any other +remote but is an interface to the cluster as a whole. The individual cluster +nodes will also be proxied as remotes, eg "bigserver-foo". + +Clusters can only be accessed via ssh. + +# OPTIONS + +* The [[git-annex-common-options]](1) can be used. + +# SEE ALSO + +[[git-annex]](1) +[[git-annex-preferred-content]](1) +[[git-annex-updateproxy]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex-updateproxy.mdwn b/doc/git-annex-updateproxy.mdwn index 9ce216e52b..d6eb7c6398 100644 --- a/doc/git-annex-updateproxy.mdwn +++ b/doc/git-annex-updateproxy.mdwn @@ -24,7 +24,9 @@ configuration. Suppose, for example, that remote "work" has had this command run in it. Then after pulling from "work", git-annex will know about an additional remote, "work-foo". That remote will be accessed using "work" as -a proxy. (This only works for remotes accessed over ssh.) +a proxy. + +Proxies can only be accessed via ssh. # OPTIONS @@ -33,7 +35,7 @@ a proxy. (This only works for remotes accessed over ssh.) # SEE ALSO [[git-annex]](1) -[[git-annex-cluster]](1) +[[git-annex-updatecluster]](1) # AUTHOR diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index a6a303610c..7236a21eb1 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -326,6 +326,12 @@ content from the key-value store. See [[git-annex-required]](1) for details. +* `updatecluster` + + Update records with cluster configuration. + + See [[git-annex-updatecluster](1) for details. + * `updateproxy` Update records with proxy configuration. @@ -1379,8 +1385,8 @@ repository, using [[git-annex-config]]. See its man page for a list.) * `annex.cluster.` - [[git-annex-cluster]] sets this to the UUID of a cluster, to - enable the local repository to act as a proxy to the cluster. + [[git-annex-updatecluster]] sets this to the UUID of a cluster + based on `remote..annex-cluster-node` configuration. Note that cluster UUIDs are not the same as repository UUIDs, and a repository UUID cannot be used here. diff --git a/git-annex.cabal b/git-annex.cabal index 24ff6bba03..516368a228 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -720,6 +720,7 @@ Executable git-annex Command.UnregisterUrl Command.Untrust Command.Unused + Command.UpdateCluster Command.UpdateProxy Command.Upgrade Command.VAdd