add git-annex updatecluster command

Seems to work fine, making the right changes to the git-annex branch.
This commit is contained in:
Joey Hess 2024-06-14 14:21:50 -04:00
parent 2844230dfe
commit bbf261487d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 114 additions and 28 deletions

View file

@ -2,7 +2,7 @@ git-annex (10.20240532) UNRELEASED; urgency=medium
* Added git-annex updateproxy command and remote.name.annex-proxy
configuration.
* Added git-annex cluster command and remote.name.annex-cluster-node
* Added git-annex updatecluster command and remote.name.annex-cluster-node
and annex.cluster.name configuration.
* Fix a bug where interrupting git-annex while it is updating the
git-annex branch for an export could later lead to git fsck

View file

@ -124,6 +124,7 @@ import qualified Command.Smudge
import qualified Command.FilterProcess
import qualified Command.Restage
import qualified Command.Undo
import qualified Command.UpdateCluster
import qualified Command.UpdateProxy
import qualified Command.Version
import qualified Command.RemoteDaemon
@ -248,6 +249,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.FilterProcess.cmd
, Command.Restage.cmd
, Command.Undo.cmd
, Command.UpdateCluster.cmd
, Command.UpdateProxy.cmd
, Command.Version.cmd
, Command.RemoteDaemon.cmd

View file

@ -30,7 +30,7 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do
rs <- R.remoteList
let proxies = S.fromList $
map (\r -> Proxy (R.uuid r) (R.name r)) $
filter (remoteAnnexProxy . R.gitconfig) rs
filter (isproxy . R.gitconfig) rs
u <- getUUID
oldproxies <- fromMaybe mempty . M.lookup u <$> getProxies
if oldproxies == proxies
@ -50,3 +50,5 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do
putStrLn $ safeOutput $
"Stopped proxying for " ++ proxyRemoteName p
_ -> noop
isproxy c = remoteAnnexProxy c || not (null (remoteAnnexClusterNode c))

View file

@ -68,7 +68,10 @@ makeLegalName s = case filter legal $ replace "/" "_" s of
legal '-' = True
legal '.' = True
legal c = isAlphaNum c
isLegalName :: String -> Bool
isLegalName s = s == makeLegalName s
data RemoteLocation = RemoteUrl String | RemotePath FilePath
deriving (Eq, Show)

View file

@ -84,6 +84,9 @@ instance Default ConfigValue where
fromConfigKey :: ConfigKey -> String
fromConfigKey (ConfigKey s) = decodeBS s
fromConfigKey' :: ConfigKey -> S.ByteString
fromConfigKey' (ConfigKey s) = s
instance Show ConfigKey where
show = fromConfigKey

View file

@ -21,7 +21,6 @@ import Types.Cluster
import Logs
import Logs.UUIDBased
import Logs.MapLog
import Annex.UUID
import qualified Data.Set as S
import qualified Data.Map as M
@ -61,10 +60,9 @@ recordCluster clusteruuid nodeuuids = do
nodeuuids
c <- currentVectorClock
u <- getUUID
Annex.Branch.change (Annex.Branch.RegardingUUID [fromClusterUUID clusteruuid]) clusterLog $
(buildLogNew buildClusterNodeList)
. changeLog c u nodeuuids'
. changeLog c (fromClusterUUID clusteruuid) nodeuuids'
. parseClusterLog
buildClusterNodeList :: S.Set ClusterNodeUUID -> Builder

View file

@ -85,5 +85,4 @@ parseProxyList = S.fromList <$> many parseword
-- characters in names, and ensures the name can be used anywhere a usual
-- git remote name can be used without causing issues.
validateProxies :: S.Set Proxy -> S.Set Proxy
validateProxies = S.filter $ \p ->
Git.Remote.makeLegalName (proxyRemoteName p) == proxyRemoteName p
validateProxies = S.filter $ Git.Remote.isLegalName . proxyRemoteName

View file

@ -10,6 +10,7 @@
module Types.Cluster (
ClusterUUID,
mkClusterUUID,
genClusterUUID,
isClusterUUID,
fromClusterUUID,
ClusterNodeUUID(..),
@ -32,17 +33,13 @@ import Data.Char
newtype ClusterUUID = ClusterUUID UUID
deriving (Show, Eq, Ord)
-- Smart constructor for a ClusterUUID.
--
-- The input UUID can be any regular UUID (eg V4). It is converted to a valid
-- cluster UUID.
-- Smart constructor for a ClusterUUID. Only allows valid cluster UUIDs.
mkClusterUUID :: UUID -> Maybe ClusterUUID
mkClusterUUID (UUID b)
| B.length b > 14 = Just $ ClusterUUID $ UUID $
"ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b
mkClusterUUID u
| isClusterUUID u = Just (ClusterUUID u)
| otherwise = Nothing
mkClusterUUID NoUUID = Nothing
-- Check if it is a valid cluster UUID.
isClusterUUID :: UUID -> Bool
isClusterUUID (UUID b)
| B.take 2 b == "ac" =
@ -55,6 +52,15 @@ isClusterUUID (UUID b)
eight = fromIntegral (ord '8')
isClusterUUID _ = False
-- Generates a ClusterUUID from any regular UUID (eg V4).
-- It is converted to a valid cluster UUID.
genClusterUUID :: UUID -> Maybe ClusterUUID
genClusterUUID (UUID b)
| B.length b > 14 = Just $ ClusterUUID $ UUID $
"ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b
| otherwise = Nothing
genClusterUUID NoUUID = Nothing
fromClusterUUID :: ClusterUUID -> UUID
fromClusterUUID (ClusterUUID u) = u
@ -69,3 +75,4 @@ data Clusters = Clusters
{ clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID)
, clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID)
}
deriving (Show)

View file

@ -31,7 +31,7 @@ import qualified Git.Config
import qualified Git.Construct
import Git.Types
import Git.ConfigTypes
import Git.Remote (isRemoteKey, remoteKeyToRemoteName)
import Git.Remote (isRemoteKey, isLegalName, remoteKeyToRemoteName)
import Git.Branch (CommitMode(..))
import Git.Quote (QuotePath(..))
import Utility.DataUnits
@ -156,7 +156,7 @@ data GitConfig = GitConfig
, annexPrivateRepos :: S.Set UUID
, annexAdviceNoSshCaching :: Bool
, annexViewUnsetDirectory :: ViewUnset
, annexClusters :: M.Map String ClusterUUID
, annexClusters :: M.Map RemoteName ClusterUUID
}
extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig
@ -287,10 +287,8 @@ extractGitConfig configsource r = GitConfig
getmaybe (annexConfig "viewunsetdirectory")
, annexClusters =
M.mapMaybe (mkClusterUUID . toUUID) $
M.mapKeys (drop (B.length clusterprefix) . fromConfigKey) $
M.filterWithKey
(\(ConfigKey k) _ -> clusterprefix `B.isPrefixOf` k)
(config r)
M.mapKeys removeclusterprefix $
M.filterWithKey isclusternamekey (config r)
}
where
getbool k d = fromMaybe d $ getmaybebool k
@ -316,6 +314,9 @@ extractGitConfig configsource r = GitConfig
hereuuid = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid")
clusterprefix = annexConfigPrefix <> "cluster."
isclusternamekey k _ = clusterprefix `B.isPrefixOf` (fromConfigKey' k)
&& isLegalName (removeclusterprefix k)
removeclusterprefix k = drop (B.length clusterprefix) (fromConfigKey k)
{- Merge a GitConfig that comes from git-config with one containing
- repository-global defaults. -}
@ -387,7 +388,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexMaxGitBundles :: Int
, remoteAnnexAllowEncryptedGitRepo :: Bool
, remoteAnnexProxy :: Bool
, remoteAnnexClusterNode :: Maybe [String]
, remoteAnnexClusterNode :: Maybe [RemoteName]
, remoteUrl :: Maybe String
{- These settings are specific to particular types of remotes
@ -473,7 +474,9 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexAllowEncryptedGitRepo =
getbool AllowEncryptedGitRepoField False
, remoteAnnexProxy = getbool ProxyField False
, remoteAnnexClusterNode = words <$> getmaybe ClusterNodeField
, remoteAnnexClusterNode =
(filter isLegalName . words)
<$> getmaybe ClusterNodeField
, remoteUrl =
case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b)

View file

@ -0,0 +1,60 @@
# NAME
git-annex updatecluster - update records with cluster configuration
# SYNOPSIS
git-annex updatecluster
# DESCRIPTION
A git-annex repository can provide access to its remotes as a unified
cluster. This allows other repositories to access the cluster as a remote,
with uploads and downloads distributed amoung the nodes of the cluster,
according to their preferred content settings.
To configure a repository to serve as a proxy to a cluster, first add
remotes to the repository that will serve as nodes of the cluster.
These can be any kind of git-annex remote, including special remotes.
For each remote that will be a node of the cluster,
configure `git config remote.name.annex-cluster-node`, setting it to the
name of the cluster.
Finally, run `git-annex updatecluster` to record the cluster configuration
in the git-annex branch. That tells other repositories about the cluster.
To later add new nodes to the cluster, or remove existing nodes from the
cluster, set or unset `remote.name.annex-cluster-node` as desired,
and run `git-annex updatecluster` again.
Example:
git config remote.foo.annex-cluster-node mycluster
git config remote.bar.annex-cluster-node mycluster
git config remote.baz.annex-cluster-node mycluster
git-annex updatecluster
Suppose, for example, that remote "bigserver" has had those command run in
it. Then after pulling from "bigserver", git-annex will know about an
additional remote, "bigserver-mycluster", which can be used like any other
remote but is an interface to the cluster as a whole. The individual cluster
nodes will also be proxied as remotes, eg "bigserver-foo".
Clusters can only be accessed via ssh.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-preferred-content]](1)
[[git-annex-updateproxy]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -24,7 +24,9 @@ configuration.
Suppose, for example, that remote "work" has had this command run in
it. Then after pulling from "work", git-annex will know about an
additional remote, "work-foo". That remote will be accessed using "work" as
a proxy. (This only works for remotes accessed over ssh.)
a proxy.
Proxies can only be accessed via ssh.
# OPTIONS
@ -33,7 +35,7 @@ a proxy. (This only works for remotes accessed over ssh.)
# SEE ALSO
[[git-annex]](1)
[[git-annex-cluster]](1)
[[git-annex-updatecluster]](1)
# AUTHOR

View file

@ -326,6 +326,12 @@ content from the key-value store.
See [[git-annex-required]](1) for details.
* `updatecluster`
Update records with cluster configuration.
See [[git-annex-updatecluster](1) for details.
* `updateproxy`
Update records with proxy configuration.
@ -1379,8 +1385,8 @@ repository, using [[git-annex-config]]. See its man page for a list.)
* `annex.cluster.<name>`
[[git-annex-cluster]] sets this to the UUID of a cluster, to
enable the local repository to act as a proxy to the cluster.
[[git-annex-updatecluster]] sets this to the UUID of a cluster
based on `remote.<name>.annex-cluster-node` configuration.
Note that cluster UUIDs are not the same as repository UUIDs,
and a repository UUID cannot be used here.

View file

@ -720,6 +720,7 @@ Executable git-annex
Command.UnregisterUrl
Command.Untrust
Command.Unused
Command.UpdateCluster
Command.UpdateProxy
Command.Upgrade
Command.VAdd