add git-annex updatecluster command

Seems to work fine, making the right changes to the git-annex branch.
This commit is contained in:
Joey Hess 2024-06-14 14:21:50 -04:00
parent 2844230dfe
commit bbf261487d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 114 additions and 28 deletions

View file

@ -2,7 +2,7 @@ git-annex (10.20240532) UNRELEASED; urgency=medium
* Added git-annex updateproxy command and remote.name.annex-proxy * Added git-annex updateproxy command and remote.name.annex-proxy
configuration. configuration.
* Added git-annex cluster command and remote.name.annex-cluster-node * Added git-annex updatecluster command and remote.name.annex-cluster-node
and annex.cluster.name configuration. and annex.cluster.name configuration.
* Fix a bug where interrupting git-annex while it is updating the * Fix a bug where interrupting git-annex while it is updating the
git-annex branch for an export could later lead to git fsck git-annex branch for an export could later lead to git fsck

View file

@ -124,6 +124,7 @@ import qualified Command.Smudge
import qualified Command.FilterProcess import qualified Command.FilterProcess
import qualified Command.Restage import qualified Command.Restage
import qualified Command.Undo import qualified Command.Undo
import qualified Command.UpdateCluster
import qualified Command.UpdateProxy import qualified Command.UpdateProxy
import qualified Command.Version import qualified Command.Version
import qualified Command.RemoteDaemon import qualified Command.RemoteDaemon
@ -248,6 +249,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.FilterProcess.cmd , Command.FilterProcess.cmd
, Command.Restage.cmd , Command.Restage.cmd
, Command.Undo.cmd , Command.Undo.cmd
, Command.UpdateCluster.cmd
, Command.UpdateProxy.cmd , Command.UpdateProxy.cmd
, Command.Version.cmd , Command.Version.cmd
, Command.RemoteDaemon.cmd , Command.RemoteDaemon.cmd

View file

@ -30,7 +30,7 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do
rs <- R.remoteList rs <- R.remoteList
let proxies = S.fromList $ let proxies = S.fromList $
map (\r -> Proxy (R.uuid r) (R.name r)) $ map (\r -> Proxy (R.uuid r) (R.name r)) $
filter (remoteAnnexProxy . R.gitconfig) rs filter (isproxy . R.gitconfig) rs
u <- getUUID u <- getUUID
oldproxies <- fromMaybe mempty . M.lookup u <$> getProxies oldproxies <- fromMaybe mempty . M.lookup u <$> getProxies
if oldproxies == proxies if oldproxies == proxies
@ -50,3 +50,5 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do
putStrLn $ safeOutput $ putStrLn $ safeOutput $
"Stopped proxying for " ++ proxyRemoteName p "Stopped proxying for " ++ proxyRemoteName p
_ -> noop _ -> noop
isproxy c = remoteAnnexProxy c || not (null (remoteAnnexClusterNode c))

View file

@ -68,7 +68,10 @@ makeLegalName s = case filter legal $ replace "/" "_" s of
legal '-' = True legal '-' = True
legal '.' = True legal '.' = True
legal c = isAlphaNum c legal c = isAlphaNum c
isLegalName :: String -> Bool
isLegalName s = s == makeLegalName s
data RemoteLocation = RemoteUrl String | RemotePath FilePath data RemoteLocation = RemoteUrl String | RemotePath FilePath
deriving (Eq, Show) deriving (Eq, Show)

View file

@ -84,6 +84,9 @@ instance Default ConfigValue where
fromConfigKey :: ConfigKey -> String fromConfigKey :: ConfigKey -> String
fromConfigKey (ConfigKey s) = decodeBS s fromConfigKey (ConfigKey s) = decodeBS s
fromConfigKey' :: ConfigKey -> S.ByteString
fromConfigKey' (ConfigKey s) = s
instance Show ConfigKey where instance Show ConfigKey where
show = fromConfigKey show = fromConfigKey

View file

@ -21,7 +21,6 @@ import Types.Cluster
import Logs import Logs
import Logs.UUIDBased import Logs.UUIDBased
import Logs.MapLog import Logs.MapLog
import Annex.UUID
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Map as M import qualified Data.Map as M
@ -61,10 +60,9 @@ recordCluster clusteruuid nodeuuids = do
nodeuuids nodeuuids
c <- currentVectorClock c <- currentVectorClock
u <- getUUID
Annex.Branch.change (Annex.Branch.RegardingUUID [fromClusterUUID clusteruuid]) clusterLog $ Annex.Branch.change (Annex.Branch.RegardingUUID [fromClusterUUID clusteruuid]) clusterLog $
(buildLogNew buildClusterNodeList) (buildLogNew buildClusterNodeList)
. changeLog c u nodeuuids' . changeLog c (fromClusterUUID clusteruuid) nodeuuids'
. parseClusterLog . parseClusterLog
buildClusterNodeList :: S.Set ClusterNodeUUID -> Builder buildClusterNodeList :: S.Set ClusterNodeUUID -> Builder

View file

@ -85,5 +85,4 @@ parseProxyList = S.fromList <$> many parseword
-- characters in names, and ensures the name can be used anywhere a usual -- characters in names, and ensures the name can be used anywhere a usual
-- git remote name can be used without causing issues. -- git remote name can be used without causing issues.
validateProxies :: S.Set Proxy -> S.Set Proxy validateProxies :: S.Set Proxy -> S.Set Proxy
validateProxies = S.filter $ \p -> validateProxies = S.filter $ Git.Remote.isLegalName . proxyRemoteName
Git.Remote.makeLegalName (proxyRemoteName p) == proxyRemoteName p

View file

@ -10,6 +10,7 @@
module Types.Cluster ( module Types.Cluster (
ClusterUUID, ClusterUUID,
mkClusterUUID, mkClusterUUID,
genClusterUUID,
isClusterUUID, isClusterUUID,
fromClusterUUID, fromClusterUUID,
ClusterNodeUUID(..), ClusterNodeUUID(..),
@ -32,17 +33,13 @@ import Data.Char
newtype ClusterUUID = ClusterUUID UUID newtype ClusterUUID = ClusterUUID UUID
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
-- Smart constructor for a ClusterUUID. -- Smart constructor for a ClusterUUID. Only allows valid cluster UUIDs.
--
-- The input UUID can be any regular UUID (eg V4). It is converted to a valid
-- cluster UUID.
mkClusterUUID :: UUID -> Maybe ClusterUUID mkClusterUUID :: UUID -> Maybe ClusterUUID
mkClusterUUID (UUID b) mkClusterUUID u
| B.length b > 14 = Just $ ClusterUUID $ UUID $ | isClusterUUID u = Just (ClusterUUID u)
"ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b
| otherwise = Nothing | otherwise = Nothing
mkClusterUUID NoUUID = Nothing
-- Check if it is a valid cluster UUID.
isClusterUUID :: UUID -> Bool isClusterUUID :: UUID -> Bool
isClusterUUID (UUID b) isClusterUUID (UUID b)
| B.take 2 b == "ac" = | B.take 2 b == "ac" =
@ -55,6 +52,15 @@ isClusterUUID (UUID b)
eight = fromIntegral (ord '8') eight = fromIntegral (ord '8')
isClusterUUID _ = False isClusterUUID _ = False
-- Generates a ClusterUUID from any regular UUID (eg V4).
-- It is converted to a valid cluster UUID.
genClusterUUID :: UUID -> Maybe ClusterUUID
genClusterUUID (UUID b)
| B.length b > 14 = Just $ ClusterUUID $ UUID $
"ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b
| otherwise = Nothing
genClusterUUID NoUUID = Nothing
fromClusterUUID :: ClusterUUID -> UUID fromClusterUUID :: ClusterUUID -> UUID
fromClusterUUID (ClusterUUID u) = u fromClusterUUID (ClusterUUID u) = u
@ -69,3 +75,4 @@ data Clusters = Clusters
{ clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID) { clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID)
, clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID) , clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID)
} }
deriving (Show)

View file

@ -31,7 +31,7 @@ import qualified Git.Config
import qualified Git.Construct import qualified Git.Construct
import Git.Types import Git.Types
import Git.ConfigTypes import Git.ConfigTypes
import Git.Remote (isRemoteKey, remoteKeyToRemoteName) import Git.Remote (isRemoteKey, isLegalName, remoteKeyToRemoteName)
import Git.Branch (CommitMode(..)) import Git.Branch (CommitMode(..))
import Git.Quote (QuotePath(..)) import Git.Quote (QuotePath(..))
import Utility.DataUnits import Utility.DataUnits
@ -156,7 +156,7 @@ data GitConfig = GitConfig
, annexPrivateRepos :: S.Set UUID , annexPrivateRepos :: S.Set UUID
, annexAdviceNoSshCaching :: Bool , annexAdviceNoSshCaching :: Bool
, annexViewUnsetDirectory :: ViewUnset , annexViewUnsetDirectory :: ViewUnset
, annexClusters :: M.Map String ClusterUUID , annexClusters :: M.Map RemoteName ClusterUUID
} }
extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig
@ -287,10 +287,8 @@ extractGitConfig configsource r = GitConfig
getmaybe (annexConfig "viewunsetdirectory") getmaybe (annexConfig "viewunsetdirectory")
, annexClusters = , annexClusters =
M.mapMaybe (mkClusterUUID . toUUID) $ M.mapMaybe (mkClusterUUID . toUUID) $
M.mapKeys (drop (B.length clusterprefix) . fromConfigKey) $ M.mapKeys removeclusterprefix $
M.filterWithKey M.filterWithKey isclusternamekey (config r)
(\(ConfigKey k) _ -> clusterprefix `B.isPrefixOf` k)
(config r)
} }
where where
getbool k d = fromMaybe d $ getmaybebool k getbool k d = fromMaybe d $ getmaybebool k
@ -316,6 +314,9 @@ extractGitConfig configsource r = GitConfig
hereuuid = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid") hereuuid = maybe NoUUID toUUID $ getmaybe (annexConfig "uuid")
clusterprefix = annexConfigPrefix <> "cluster." clusterprefix = annexConfigPrefix <> "cluster."
isclusternamekey k _ = clusterprefix `B.isPrefixOf` (fromConfigKey' k)
&& isLegalName (removeclusterprefix k)
removeclusterprefix k = drop (B.length clusterprefix) (fromConfigKey k)
{- Merge a GitConfig that comes from git-config with one containing {- Merge a GitConfig that comes from git-config with one containing
- repository-global defaults. -} - repository-global defaults. -}
@ -387,7 +388,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexMaxGitBundles :: Int , remoteAnnexMaxGitBundles :: Int
, remoteAnnexAllowEncryptedGitRepo :: Bool , remoteAnnexAllowEncryptedGitRepo :: Bool
, remoteAnnexProxy :: Bool , remoteAnnexProxy :: Bool
, remoteAnnexClusterNode :: Maybe [String] , remoteAnnexClusterNode :: Maybe [RemoteName]
, remoteUrl :: Maybe String , remoteUrl :: Maybe String
{- These settings are specific to particular types of remotes {- These settings are specific to particular types of remotes
@ -473,7 +474,9 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexAllowEncryptedGitRepo = , remoteAnnexAllowEncryptedGitRepo =
getbool AllowEncryptedGitRepoField False getbool AllowEncryptedGitRepoField False
, remoteAnnexProxy = getbool ProxyField False , remoteAnnexProxy = getbool ProxyField False
, remoteAnnexClusterNode = words <$> getmaybe ClusterNodeField , remoteAnnexClusterNode =
(filter isLegalName . words)
<$> getmaybe ClusterNodeField
, remoteUrl = , remoteUrl =
case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b) Just (ConfigValue b)

View file

@ -0,0 +1,60 @@
# NAME
git-annex updatecluster - update records with cluster configuration
# SYNOPSIS
git-annex updatecluster
# DESCRIPTION
A git-annex repository can provide access to its remotes as a unified
cluster. This allows other repositories to access the cluster as a remote,
with uploads and downloads distributed amoung the nodes of the cluster,
according to their preferred content settings.
To configure a repository to serve as a proxy to a cluster, first add
remotes to the repository that will serve as nodes of the cluster.
These can be any kind of git-annex remote, including special remotes.
For each remote that will be a node of the cluster,
configure `git config remote.name.annex-cluster-node`, setting it to the
name of the cluster.
Finally, run `git-annex updatecluster` to record the cluster configuration
in the git-annex branch. That tells other repositories about the cluster.
To later add new nodes to the cluster, or remove existing nodes from the
cluster, set or unset `remote.name.annex-cluster-node` as desired,
and run `git-annex updatecluster` again.
Example:
git config remote.foo.annex-cluster-node mycluster
git config remote.bar.annex-cluster-node mycluster
git config remote.baz.annex-cluster-node mycluster
git-annex updatecluster
Suppose, for example, that remote "bigserver" has had those command run in
it. Then after pulling from "bigserver", git-annex will know about an
additional remote, "bigserver-mycluster", which can be used like any other
remote but is an interface to the cluster as a whole. The individual cluster
nodes will also be proxied as remotes, eg "bigserver-foo".
Clusters can only be accessed via ssh.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SEE ALSO
[[git-annex]](1)
[[git-annex-preferred-content]](1)
[[git-annex-updateproxy]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -24,7 +24,9 @@ configuration.
Suppose, for example, that remote "work" has had this command run in Suppose, for example, that remote "work" has had this command run in
it. Then after pulling from "work", git-annex will know about an it. Then after pulling from "work", git-annex will know about an
additional remote, "work-foo". That remote will be accessed using "work" as additional remote, "work-foo". That remote will be accessed using "work" as
a proxy. (This only works for remotes accessed over ssh.) a proxy.
Proxies can only be accessed via ssh.
# OPTIONS # OPTIONS
@ -33,7 +35,7 @@ a proxy. (This only works for remotes accessed over ssh.)
# SEE ALSO # SEE ALSO
[[git-annex]](1) [[git-annex]](1)
[[git-annex-cluster]](1) [[git-annex-updatecluster]](1)
# AUTHOR # AUTHOR

View file

@ -326,6 +326,12 @@ content from the key-value store.
See [[git-annex-required]](1) for details. See [[git-annex-required]](1) for details.
* `updatecluster`
Update records with cluster configuration.
See [[git-annex-updatecluster](1) for details.
* `updateproxy` * `updateproxy`
Update records with proxy configuration. Update records with proxy configuration.
@ -1379,8 +1385,8 @@ repository, using [[git-annex-config]]. See its man page for a list.)
* `annex.cluster.<name>` * `annex.cluster.<name>`
[[git-annex-cluster]] sets this to the UUID of a cluster, to [[git-annex-updatecluster]] sets this to the UUID of a cluster
enable the local repository to act as a proxy to the cluster. based on `remote.<name>.annex-cluster-node` configuration.
Note that cluster UUIDs are not the same as repository UUIDs, Note that cluster UUIDs are not the same as repository UUIDs,
and a repository UUID cannot be used here. and a repository UUID cannot be used here.

View file

@ -720,6 +720,7 @@ Executable git-annex
Command.UnregisterUrl Command.UnregisterUrl
Command.Untrust Command.Untrust
Command.Unused Command.Unused
Command.UpdateCluster
Command.UpdateProxy Command.UpdateProxy
Command.Upgrade Command.Upgrade
Command.VAdd Command.VAdd