From 07e899c9d316555a5615f272bacc8e734fef2f24 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 26 Jun 2024 12:56:16 -0400 Subject: [PATCH] git-annex-shell: proxy nodes located beyond remote cluster gateways Walking a tightrope between security and convenience here, because git-annex-shell needs to only proxy for things when there has been an explicit, local action to configure them. In this case, the user has to have run `git-annex extendcluster`, which now sets annex-cluster-gateway on the remote. Note that any repositories that the gateway is recorded to proxy for will be proxied onward. This is not limited to cluster nodes, because checking the node log would not add any security; someone could add any uuid to it. The gateway of course then does its own checking to determine if it will allow proxying for the remote. --- CmdLine/GitAnnexShell.hs | 22 +++++++++++++++++----- Command/ExtendCluster.hs | 23 ++++++++++++++--------- Command/UpdateCluster.hs | 2 +- Command/UpdateProxy.hs | 2 +- Remote.hs | 2 +- Remote/Git.hs | 24 ++++++++++++++---------- Remote/Helper/Git.hs | 2 +- Types/GitConfig.hs | 18 ++++++++++++++---- doc/git-annex.mdwn | 27 +++++++++++++++++---------- doc/todo/git-annex_proxies.mdwn | 12 ++++++++++-- 10 files changed, 90 insertions(+), 44 deletions(-) diff --git a/CmdLine/GitAnnexShell.hs b/CmdLine/GitAnnexShell.hs index cc4fb406bc..6f7456bb51 100644 --- a/CmdLine/GitAnnexShell.hs +++ b/CmdLine/GitAnnexShell.hs @@ -206,17 +206,29 @@ checkProxy remoteuuid ouruuid = M.lookup ouruuid <$> getProxies >>= \case rs <- concat . byCost <$> remoteList myclusters <- annexClusters <$> Annex.getGitConfig let sameuuid r = uuid r == remoteuuid - -- Only proxy for a remote when the git configuration - -- allows it. 
- let proxyconfigured r = remoteAnnexProxy (R.gitconfig r) - || (any (`M.member` myclusters) $ fromMaybe [] $ remoteAnnexClusterNode $ R.gitconfig r) let samename r p = name r == proxyRemoteName p - case headMaybe (filter (\r -> sameuuid r && proxyconfigured r && any (samename r) ps) rs) of + case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of Nothing -> notconfigured Just r -> do Annex.changeState $ \st -> st { Annex.proxyremote = Just (Right r) } return True + + -- Only proxy for a remote when the git configuration + -- allows it. This is important to prevent changes to + -- the git-annex branch making git-annex-shell unexpectedly + -- proxy for remotes. + proxyisconfigured rs myclusters r + | remoteAnnexProxy (R.gitconfig r) = True + -- Proxy for remotes that are configured as cluster nodes. + | any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ R.gitconfig r) = True + -- Proxy for a remote when it is proxied by another remote + -- which is itself configured as a cluster gateway. + | otherwise = case remoteAnnexProxiedBy (R.gitconfig r) of + Just proxyuuid -> not $ null $ + concatMap (remoteAnnexClusterGateway . 
R.gitconfig) $ + filter (\p -> R.uuid p == proxyuuid) rs + Nothing -> False proxyforcluster cu = do clusters <- getClusters diff --git a/Command/ExtendCluster.hs b/Command/ExtendCluster.hs index c83877b05a..6fa248d57a 100644 --- a/Command/ExtendCluster.hs +++ b/Command/ExtendCluster.hs @@ -13,6 +13,7 @@ import Command import qualified Annex import Types.Cluster import Config +import Types.GitConfig import qualified Remote import qualified Data.Map as M @@ -23,11 +24,13 @@ cmd = command "extendcluster" SectionSetup "add an gateway to a cluster" seek :: CmdParams -> CommandSeek seek (remotename:clustername:[]) = Remote.byName (Just clusterremotename) >>= \case - Just clusterremote -> - case mkClusterUUID (Remote.uuid clusterremote) of - Just cu -> commandAction $ start cu clustername - Nothing -> giveup $ clusterremotename - ++ " is not a cluster remote." + Just clusterremote -> Remote.byName (Just remotename) >>= \case + Just gatewayremote -> + case mkClusterUUID (Remote.uuid clusterremote) of + Just cu -> commandAction $ start cu clustername gatewayremote + Nothing -> giveup $ clusterremotename + ++ " is not a cluster remote." + Nothing -> giveup $ "No remote named " ++ remotename ++ " exists." Nothing -> giveup $ "Expected to find a cluster remote named " ++ clusterremotename ++ " that is accessed via " ++ remotename @@ -38,12 +41,14 @@ seek (remotename:clustername:[]) = Remote.byName (Just clusterremotename) >>= \c clusterremotename = remotename ++ "-" ++ clustername seek _ = giveup "Expected two parameters, gateway and clustername." -start :: ClusterUUID -> String -> CommandStart -start cu clustername = starting "extendcluster" ai si $ do +start :: ClusterUUID -> String -> Remote -> CommandStart +start cu clustername gatewayremote = starting "extendcluster" ai si $ do myclusters <- annexClusters <$> Annex.getGitConfig + let setcus f = setConfig f (fromUUID (fromClusterUUID cu)) unless (M.member clustername myclusters) $ do - setConfig (annexConfig ("cluster." 
<> encodeBS clustername)) - (fromUUID (fromClusterUUID cu)) + setcus $ annexConfig ("cluster." <> encodeBS clustername) + setcus $ remoteAnnexConfig gatewayremote $ + remoteGitConfigKey ClusterGatewayField next $ return True where ai = ActionItemOther (Just (UnquotedString clustername)) diff --git a/Command/UpdateCluster.hs b/Command/UpdateCluster.hs index 72b59233a6..f16318bed0 100644 --- a/Command/UpdateCluster.hs +++ b/Command/UpdateCluster.hs @@ -80,5 +80,5 @@ findProxiedClusterNodes recordednodes = where isproxynode r = asclusternode r `S.member` recordednodes - && remoteAnnexProxied (R.gitconfig r) + && isJust (remoteAnnexProxiedBy (R.gitconfig r)) asclusternode = ClusterNodeUUID . R.uuid diff --git a/Command/UpdateProxy.hs b/Command/UpdateProxy.hs index cbe7ae9a81..d21b7321c4 100644 --- a/Command/UpdateProxy.hs +++ b/Command/UpdateProxy.hs @@ -83,7 +83,7 @@ findRemoteProxiedClusterNodes = do <$> Annex.getGitConfig clusternodes <- clusterNodeUUIDs <$> getClusters let isproxiedclusternode r - | remoteAnnexProxied (R.gitconfig r) = + | isJust (remoteAnnexProxiedBy (R.gitconfig r)) = case M.lookup (ClusterNodeUUID (R.uuid r)) clusternodes of Nothing -> False Just s -> not $ S.null $ diff --git a/Remote.hs b/Remote.hs index 9038d2a767..eea052e254 100644 --- a/Remote.hs +++ b/Remote.hs @@ -455,7 +455,7 @@ gitSyncableRemote :: Remote -> Bool gitSyncableRemote r | gitSyncableRemoteType (remotetype r) && isJust (remoteUrl (gitconfig r)) = - not (remoteAnnexProxied (gitconfig r)) + not (isJust (remoteAnnexProxiedBy (gitconfig r))) | otherwise = case remoteUrl (gitconfig r) of Just u | "annex::" `isPrefixOf` u -> True _ -> False diff --git a/Remote/Git.hs b/Remote/Git.hs index 6c29c28cfe..89e0da38c1 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -794,21 +794,22 @@ listProxied proxies rs = concat <$> mapM go rs then pure [] else case M.lookup cu proxies of Nothing -> pure [] - Just s -> catMaybes - <$> mapM (mkproxied g r s) (S.toList s) + Just proxied -> catMaybes + 
<$> mapM (mkproxied g r gc proxied) + (S.toList proxied) proxiedremotename r p = do n <- Git.remoteName r pure $ n ++ "-" ++ proxyRemoteName p - mkproxied g r proxied p = case proxiedremotename r p of + mkproxied g r gc proxied p = case proxiedremotename r p of Nothing -> pure Nothing - Just proxyname -> mkproxied' g r proxied p proxyname + Just proxyname -> mkproxied' g r gc proxied p proxyname -- The proxied remote is constructed by renaming the proxy remote, -- changing its uuid, and setting the proxied remote's inherited -- configs and uuid in Annex state. - mkproxied' g r proxied p proxyname + mkproxied' g r gc proxied p proxyname | any isconfig (M.keys (Git.config g)) = pure Nothing | otherwise = do clusters <- getClustersWith id @@ -830,7 +831,7 @@ listProxied proxies rs = concat <$> mapM go rs annexconfigadjuster clusters r' = let c = adduuid (configRepoUUID renamedr) $ addurl $ - addproxied $ + addproxiedby $ adjustclusternode clusters $ inheritconfigs $ Git.fullconfig r' in r' @@ -844,7 +845,10 @@ listProxied proxies rs = concat <$> mapM go rs addurl = M.insert (remoteConfig renamedr (remoteGitConfigKey UrlField)) [Git.ConfigValue $ encodeBS $ Git.repoLocation r] - addproxied = addremoteannexfield ProxiedField True + addproxiedby = case remoteAnnexUUID gc of + Just u -> addremoteannexfield ProxiedByField + [Git.ConfigValue $ fromUUID u] + Nothing -> id -- A node of a cluster that is being proxied along with -- that cluster does not need to be synced with @@ -854,14 +858,14 @@ listProxied proxies rs = concat <$> mapM go rs case M.lookup (ClusterNodeUUID (proxyRemoteUUID p)) (clusterNodeUUIDs clusters) of Just cs | any (\c -> S.member (fromClusterUUID c) proxieduuids) (S.toList cs) -> - addremoteannexfield SyncField False + addremoteannexfield SyncField + [Git.ConfigValue $ Git.Config.boolConfig' False] _ -> id proxieduuids = S.map proxyRemoteUUID proxied - addremoteannexfield f b = M.insert + addremoteannexfield f = M.insert (remoteAnnexConfig renamedr 
(remoteGitConfigKey f)) - [Git.ConfigValue $ Git.Config.boolConfig' b] inheritconfigs c = foldl' inheritconfig c proxyInheritedFields diff --git a/Remote/Helper/Git.hs b/Remote/Helper/Git.hs index b240ee0a2f..c37d286df4 100644 --- a/Remote/Helper/Git.hs +++ b/Remote/Helper/Git.hs @@ -65,6 +65,6 @@ gitRepoInfo r = do return [ ("repository location", Git.repoLocation repo) , ("proxied", Git.Config.boolConfig - (remoteAnnexProxied (Remote.gitconfig r))) + (isJust (remoteAnnexProxiedBy (Remote.gitconfig r)))) , ("last synced", lastsynctime) ] diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 4b1827306c..c85acb1929 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -386,12 +386,14 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexBwLimitUpload :: Maybe BwRate , remoteAnnexBwLimitDownload :: Maybe BwRate , remoteAnnexAllowUnverifiedDownloads :: Bool + , remoteAnnexUUID :: Maybe UUID , remoteAnnexConfigUUID :: Maybe UUID , remoteAnnexMaxGitBundles :: Int , remoteAnnexAllowEncryptedGitRepo :: Bool , remoteAnnexProxy :: Bool - , remoteAnnexProxied :: Bool + , remoteAnnexProxiedBy :: Maybe UUID , remoteAnnexClusterNode :: Maybe [RemoteName] + , remoteAnnexClusterGateway :: [ClusterUUID] , remoteUrl :: Maybe String {- These settings are specific to particular types of remotes @@ -471,16 +473,20 @@ extractRemoteGitConfig r remotename = do readBwRatePerSecond =<< getmaybe BWLimitDownloadField , remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ getmaybe SecurityAllowUnverifiedDownloadsField + , remoteAnnexUUID = toUUID <$> getmaybe UUIDField , remoteAnnexConfigUUID = toUUID <$> getmaybe ConfigUUIDField , remoteAnnexMaxGitBundles = fromMaybe 100 (getmayberead MaxGitBundlesField) , remoteAnnexAllowEncryptedGitRepo = getbool AllowEncryptedGitRepoField False , remoteAnnexProxy = getbool ProxyField False - , remoteAnnexProxied = getbool ProxiedField False + , remoteAnnexProxiedBy = toUUID <$> getmaybe ProxiedByField , remoteAnnexClusterNode = (filter 
isLegalName . words) <$> getmaybe ClusterNodeField + , remoteAnnexClusterGateway = fromMaybe [] $ + (mapMaybe (mkClusterUUID . toUUID) . words) + <$> getmaybe ClusterGatewayField , remoteUrl = case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of Just (ConfigValue b) @@ -553,13 +559,15 @@ data RemoteGitConfigField | BWLimitField | BWLimitUploadField | BWLimitDownloadField + | UUIDField | ConfigUUIDField | SecurityAllowUnverifiedDownloadsField | MaxGitBundlesField | AllowEncryptedGitRepoField | ProxyField - | ProxiedField + | ProxiedByField | ClusterNodeField + | ClusterGatewayField | UrlField | ShellField | SshOptionsField @@ -618,14 +626,16 @@ remoteGitConfigField = \case BWLimitField -> inherited "bwlimit" BWLimitUploadField -> inherited "bwlimit-upload" BWLimitDownloadField -> inherited "bwlimit-upload" + UUIDField -> uninherited "uuid" ConfigUUIDField -> uninherited "config-uuid" SecurityAllowUnverifiedDownloadsField -> inherited "security-allow-unverified-downloads" MaxGitBundlesField -> inherited "max-git-bundles" AllowEncryptedGitRepoField -> inherited "allow-encrypted-gitrepo" -- Allow proxy chains. ProxyField -> inherited "proxy" - ProxiedField -> uninherited "proxied" + ProxiedByField -> uninherited "proxied-by" ClusterNodeField -> uninherited "cluster-node" + ClusterGatewayField -> uninherited "cluster-gateway" UrlField -> uninherited "url" ShellField -> inherited "shell" SshOptionsField -> inherited "ssh-options" diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index de8f45e109..3d95cd5b1a 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1398,11 +1398,12 @@ repository, using [[git-annex-config]]. See its man page for a list.) * `annex.cluster.` - [[git-annex-updatecluster]] sets this to the UUID of a cluster - based on `remote..annex-cluster-node` configuration. + This is set to make the repository be a gateway to a cluster. + The value is the cluster UUID. 
Note that cluster UUIDs are not + the same as repository UUIDs, and a repository UUID cannot be used here. - Note that cluster UUIDs are not the same as repository UUIDs, - and a repository UUID cannot be used here. + Usually this is set up by running [[git-annex-initcluster]] or + [[git-annex-extendcluster]]. # CONFIGURATION OF REMOTES @@ -1681,14 +1682,11 @@ Remotes are configured using these settings in `.git/config`. After configuring this, run [[git-annex-updateproxy](1) to store the new configuration in the git-annex branch. -* `remote.<name>.annex-proxied` - - Setting this to "true" indicates that a remote is proxied via the - git-annex repository that its remote points to. That prevents commands - like `git-annex sync` from pulling and pushing the remote. +* `remote.<name>.annex-proxied-by` Usually this is used internally, when git-annex sets up proxied remotes, - and will not need to be set. + and will not need to be configured. The value is the UUID of the + git-annex repository that proxies access to this remote. * `remote.<name>.annex-cluster-node` @@ -1699,6 +1697,15 @@ Remotes are configured using these settings in `.git/config`. After configuring this, run [[git-annex-updatecluster](1) to store the new configuration in the git-annex branch. +* `remote.<name>.annex-cluster-gateway` + + Set to the UUID of a cluster that this remote serves as a gateway for. + Multiple UUIDs can be listed, separated by whitespace. When the local + repository is also a gateway for that cluster, it will proxy for the + nodes of the remote gateway. + + Usually this is set up by running [[git-annex-extendcluster]]. 
+ * `remote.<name>.annex-private` When this is set to true, no information about the remote will be diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index daf21838b7..a3584840cf 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -33,8 +33,11 @@ For June's work on [[design/passthrough_proxy]], remaining todos: protocol messages on to any remotes that have the same UUID as the cluster. Needs VIA extension to P2P protocol to avoid cycles. -* `git-annex updatecluster` needs changes to support a distributed cluster. - Currently it will remove nodes that are behind another gateway. + Current status: Distributed cluster nodes are visible, + and can be accessed directly, but trying to GET from a cluster + fails when the content is located behind a remote gateway. + And PUT only sends to the immediate nodes + of the cluster, not on to other gateways. * Getting a key from a cluster currently always selects the lowest cost remote, and always the same remote if cost is the same. Should @@ -106,3 +109,8 @@ For June's work on [[design/passthrough_proxy]], remaining todos: * Support annex.jobs for clusters. (done) +* Add `git-annex extendcluster` command and extend `git-annex updatecluster` + to support clusters with multiple gateways. (done) + +* Support proxying for a remote that is proxied by another gateway of + a cluster. (done)