git-annex-shell: proxy nodes located beyond remote cluster gateways

Walking a tightrope between security and convenience here, because
git-annex-shell needs to only proxy for things when there has been
an explicit, local action to configure them.

In this case, the user has to have run `git-annex extendcluster`,
which now sets annex-cluster-gateway on the remote.

Note that any repositories that the gateway is recorded to
proxy for will be proxied onward. This is not limited to cluster nodes,
because checking the node log would not add any security; someone could
add any uuid to it. The gateway of course then does its own
checking to determine if it will allow proxying for the remote.
This commit is contained in:
Joey Hess 2024-06-26 12:56:16 -04:00
parent 1ec2fecf3f
commit 07e899c9d3
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
10 changed files with 90 additions and 44 deletions

View file

@ -206,18 +206,30 @@ checkProxy remoteuuid ouruuid = M.lookup ouruuid <$> getProxies >>= \case
rs <- concat . byCost <$> remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = uuid r == remoteuuid
-- Only proxy for a remote when the git configuration
-- allows it.
let proxyconfigured r = remoteAnnexProxy (R.gitconfig r)
|| (any (`M.member` myclusters) $ fromMaybe [] $ remoteAnnexClusterNode $ R.gitconfig r)
let samename r p = name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyconfigured r && any (samename r) ps) rs) of
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
Nothing -> notconfigured
Just r -> do
Annex.changeState $ \st ->
st { Annex.proxyremote = Just (Right r) }
return True
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch making git-annex-shell unexpectedly
-- proxy for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (R.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ R.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (R.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . R.gitconfig) $
filter (\p -> R.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
if M.member cu (clusterUUIDs clusters)

View file

@ -13,6 +13,7 @@ import Command
import qualified Annex
import Types.Cluster
import Config
import Types.GitConfig
import qualified Remote
import qualified Data.Map as M
@ -23,11 +24,13 @@ cmd = command "extendcluster" SectionSetup "add an gateway to a cluster"
seek :: CmdParams -> CommandSeek
seek (remotename:clustername:[]) = Remote.byName (Just clusterremotename) >>= \case
Just clusterremote ->
Just clusterremote -> Remote.byName (Just remotename) >>= \case
Just gatewayremote ->
case mkClusterUUID (Remote.uuid clusterremote) of
Just cu -> commandAction $ start cu clustername
Just cu -> commandAction $ start cu clustername gatewayremote
Nothing -> giveup $ clusterremotename
++ " is not a cluster remote."
Nothing -> giveup $ "No remote named " ++ remotename ++ " exists."
Nothing -> giveup $ "Expected to find a cluster remote named "
++ clusterremotename
++ " that is accessed via " ++ remotename
@ -38,12 +41,14 @@ seek (remotename:clustername:[]) = Remote.byName (Just clusterremotename) >>= \c
clusterremotename = remotename ++ "-" ++ clustername
seek _ = giveup "Expected two parameters, gateway and clustername."
start :: ClusterUUID -> String -> CommandStart
start cu clustername = starting "extendcluster" ai si $ do
start :: ClusterUUID -> String -> Remote -> CommandStart
start cu clustername gatewayremote = starting "extendcluster" ai si $ do
myclusters <- annexClusters <$> Annex.getGitConfig
let setcus f = setConfig f (fromUUID (fromClusterUUID cu))
unless (M.member clustername myclusters) $ do
setConfig (annexConfig ("cluster." <> encodeBS clustername))
(fromUUID (fromClusterUUID cu))
setcus $ annexConfig ("cluster." <> encodeBS clustername)
setcus $ remoteAnnexConfig gatewayremote $
remoteGitConfigKey ClusterGatewayField
next $ return True
where
ai = ActionItemOther (Just (UnquotedString clustername))

View file

@ -80,5 +80,5 @@ findProxiedClusterNodes recordednodes =
where
isproxynode r =
asclusternode r `S.member` recordednodes
&& remoteAnnexProxied (R.gitconfig r)
&& isJust (remoteAnnexProxiedBy (R.gitconfig r))
asclusternode = ClusterNodeUUID . R.uuid

View file

@ -83,7 +83,7 @@ findRemoteProxiedClusterNodes = do
<$> Annex.getGitConfig
clusternodes <- clusterNodeUUIDs <$> getClusters
let isproxiedclusternode r
| remoteAnnexProxied (R.gitconfig r) =
| isJust (remoteAnnexProxiedBy (R.gitconfig r)) =
case M.lookup (ClusterNodeUUID (R.uuid r)) clusternodes of
Nothing -> False
Just s -> not $ S.null $

View file

@ -455,7 +455,7 @@ gitSyncableRemote :: Remote -> Bool
gitSyncableRemote r
| gitSyncableRemoteType (remotetype r)
&& isJust (remoteUrl (gitconfig r)) =
not (remoteAnnexProxied (gitconfig r))
not (isJust (remoteAnnexProxiedBy (gitconfig r)))
| otherwise = case remoteUrl (gitconfig r) of
Just u | "annex::" `isPrefixOf` u -> True
_ -> False

View file

@ -794,21 +794,22 @@ listProxied proxies rs = concat <$> mapM go rs
then pure []
else case M.lookup cu proxies of
Nothing -> pure []
Just s -> catMaybes
<$> mapM (mkproxied g r s) (S.toList s)
Just proxied -> catMaybes
<$> mapM (mkproxied g r gc proxied)
(S.toList proxied)
proxiedremotename r p = do
n <- Git.remoteName r
pure $ n ++ "-" ++ proxyRemoteName p
mkproxied g r proxied p = case proxiedremotename r p of
mkproxied g r gc proxied p = case proxiedremotename r p of
Nothing -> pure Nothing
Just proxyname -> mkproxied' g r proxied p proxyname
Just proxyname -> mkproxied' g r gc proxied p proxyname
-- The proxied remote is constructed by renaming the proxy remote,
-- changing its uuid, and setting the proxied remote's inherited
-- configs and uuid in Annex state.
mkproxied' g r proxied p proxyname
mkproxied' g r gc proxied p proxyname
| any isconfig (M.keys (Git.config g)) = pure Nothing
| otherwise = do
clusters <- getClustersWith id
@ -830,7 +831,7 @@ listProxied proxies rs = concat <$> mapM go rs
annexconfigadjuster clusters r' =
let c = adduuid (configRepoUUID renamedr) $
addurl $
addproxied $
addproxiedby $
adjustclusternode clusters $
inheritconfigs $ Git.fullconfig r'
in r'
@ -844,7 +845,10 @@ listProxied proxies rs = concat <$> mapM go rs
addurl = M.insert (remoteConfig renamedr (remoteGitConfigKey UrlField))
[Git.ConfigValue $ encodeBS $ Git.repoLocation r]
addproxied = addremoteannexfield ProxiedField True
addproxiedby = case remoteAnnexUUID gc of
Just u -> addremoteannexfield ProxiedByField
[Git.ConfigValue $ fromUUID u]
Nothing -> id
-- A node of a cluster that is being proxied along with
-- that cluster does not need to be synced with
@ -854,14 +858,14 @@ listProxied proxies rs = concat <$> mapM go rs
case M.lookup (ClusterNodeUUID (proxyRemoteUUID p)) (clusterNodeUUIDs clusters) of
Just cs
| any (\c -> S.member (fromClusterUUID c) proxieduuids) (S.toList cs) ->
addremoteannexfield SyncField False
addremoteannexfield SyncField
[Git.ConfigValue $ Git.Config.boolConfig' False]
_ -> id
proxieduuids = S.map proxyRemoteUUID proxied
addremoteannexfield f b = M.insert
addremoteannexfield f = M.insert
(remoteAnnexConfig renamedr (remoteGitConfigKey f))
[Git.ConfigValue $ Git.Config.boolConfig' b]
inheritconfigs c = foldl' inheritconfig c proxyInheritedFields

View file

@ -65,6 +65,6 @@ gitRepoInfo r = do
return
[ ("repository location", Git.repoLocation repo)
, ("proxied", Git.Config.boolConfig
(remoteAnnexProxied (Remote.gitconfig r)))
(isJust (remoteAnnexProxiedBy (Remote.gitconfig r))))
, ("last synced", lastsynctime)
]

View file

@ -386,12 +386,14 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexBwLimitUpload :: Maybe BwRate
, remoteAnnexBwLimitDownload :: Maybe BwRate
, remoteAnnexAllowUnverifiedDownloads :: Bool
, remoteAnnexUUID :: Maybe UUID
, remoteAnnexConfigUUID :: Maybe UUID
, remoteAnnexMaxGitBundles :: Int
, remoteAnnexAllowEncryptedGitRepo :: Bool
, remoteAnnexProxy :: Bool
, remoteAnnexProxied :: Bool
, remoteAnnexProxiedBy :: Maybe UUID
, remoteAnnexClusterNode :: Maybe [RemoteName]
, remoteAnnexClusterGateway :: [ClusterUUID]
, remoteUrl :: Maybe String
{- These settings are specific to particular types of remotes
@ -471,16 +473,20 @@ extractRemoteGitConfig r remotename = do
readBwRatePerSecond =<< getmaybe BWLimitDownloadField
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe SecurityAllowUnverifiedDownloadsField
, remoteAnnexUUID = toUUID <$> getmaybe UUIDField
, remoteAnnexConfigUUID = toUUID <$> getmaybe ConfigUUIDField
, remoteAnnexMaxGitBundles =
fromMaybe 100 (getmayberead MaxGitBundlesField)
, remoteAnnexAllowEncryptedGitRepo =
getbool AllowEncryptedGitRepoField False
, remoteAnnexProxy = getbool ProxyField False
, remoteAnnexProxied = getbool ProxiedField False
, remoteAnnexProxiedBy = toUUID <$> getmaybe ProxiedByField
, remoteAnnexClusterNode =
(filter isLegalName . words)
<$> getmaybe ClusterNodeField
, remoteAnnexClusterGateway = fromMaybe [] $
(mapMaybe (mkClusterUUID . toUUID) . words)
<$> getmaybe ClusterGatewayField
, remoteUrl =
case Git.Config.getMaybe (remoteConfig remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b)
@ -553,13 +559,15 @@ data RemoteGitConfigField
| BWLimitField
| BWLimitUploadField
| BWLimitDownloadField
| UUIDField
| ConfigUUIDField
| SecurityAllowUnverifiedDownloadsField
| MaxGitBundlesField
| AllowEncryptedGitRepoField
| ProxyField
| ProxiedField
| ProxiedByField
| ClusterNodeField
| ClusterGatewayField
| UrlField
| ShellField
| SshOptionsField
@ -618,14 +626,16 @@ remoteGitConfigField = \case
BWLimitField -> inherited "bwlimit"
BWLimitUploadField -> inherited "bwlimit-upload"
BWLimitDownloadField -> inherited "bwlimit-upload"
UUIDField -> uninherited "uuid"
ConfigUUIDField -> uninherited "config-uuid"
SecurityAllowUnverifiedDownloadsField -> inherited "security-allow-unverified-downloads"
MaxGitBundlesField -> inherited "max-git-bundles"
AllowEncryptedGitRepoField -> inherited "allow-encrypted-gitrepo"
-- Allow proxy chains.
ProxyField -> inherited "proxy"
ProxiedField -> uninherited "proxied"
ProxiedByField -> uninherited "proxied-by"
ClusterNodeField -> uninherited "cluster-node"
ClusterGatewayField -> uninherited "cluster-gateway"
UrlField -> uninherited "url"
ShellField -> inherited "shell"
SshOptionsField -> inherited "ssh-options"

View file

@ -1398,11 +1398,12 @@ repository, using [[git-annex-config]]. See its man page for a list.)
* `annex.cluster.<name>`
[[git-annex-updatecluster]] sets this to the UUID of a cluster
based on `remote.<name>.annex-cluster-node` configuration.
This is set to make the repository be a gateway to a cluster.
The value is the cluster UUID. Note that cluster UUIDs are not
the same as repository UUIDs, and a repository UUID cannot be used here.
Note that cluster UUIDs are not the same as repository UUIDs,
and a repository UUID cannot be used here.
Usually this is set up by running [[git-annex-initcluster]] or
[[git-annex-extendcluster]].
# CONFIGURATION OF REMOTES
@ -1681,14 +1682,11 @@ Remotes are configured using these settings in `.git/config`.
After configuring this, run [[git-annex-updateproxy]](1) to store
the new configuration in the git-annex branch.
* `remote.<name>.annex-proxied`
Setting this to "true" indicates that a remote is proxied via the
git-annex repository that its remote points to. That prevents commands
like `git-annex sync` from pulling and pushing the remote.
* `remote.<name>.annex-proxied-by`
Usually this is used internally, when git-annex sets up proxied remotes,
and will not need to be set.
and will not need to be configured. The value is the UUID of the
git-annex repository that proxies access to this remote.
* `remote.<name>.annex-cluster-node`
@ -1699,6 +1697,15 @@ Remotes are configured using these settings in `.git/config`.
After configuring this, run [[git-annex-updatecluster]](1) to store
the new configuration in the git-annex branch.
* `remote.<name>.annex-cluster-gateway`
Set to the UUID of a cluster that this remote serves as a gateway for.
Multiple UUIDs can be listed, separated by whitespace. When the local
repository is also a gateway for that cluster, it will proxy for the
nodes of the remote gateway.
Usually this is set up by running [[git-annex-extendcluster]].
* `remote.<name>.annex-private`
When this is set to true, no information about the remote will be

View file

@ -33,8 +33,11 @@ For June's work on [[design/passthrough_proxy]], remaining todos:
protocol messages on to any remotes that have the same UUID as
the cluster. Needs VIA extension to P2P protocol to avoid cycles.
* `git-annex updatecluster` needs changes to support a distributed cluster.
Currently it will remove nodes that are behind another gateway.
Current status: Distributed cluster nodes are visible,
and can be accessed directly, but trying to GET from a cluster
fails when the content is located behind a remote gateway.
And PUT only sends to the immediate nodes
of the cluster, not on to other gateways.
* Getting a key from a cluster currently always selects the lowest cost
remote, and always the same remote if cost is the same. Should
@ -106,3 +109,8 @@ For June's work on [[design/passthrough_proxy]], remaining todos:
* Support annex.jobs for clusters. (done)
* Add `git-annex extendcluster` command and extend `git-annex updatecluster`
to support clusters with multiple gateways. (done)
* Support proxying for a remote that is proxied by another gateway of
a cluster. (done)