2024-06-18 14:36:04 +00:00
|
|
|
{- clusters
|
|
|
|
-
|
|
|
|
- Copyright 2024 Joey Hess <id@joeyh.name>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE RankNTypes #-}
|
|
|
|
|
|
|
|
module Annex.Cluster where
|
|
|
|
|
|
|
|
import Annex.Common
|
2024-06-18 15:37:38 +00:00
|
|
|
import qualified Annex
|
2024-06-18 14:36:04 +00:00
|
|
|
import Types.Cluster
|
|
|
|
import Logs.Cluster
|
|
|
|
import P2P.Proxy
|
|
|
|
import P2P.Protocol
|
|
|
|
import P2P.IO
|
2024-06-18 15:01:10 +00:00
|
|
|
import Annex.Proxy
|
2024-06-18 14:36:04 +00:00
|
|
|
import Logs.Location
|
|
|
|
import Types.Command
|
|
|
|
import Remote.List
|
|
|
|
import qualified Remote
|
2024-06-18 15:37:38 +00:00
|
|
|
import qualified Types.Remote as Remote
|
2024-06-18 14:36:04 +00:00
|
|
|
|
|
|
|
import qualified Data.Map as M
|
|
|
|
import qualified Data.Set as S
|
|
|
|
|
|
|
|
{- Proxy to a cluster. -}
|
|
|
|
proxyCluster
|
|
|
|
:: ClusterUUID
|
|
|
|
-> CommandPerform
|
|
|
|
-> ServerMode
|
|
|
|
-> ClientSide
|
|
|
|
-> (forall a. ((a -> CommandPerform) -> Annex (Either ProtoFailure a) -> CommandPerform))
|
|
|
|
-> CommandPerform
|
|
|
|
proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
|
|
|
|
getClientProtocolVersion (fromClusterUUID clusteruuid) clientside
|
|
|
|
withclientversion protoerrhandler
|
|
|
|
where
|
|
|
|
proxymethods = ProxyMethods
|
|
|
|
{ removedContent = \u k -> logChange k u InfoMissing
|
|
|
|
, addedContent = \u k -> logChange k u InfoPresent
|
|
|
|
}
|
|
|
|
|
|
|
|
withclientversion (Just (clientmaxversion, othermsg)) = do
|
|
|
|
-- The protocol versions supported by the nodes are not
|
|
|
|
-- known at this point, and would be too expensive to
|
|
|
|
-- determine. Instead, pick the newest protocol version
|
|
|
|
-- that we and the client both speak.
|
|
|
|
let protocolversion = min maxProtocolVersion clientmaxversion
|
2024-06-18 15:01:10 +00:00
|
|
|
selectnode <- clusterProxySelector clusteruuid protocolversion
|
2024-06-18 16:07:01 +00:00
|
|
|
proxy proxydone proxymethods servermode clientside
|
|
|
|
(fromClusterUUID clusteruuid)
|
|
|
|
selectnode protocolversion othermsg protoerrhandler
|
2024-06-18 14:36:04 +00:00
|
|
|
withclientversion Nothing = proxydone
|
|
|
|
|
2024-06-18 15:01:10 +00:00
|
|
|
clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Annex ProxySelector
|
|
|
|
clusterProxySelector clusteruuid protocolversion = do
|
2024-06-18 14:36:04 +00:00
|
|
|
nodes <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
|
|
|
|
<$> getClusters
|
2024-06-18 15:37:38 +00:00
|
|
|
clusternames <- annexClusters <$> Annex.getGitConfig
|
|
|
|
remotes <- filter (isnode nodes clusternames) <$> remoteList
|
2024-06-18 15:01:10 +00:00
|
|
|
remotesides <- mapM (proxySshRemoteSide protocolversion) remotes
|
2024-06-18 14:36:04 +00:00
|
|
|
return $ ProxySelector
|
checkpresent support for clusters
This assumes that the proxy for a cluster has up-to-date location
logs. If it didn't, it might proxy the checkpresent to a node that no
longer has the content, while some other node still does, and so
it would incorrectly appear that the cluster no longer contains the
content.
Since cluster UUIDs are not stored to location logs,
git-annex fsck --fast when claiming to fix a location log when
that occurred would not cause any problems. And presumably the location
tracking would later get sorted out.
At least usually, changes to the content of nodes goes via the proxy,
and it will update its location logs, so they will be accurate. However,
if there were multiple proxies to the same cluster, or nodes were
accessed directly (or via proxy to the node and not the cluster),
the proxy's location log could certainly be wrong.
(The location log access for GET has the same issues.)
2024-06-18 15:10:48 +00:00
|
|
|
{ proxyCHECKPRESENT = nodecontaining remotesides
|
|
|
|
, proxyGET = nodecontaining remotesides
|
2024-06-18 16:07:01 +00:00
|
|
|
-- Send the key to every node that does not yet contain it.
|
|
|
|
, proxyPUT = \k -> do
|
|
|
|
locs <- S.fromList <$> loggedLocations k
|
|
|
|
return $ filter (flip S.notMember locs . remoteUUID) remotesides
|
2024-06-18 15:01:10 +00:00
|
|
|
, proxyREMOVE = \k -> error "TODO"
|
|
|
|
-- Content is not locked on the cluster as a whole,
|
|
|
|
-- instead it can be locked on individual nodes that are
|
|
|
|
-- proxied to the client.
|
|
|
|
, proxyLOCKCONTENT = const (pure Nothing)
|
|
|
|
, proxyUNLOCKCONTENT = pure Nothing
|
2024-06-18 14:36:04 +00:00
|
|
|
}
|
checkpresent support for clusters
This assumes that the proxy for a cluster has up-to-date location
logs. If it didn't, it might proxy the checkpresent to a node that no
longer has the content, while some other node still does, and so
it would incorrectly appear that the cluster no longer contains the
content.
Since cluster UUIDs are not stored to location logs,
git-annex fsck --fast when claiming to fix a location log when
that occurred would not cause any problems. And presumably the location
tracking would later get sorted out.
At least usually, changes to the content of nodes goes via the proxy,
and it will update its location logs, so they will be accurate. However,
if there were multiple proxies to the same cluster, or nodes were
accessed directly (or via proxy to the node and not the cluster),
the proxy's location log could certainly be wrong.
(The location log access for GET has the same issues.)
2024-06-18 15:10:48 +00:00
|
|
|
where
|
2024-06-18 15:37:38 +00:00
|
|
|
-- Nodes of the cluster have remote.name.annex-cluster-node
|
|
|
|
-- containing its name.
|
|
|
|
isnode nodes clusternames r =
|
|
|
|
case remoteAnnexClusterNode (Remote.gitconfig r) of
|
|
|
|
Nothing -> False
|
|
|
|
Just names
|
|
|
|
| any (isclustername clusternames) names ->
|
|
|
|
flip S.member nodes $
|
|
|
|
ClusterNodeUUID $ Remote.uuid r
|
|
|
|
| otherwise -> False
|
|
|
|
|
|
|
|
isclustername clusternames name =
|
|
|
|
M.lookup name clusternames == Just clusteruuid
|
|
|
|
|
checkpresent support for clusters
This assumes that the proxy for a cluster has up-to-date location
logs. If it didn't, it might proxy the checkpresent to a node that no
longer has the content, while some other node still does, and so
it would incorrectly appear that the cluster no longer contains the
content.
Since cluster UUIDs are not stored to location logs,
git-annex fsck --fast when claiming to fix a location log when
that occurred would not cause any problems. And presumably the location
tracking would later get sorted out.
At least usually, changes to the content of nodes goes via the proxy,
and it will update its location logs, so they will be accurate. However,
if there were multiple proxies to the same cluster, or nodes were
accessed directly (or via proxy to the node and not the cluster),
the proxy's location log could certainly be wrong.
(The location log access for GET has the same issues.)
2024-06-18 15:10:48 +00:00
|
|
|
nodecontaining remotesides k = do
|
|
|
|
locs <- S.fromList <$> loggedLocations k
|
|
|
|
case filter (flip S.member locs . remoteUUID) remotesides of
|
|
|
|
-- TODO: Avoid always using same remote
|
|
|
|
(r:_) -> return (Just r)
|
|
|
|
[] -> return Nothing
|
|
|
|
|