initial, working support for getting from clusters

Currently tends to put all the load on a single node, which will need to
be improved.
This commit is contained in:
Joey Hess 2024-06-18 11:01:10 -04:00
parent d34326ab76
commit 88d9a02f7c
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 46 additions and 22 deletions

View file

@ -15,6 +15,7 @@ import Logs.Cluster
import P2P.Proxy import P2P.Proxy
import P2P.Protocol import P2P.Protocol
import P2P.IO import P2P.IO
import Annex.Proxy
import Logs.Location import Logs.Location
import Types.Command import Types.Command
import Remote.List import Remote.List
@ -46,22 +47,31 @@ proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do
-- determine. Instead, pick the newest protocol version -- determine. Instead, pick the newest protocol version
-- that we and the client both speak. -- that we and the client both speak.
let protocolversion = min maxProtocolVersion clientmaxversion let protocolversion = min maxProtocolVersion clientmaxversion
selectnode <- clusterProxySelector clusteruuid selectnode <- clusterProxySelector clusteruuid protocolversion
proxy proxydone proxymethods servermode clientside selectnode proxy proxydone proxymethods servermode clientside selectnode
protocolversion othermsg protoerrhandler protocolversion othermsg protoerrhandler
withclientversion Nothing = proxydone withclientversion Nothing = proxydone
clusterProxySelector :: ClusterUUID -> Annex ProxySelector clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Annex ProxySelector
clusterProxySelector clusteruuid = do clusterProxySelector clusteruuid protocolversion = do
nodes <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs) nodes <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
<$> getClusters <$> getClusters
remotes <- filter (flip S.member nodes . ClusterNodeUUID . Remote.uuid) remotes <- filter (flip S.member nodes . ClusterNodeUUID . Remote.uuid)
<$> remoteList <$> remoteList
remotesides <- mapM (proxySshRemoteSide protocolversion) remotes
return $ ProxySelector return $ ProxySelector
{ proxyCHECKPRESENT = \k -> error "TODO" { proxyCHECKPRESENT = \k -> error "TODO"
, proxyLOCKCONTENT = \k -> error "TODO" , proxyGET = \k -> do
, proxyUNLOCKCONTENT = error "TODO" locs <- S.fromList <$> loggedLocations k
, proxyREMOVE = \k -> error "TODO" case filter (flip S.member locs . remoteUUID) remotesides of
, proxyGET = \k -> error "TODO" -- TODO: Avoid always using same remote
(r:_) -> return (Just r)
[] -> return Nothing
, proxyPUT = \k -> error "TODO" , proxyPUT = \k -> error "TODO"
, proxyREMOVE = \k -> error "TODO"
-- Content is not locked on the cluster as a whole,
-- instead it can be locked on individual nodes that are
-- proxied to the client.
, proxyLOCKCONTENT = const (pure Nothing)
, proxyUNLOCKCONTENT = pure Nothing
} }

View file

@ -55,20 +55,20 @@ closeRemoteSide remoteside =
- -} - -}
data ProxySelector = ProxySelector data ProxySelector = ProxySelector
{ proxyCHECKPRESENT :: Key -> Annex RemoteSide { proxyCHECKPRESENT :: Key -> Annex RemoteSide
, proxyLOCKCONTENT :: Key -> Annex RemoteSide , proxyLOCKCONTENT :: Key -> Annex (Maybe RemoteSide)
, proxyUNLOCKCONTENT :: Annex RemoteSide , proxyUNLOCKCONTENT :: Annex (Maybe RemoteSide)
, proxyREMOVE :: Key -> Annex RemoteSide , proxyREMOVE :: Key -> Annex RemoteSide
, proxyGET :: Key -> Annex RemoteSide , proxyGET :: Key -> Annex (Maybe RemoteSide)
, proxyPUT :: Key -> Annex RemoteSide , proxyPUT :: Key -> Annex RemoteSide
} }
singleProxySelector :: RemoteSide -> ProxySelector singleProxySelector :: RemoteSide -> ProxySelector
singleProxySelector r = ProxySelector singleProxySelector r = ProxySelector
{ proxyCHECKPRESENT = const (pure r) { proxyCHECKPRESENT = const (pure r)
, proxyLOCKCONTENT = const (pure r) , proxyLOCKCONTENT = const (pure (Just r))
, proxyUNLOCKCONTENT = pure r , proxyUNLOCKCONTENT = pure (Just r)
, proxyREMOVE = const (pure r) , proxyREMOVE = const (pure r)
, proxyGET = const (pure r) , proxyGET = const (pure (Just r))
, proxyPUT = const (pure r) , proxyPUT = const (pure r)
} }
@ -163,19 +163,28 @@ proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) prox
CHECKPRESENT k -> do CHECKPRESENT k -> do
remoteside <- proxyCHECKPRESENT proxyselector k remoteside <- proxyCHECKPRESENT proxyselector k
proxyresponse remoteside message (const proxynextclientmessage) proxyresponse remoteside message (const proxynextclientmessage)
LOCKCONTENT k -> do LOCKCONTENT k -> proxyLOCKCONTENT proxyselector k >>= \case
remoteside <- proxyLOCKCONTENT proxyselector k Just remoteside ->
proxyresponse remoteside message (const proxynextclientmessage) proxyresponse remoteside message
UNLOCKCONTENT -> do (const proxynextclientmessage)
remoteside <- proxyUNLOCKCONTENT proxyselector Nothing ->
proxynoresponse remoteside message proxynextclientmessage protoerrhandler proxynextclientmessage $
client $ net $ sendMessage FAILURE
UNLOCKCONTENT -> proxyUNLOCKCONTENT proxyselector >>= \case
Just remoteside ->
proxynoresponse remoteside message
proxynextclientmessage
Nothing -> proxynextclientmessage ()
REMOVE k -> do REMOVE k -> do
remoteside <- proxyREMOVE proxyselector k remoteside <- proxyREMOVE proxyselector k
servermodechecker checkREMOVEServerMode $ servermodechecker checkREMOVEServerMode $
handleREMOVE remoteside k message handleREMOVE remoteside k message
GET _ _ k -> do GET _ _ k -> proxyGET proxyselector k >>= \case
remoteside <- proxyGET proxyselector k Just remoteside -> handleGET remoteside message
handleGET remoteside message Nothing ->
protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $
ERROR "content not present"
PUT _ k -> do PUT _ k -> do
remoteside <- proxyPUT proxyselector k remoteside <- proxyPUT proxyselector k
servermodechecker checkPUTServerMode $ servermodechecker checkPUTServerMode $

View file

@ -57,6 +57,11 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Getting a key from a cluster should proxy from one of the nodes that has * Getting a key from a cluster should proxy from one of the nodes that has
it, or from the proxy repository itself if it has the key. it, or from the proxy repository itself if it has the key.
* Getting a key from a cluster currently always selects the lowest cost
remote, and always the same remote if cost is the same. Should
round-robin among remotes, and prefer to avoid using remotes that
other git-annex processes are currently using.
* Implement upload with fanout and reporting back additional UUIDs over P2P * Implement upload with fanout and reporting back additional UUIDs over P2P
protocol. protocol.