GET and CHECKPRESENT amoung lowest cost cluster nodes

Before it was using a node that might have had a higher cost.

Also threw in a random selection from amoung the low cost nodes. Of
course this is a poor excuse for load balancing, but it's better than
nothing. Most of the time...
This commit is contained in:
Joey Hess 2024-06-27 14:36:55 -04:00
parent dceb8dc776
commit cf59d7f92c
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 56 additions and 39 deletions

View file

@ -27,6 +27,7 @@ import qualified Types.Remote as Remote
import qualified Data.Map as M
import qualified Data.Set as S
import System.Random
{- Proxy to a cluster. -}
proxyCluster
@ -75,7 +76,7 @@ clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs)
<$> getClusters
myclusters <- annexClusters <$> Annex.getGitConfig
allremotes <- remoteList
allremotes <- concat . Remote.byCost <$> remoteList
hereu <- getUUID
let bypass' = S.insert hereu bypass
let clusterremotes = filter (isnode bypass' allremotes nodeuuids myclusters) allremotes
@ -94,8 +95,8 @@ clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
-- skipping nodes where it's not preferred content.
, proxyPUT = \af k -> do
locs <- S.fromList <$> loggedLocations k
let l = filter (flip S.notMember locs . remoteUUID) nodes
l' <- filterM (\n -> isPreferredContent (Just (remoteUUID n)) mempty (Just k) af True) l
let l = filter (flip S.notMember locs . Remote.uuid . remote) nodes
l' <- filterM (\n -> isPreferredContent (Just (Remote.uuid (remote n))) mempty (Just k) af True) l
-- PUT to no nodes doesn't work, so fall
-- back to all nodes.
return $ nonempty [l', l] nodes
@ -146,11 +147,19 @@ clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do
nodecontaining nodes k = do
locs <- S.fromList <$> loggedLocations k
case filter (flip S.member locs . remoteUUID) nodes of
-- For now, pick the first node that has the
-- content. Load balancing would be nice..
(r:_) -> return (Just r)
case filter (flip S.member locs . Remote.uuid . remote) nodes of
[] -> return Nothing
(node:[]) -> return (Just node)
(node:rest) ->
-- The list of nodes is ordered by cost.
-- Use any of the ones with equally low
-- cost.
let lowestcost = Remote.cost (remote node)
samecost = node : takeWhile (\n -> Remote.cost (remote n) == lowestcost) rest
in do
n <- getStdRandom $
randomR (0, length samecost - 1)
return (Just (samecost !! n))
nonempty (l:ls) fallback
| null l = nonempty ls fallback

View file

@ -11,18 +11,16 @@ import Annex.Common
import P2P.Proxy
import P2P.Protocol
import P2P.IO
import qualified Remote
import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
-- FIXME: Support special remotes.
proxySshRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
proxySshRemoteSide clientmaxversion bypass remote =
mkRemoteSide (Remote.uuid remote) $
openP2PShellConnection' remote clientmaxversion bypass >>= \case
Just conn@(OpenConnection (remoterunst, remoteconn, _)) ->
return $ Just
( remoterunst
, remoteconn
, void $ liftIO $ closeP2PShellConnection conn
)
_ -> return Nothing
proxySshRemoteSide clientmaxversion bypass r = mkRemoteSide r $
openP2PShellConnection' r clientmaxversion bypass >>= \case
Just conn@(OpenConnection (remoterunst, remoteconn, _)) ->
return $ Just
( remoterunst
, remoteconn
, void $ liftIO $ closeP2PShellConnection conn
)
_ -> return Nothing

View file

@ -60,14 +60,14 @@ performLocal theiruuid servermode = do
p2pErrHandler (const p2pDone) (runFullProto runst conn server)
performProxy :: UUID -> P2P.ServerMode -> Remote -> CommandPerform
performProxy clientuuid servermode remote = do
performProxy clientuuid servermode r = do
clientside <- proxyClientSide clientuuid
getClientProtocolVersion (Remote.uuid remote) clientside
getClientProtocolVersion (Remote.uuid r) clientside
(withclientversion clientside)
p2pErrHandler
where
withclientversion clientside (Just (clientmaxversion, othermsg)) = do
remoteside <- proxySshRemoteSide clientmaxversion mempty remote
remoteside <- proxySshRemoteSide clientmaxversion mempty r
protocolversion <- either (const (min P2P.maxProtocolVersion clientmaxversion)) id
<$> runRemoteSide remoteside
(P2P.net P2P.getProtocolVersion)
@ -77,7 +77,7 @@ performProxy clientuuid servermode remote = do
concurrencyconfig <- noConcurrencyConfig
let runproxy othermsg' = proxy closer proxymethods
servermode clientside
(Remote.uuid remote)
(Remote.uuid r)
(singleProxySelector remoteside)
concurrencyconfig
protocolversion othermsg' p2pErrHandler

View file

@ -18,6 +18,7 @@ import Utility.Metered
import Git.FilePath
import Types.Concurrency
import Annex.Concurrent
import qualified Remote
import Data.Either
import Control.Concurrent.STM
@ -32,14 +33,14 @@ type ProtoCloser = Annex ()
data ClientSide = ClientSide RunState P2PConnection
data RemoteSide = RemoteSide
{ remoteUUID :: UUID
{ remote :: Remote
, remoteConnect :: Annex (Maybe (RunState, P2PConnection, ProtoCloser))
, remoteTMVar :: TMVar (RunState, P2PConnection, ProtoCloser)
}
mkRemoteSide :: UUID -> Annex (Maybe (RunState, P2PConnection, ProtoCloser)) -> Annex RemoteSide
mkRemoteSide remoteuuid remoteconnect = RemoteSide
<$> pure remoteuuid
mkRemoteSide :: Remote -> Annex (Maybe (RunState, P2PConnection, ProtoCloser)) -> Annex RemoteSide
mkRemoteSide r remoteconnect = RemoteSide
<$> pure r
<*> pure remoteconnect
<*> liftIO (atomically newEmptyTMVar)
@ -328,9 +329,9 @@ proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remo
net $ sendMessage message
net receiveMessage >>= return . \case
Just SUCCESS ->
Just (True, [remoteUUID r])
Just (True, [Remote.uuid (remote r)])
Just (SUCCESS_PLUS us) ->
Just (True, remoteUUID r:us)
Just (True, Remote.uuid (remote r):us)
Just FAILURE ->
Just (False, [])
Just (FAILURE_PLUS us) ->
@ -355,7 +356,7 @@ proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remo
withDATA (relayGET remoteside)
handlePUT (remoteside:[]) k message
| remoteUUID remoteside == remoteuuid =
| Remote.uuid (remote remoteside) == remoteuuid =
getresponse (runRemoteSide remoteside) message $ \resp -> case resp of
ALREADY_HAVE -> protoerrhandler proxynextclientmessage $
client $ net $ sendMessage resp
@ -390,10 +391,10 @@ proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remo
proxynextclientmessage ()
relayPUTRecord k remoteside SUCCESS = do
addedContent proxymethods (remoteUUID remoteside) k
return $ Just [remoteUUID remoteside]
addedContent proxymethods (Remote.uuid (remote remoteside)) k
return $ Just [Remote.uuid (remote remoteside)]
relayPUTRecord k remoteside (SUCCESS_PLUS us) = do
let us' = remoteUUID remoteside : us
let us' = (Remote.uuid (remote remoteside)) : us
forM_ us' $ \u ->
addedContent proxymethods u k
return $ Just us'
@ -425,7 +426,7 @@ proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remo
else protoerrhandler proxynextclientmessage $
client $ net $ sendMessage $ ALREADY_HAVE_PLUS $
filter (/= remoteuuid) $
map remoteUUID (lefts (rights l))
map (Remote.uuid . remote) (lefts (rights l))
else if null (rights l)
-- no response from any remote
then proxydone
@ -439,11 +440,11 @@ proxy proxydone proxymethods servermode (ClientSide clientrunst clientconn) remo
let totallen = datalen + minoffset
-- Tell each remote how much data to expect, depending
-- on the remote's offset.
rs <- forMC concurrencyconfig remotes $ \remote@(remoteside, remoteoffset) ->
rs <- forMC concurrencyconfig remotes $ \r@(remoteside, remoteoffset) ->
runRemoteSideOrSkipFailed remoteside $ do
net $ sendMessage $ DATA $ Len $
totallen - remoteoffset
return remote
return r
protoerrhandler (send (catMaybes rs) minoffset) $
client $ net $ receiveBytes (Len datalen) nullMeterUpdate
where

View file

@ -36,10 +36,19 @@ For June's work on [[design/passthrough_proxy]], remaining todos:
* Indirect uploads when proxying for special remote
(to be considered). See design.
* Getting a key from a cluster currently always selects the lowest cost
remote, and always the same remote if cost is the same. Should
round-robin amoung remotes, and prefer to avoid using remotes that
other git-annex processes are currently using.
* Getting a key from a cluster currently picks from amoung
the lowest cost remotes at random. This could be smarter,
eg prefer to avoid using remotes that are doing other transfers at the
same time.
* The cost of a cluster and of its proxied nodes is currently all the same.
It would make sense for proxied nodes that are accessed via an intermedia
gateway to have a higher cost than proxied nodes that are accessed via
the remote gateway. And proxied nodes should generally have a higher cost
than the cluster, so that git-annex defaults to using the cluster.
(The cost of accessing a proxied node vs using the cluster is the same,
but using the cluster allows smarter load-balancing to be done on the
cluster. It also makes the UI not mention individual nodes.)
* Optimise proxy speed. See design for ideas.