diff --git a/Annex/Cluster.hs b/Annex/Cluster.hs index dc58df5306..2faa7c5056 100644 --- a/Annex/Cluster.hs +++ b/Annex/Cluster.hs @@ -58,10 +58,11 @@ proxyCluster clusteruuid proxydone servermode clientside protoerrhandler = do (withclientbypass protocolversion) (protoerrhandler noop) withclientbypass protocolversion (bypassuuids, othermsg) = do - (selectnode, closenodes, concurrencyconfig) <- + (selectnode, closenodes) <- clusterProxySelector clusteruuid protocolversion bypassuuids proxystate <- liftIO mkProxyState + concurrencyconfig <- concurrencyConfigJobs let proxyparams = ProxyParams { proxyMethods = mkProxyMethods , proxyState = proxystate @@ -79,7 +80,7 @@ clusterProxySelector :: ClusterUUID -> ProtocolVersion -> Bypass - -> Annex (ProxySelector, Annex (), ConcurrencyConfig) + -> Annex (ProxySelector, Annex ()) clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do nodeuuids <- (fromMaybe S.empty . M.lookup clusteruuid . clusterUUIDs) <$> getClusters @@ -120,8 +121,7 @@ clusterProxySelector clusteruuid protocolversion (Bypass bypass) = do -- proxied to the client. , proxyLOCKCONTENT = const (pure Nothing) } - concurrencyconfig <- getConcurrencyConfig - return (proxyselector, closenodes, concurrencyconfig) + return (proxyselector, closenodes) where -- Nodes of the cluster have remote.name.annex-cluster-node -- containing its name. 
diff --git a/Command/P2PHttp.hs b/Command/P2PHttp.hs index 7b24bec941..caa693e139 100644 --- a/Command/P2PHttp.hs +++ b/Command/P2PHttp.hs @@ -41,6 +41,7 @@ data Options = Options , unauthAppendOnlyOption :: Bool , wideOpenOption :: Bool , proxyConnectionsOption :: Maybe Integer + , clusterJobsOption :: Maybe Int } optParser :: CmdParamsDesc -> Parser Options @@ -89,10 +90,16 @@ optParser _ = Options ( long "proxyconnections" <> metavar paramNumber <> help "maximum number of idle connections when proxying" )) + <*> optional (option auto + ( long "clusterjobs" <> metavar paramNumber + <> help "number of concurrent node accesses per connection" + )) seek :: Options -> CommandSeek seek o = getAnnexWorkerPool $ \workerpool -> - withP2PConnections workerpool (fromMaybe 1 $ proxyConnectionsOption o) + withP2PConnections workerpool + (fromMaybe 1 $ proxyConnectionsOption o) + (fromMaybe 1 $ clusterJobsOption o) (go workerpool) where go workerpool acquireconn = liftIO $ do diff --git a/P2P/Http/State.hs b/P2P/Http/State.hs index baf7567df3..899584a52b 100644 --- a/P2P/Http/State.hs +++ b/P2P/Http/State.hs @@ -181,9 +181,10 @@ type AcquireP2PConnection withP2PConnections :: AnnexWorkerPool -> ProxyConnectionPoolSize + -> ClusterConcurrency -> (AcquireP2PConnection -> Annex a) -> Annex a -withP2PConnections workerpool proxyconnectionpoolsize a = do +withP2PConnections workerpool proxyconnectionpoolsize clusterconcurrency a = do myuuid <- getUUID reqv <- liftIO newEmptyTMVarIO relv <- liftIO newEmptyTMVarIO @@ -241,7 +242,7 @@ withP2PConnections workerpool proxyconnectionpoolsize a = do Right (Right (Left clusteruuid)) -> proxyconnection $ openProxyConnectionToCluster workerpool (connectionProtocolVersion connparams) - bypass clusteruuid + bypass clusteruuid clusterconcurrency Left ex -> return $ Left $ ConnectionFailed $ show ex where @@ -557,16 +558,20 @@ openProxyConnectionToRemote workerpool clientmaxversion bypass remote = (Proxy.closeRemoteSide remoteside) 
concurrencyconfig +type ClusterConcurrency = Int + openProxyConnectionToCluster :: AnnexWorkerPool -> P2P.ProtocolVersion -> P2P.Bypass -> ClusterUUID + -> ClusterConcurrency -> IO (Either SomeException ProxyConnection) -openProxyConnectionToCluster workerpool clientmaxversion bypass clusteruuid = +openProxyConnectionToCluster workerpool clientmaxversion bypass clusteruuid concurrency = inAnnexWorker' workerpool $ do - (proxyselector, closenodes, concurrencyconfig) <- + (proxyselector, closenodes) <- clusterProxySelector clusteruuid clientmaxversion bypass + concurrencyconfig <- Proxy.mkConcurrencyConfig concurrency liftIO $ openedProxyConnection (fromClusterUUID clusteruuid) proxyselector closenodes concurrencyconfig diff --git a/P2P/Proxy.hs b/P2P/Proxy.hs index 5e5835b418..f3f1edcae0 100644 --- a/P2P/Proxy.hs +++ b/P2P/Proxy.hs @@ -659,10 +659,13 @@ proxyRequest proxydone proxyparams requestcomplete requestmessage protoerrhandle data ConcurrencyConfig = ConcurrencyConfig Int (MSem.MSem Int) noConcurrencyConfig :: Annex ConcurrencyConfig -noConcurrencyConfig = liftIO $ ConcurrencyConfig 1 <$> MSem.new 1 +noConcurrencyConfig = mkConcurrencyConfig 1 -getConcurrencyConfig :: Annex ConcurrencyConfig -getConcurrencyConfig = (annexJobs <$> Annex.getGitConfig) >>= \case +mkConcurrencyConfig :: Int -> Annex ConcurrencyConfig +mkConcurrencyConfig n = liftIO $ ConcurrencyConfig n <$> MSem.new n + +concurrencyConfigJobs :: Annex ConcurrencyConfig +concurrencyConfigJobs = (annexJobs <$> Annex.getGitConfig) >>= \case NonConcurrent -> noConcurrencyConfig Concurrent n -> go n ConcurrentPerCpu -> go =<< liftIO getNumProcessors @@ -672,8 +675,7 @@ getConcurrencyConfig = (annexJobs <$> Annex.getGitConfig) >>= \case when (n > c) $ liftIO $ setNumCapabilities n setConcurrency (ConcurrencyGitConfig (Concurrent n)) - msem <- liftIO $ MSem.new n - return (ConcurrencyConfig n msem) + mkConcurrencyConfig n forMC :: ConcurrencyConfig -> [a] -> (a -> Annex b) -> Annex [b] forMC _ (x:[]) a 
= do diff --git a/doc/git-annex-p2phttp.mdwn b/doc/git-annex-p2phttp.mdwn index 25497ceae6..d1a4bc70b6 100644 --- a/doc/git-annex-p2phttp.mdwn +++ b/doc/git-annex-p2phttp.mdwn @@ -32,21 +32,37 @@ convenient way to download the content of any key, by using the path * `--jobs=N` `-JN` This or annex.jobs must be set to configure the number of worker - threads. + threads that serve connections to the webserver. - Since the webserver itself uses one thread, this needs to be set to - 2 or more. + Since the webserver itself also uses one of these threads, + this needs to be set to 2 or more. - A good choice is one worker per CPU core: `--jobs=cpus` + A good choice is often one worker per CPU core: `--jobs=cpus` * `--proxyconnections=N` - When is command is run in a repository that is configured to act as a + When this command is run in a repository that is configured to act as a proxy for some of its remotes, this is the maximum number of idle connections to keep open to proxied remotes. The default is 1. +* `--clusterjobs=N` + + When this command is run in a repository that is a gateway for a cluster, + this is the number of concurrent jobs to use to access nodes of the + cluster, per connection to the webserver. + + The default is 1. + + A good choice for this will be a balance between the number of nodes + in the cluster and the value of `--jobs`. + + For example, if the cluster has 4 nodes, and `--jobs=4`, using + `--clusterjobs=4` will make all nodes in the cluster be accessed + concurrently, which is often optimal. But around 20 cores can be needed + when the webserver is busy. + * `--port=N` Port to listen on. 
The default is port 9417, which is the default @@ -122,6 +138,10 @@ git-http-backend(1) [[git-annex-updateproxy]](1) +[[git-annex-initcluster]](1) + +[[git-annex-updatecluster]](1) + # AUTHOR diff --git a/doc/git-annex-updateproxy.mdwn b/doc/git-annex-updateproxy.mdwn index 10e26413f3..e9b77256c5 100644 --- a/doc/git-annex-updateproxy.mdwn +++ b/doc/git-annex-updateproxy.mdwn @@ -26,7 +26,7 @@ it. Then after pulling from "work", git-annex will know about an additional remote, "work-foo". That remote will be accessed using "work" as a proxy. -Proxies can only be accessed via ssh. +Proxies can only be accessed via ssh or by an annex+http url. # OPTIONS diff --git a/doc/tips/clusters.mdwn b/doc/tips/clusters.mdwn index e9fc4d5bcc..fe79bb3875 100644 --- a/doc/tips/clusters.mdwn +++ b/doc/tips/clusters.mdwn @@ -12,8 +12,8 @@ special remotes. ## using a cluster To use a cluster, your repository needs to have its gateway configured as a -remote. Clusters can currently only be accessed via ssh. This gateway -remote is added the same as any other git remote: +remote. Clusters can currently only be accessed via ssh or by an annex+http +url. This gateway remote is added the same as any other git remote: $ git remote add bigserver me@bigserver:annex diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index eced3becfe..f3b3d99550 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -32,11 +32,6 @@ Planned schedule of work: * git-annex testremote cluster -* Support proxying to git remotes using annex+http urls. - (Current documentation says proxying only works with ssh remotes, - so current state is not confusing, but this still needs to be done - eventually.) - ## completed items for July's work on p2p protocol over http * HTTP P2P protocol design [[design/p2p_protocol_over_http]]. @@ -53,14 +48,15 @@ Planned schedule of work: * Make http server support proxying. +* Make http server support serving a cluster. 
+ ## items deferred until later for p2p protocol over http +* Support proxying to git remotes that use annex+http urls. + * `git-annex p2phttp` could support systemd socket activation. This would allow making a systemd unit that listens on port 80. -* `git-annex p2phttp` could serve `.git/annex/p2phttp/.well-known/`, - allowing it to be used by an ACME client to get certificates. - ## items deferred until later for [[design/passthrough_proxy]] * Check annex.diskreserve when proxying for special remotes