{- git-annex command
 -
 - Copyright 2024 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}

module Command.P2PHttp where

import Command
import P2P.Http.Server
import P2P.Http.Url
import qualified P2P.Protocol as P2P
import Utility.Env

import Servant
import qualified Network.Wai.Handler.Warp as Warp
import qualified Network.Wai.Handler.WarpTLS as Warp
import Network.Socket (PortNumber)
import qualified Data.Map as M
import Data.String

cmd :: Command
cmd = noMessages $ withAnnexOptions [jobsOption] $
	command "p2phttp" SectionPlumbing
		"communicate in P2P protocol over http"
		paramNothing (seek <$$> optParser)
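
-- An illustrative invocation (the port number and the choice of options
-- here are arbitrary examples, not defaults):
--
--   git-annex p2phttp --port 8080 --wideopen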

data Options = Options
	{ portOption :: Maybe PortNumber
	, bindOption :: Maybe String
	, certFileOption :: Maybe FilePath
	, privateKeyFileOption :: Maybe FilePath
	, chainFileOption :: [FilePath]
	, authEnvOption :: Bool
	, authEnvHttpOption :: Bool
	, unauthReadOnlyOption :: Bool
	, unauthAppendOnlyOption :: Bool
	, wideOpenOption :: Bool
	, proxyConnectionsOption :: Maybe Integer
	, clusterJobsOption :: Maybe Int
	}

optParser :: CmdParamsDesc -> Parser Options
optParser _ = Options
	<$> optional (option auto
		( long "port" <> metavar paramNumber
		<> help "specify port to listen on"
		))
	<*> optional (strOption
		( long "bind" <> metavar paramAddress
		<> help "specify address to bind to"
		))
	<*> optional (strOption
		( long "certfile" <> metavar paramFile
		<> help "TLS certificate file for HTTPS"
		))
	<*> optional (strOption
		( long "privatekeyfile" <> metavar paramFile
		<> help "TLS private key file for HTTPS"
		))
	<*> many (strOption
		( long "chainfile" <> metavar paramFile
		<> help "TLS chain file"
		))
	<*> switch
		( long "authenv"
		<> help "authenticate users from environment (https only)"
		)
	<*> switch
		( long "authenv-http"
		<> help "authenticate users from environment (including http)"
		)
	<*> switch
		( long "unauth-readonly"
		<> help "allow unauthenticated users to read the repository"
		)
	<*> switch
		( long "unauth-appendonly"
		<> help "allow unauthenticated users to read and append to the repository"
		)
	<*> switch
		( long "wideopen"
		<> help "give unauthenticated users full read+write access"
		)
	<*> optional (option auto
		( long "proxyconnections" <> metavar paramNumber
		<> help "maximum number of idle connections when proxying"
		))
	<*> optional (option auto
		( long "clusterjobs" <> metavar paramNumber
		<> help "number of concurrent node accesses per connection"
		))

seek :: Options -> CommandSeek
seek o = getAnnexWorkerPool $ \workerpool ->
	withP2PConnections workerpool
		(fromMaybe 1 $ proxyConnectionsOption o)
		(fromMaybe 1 $ clusterJobsOption o)
		(go workerpool)
  where
	go workerpool acquireconn = liftIO $ do
		authenv <- getAuthEnv
		st <- mkP2PHttpServerState acquireconn workerpool $
			mkGetServerMode authenv o
		let settings = Warp.setPort port $ Warp.setHost host $
			Warp.defaultSettings
		case (certFileOption o, privateKeyFileOption o) of
			(Nothing, Nothing) -> Warp.runSettings settings (p2pHttpApp st)
			(Just certfile, Just privatekeyfile) -> do
				let tlssettings = Warp.tlsSettingsChain
					certfile (chainFileOption o) privatekeyfile
				Warp.runTLS tlssettings settings (p2pHttpApp st)
			_ -> giveup "You must use both --certfile and --privatekeyfile options to enable HTTPS."

	port = maybe
		(fromIntegral defaultP2PHttpProtocolPort)
		fromIntegral
		(portOption o)

	host = maybe
		(fromString "*") -- both ipv4 and ipv6
		fromString
		(bindOption o)
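
-- Note on access control: for an unauthenticated request, the options are
-- consulted in order of decreasing access (--wideopen, then
-- --unauth-appendonly, then --unauth-readonly); if none is set, no server
-- mode is granted. Credentials from the environment are only checked for
-- https requests when --authenv is set, and for plain http requests when
-- --authenv-http is set; in any other case (including a failed lookup) the
-- request falls back to the unauthenticated behavior above.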
mkGetServerMode :: M.Map Auth P2P.ServerMode -> Options -> GetServerMode
mkGetServerMode _ o _ Nothing
	| wideOpenOption o = Just P2P.ServeReadWrite
	| unauthAppendOnlyOption o = Just P2P.ServeAppendOnly
	| unauthReadOnlyOption o = Just P2P.ServeReadOnly
	| otherwise = Nothing
mkGetServerMode authenv o issecure (Just auth) =
	case (issecure, authEnvOption o, authEnvHttpOption o) of
		(Secure, True, _) -> checkauth
		(NotSecure, _, True) -> checkauth
		_ -> noauth
  where
	checkauth = case M.lookup auth authenv of
		Just servermode -> Just servermode
		Nothing -> noauth
	noauth = mkGetServerMode authenv o issecure Nothing
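
-- A hypothetical example of the environment variables parsed here (the
-- user name and password are made up for illustration):
--
--   GIT_ANNEX_P2PHTTP_PASSWORD_alice=woodchuck
--   GIT_ANNEX_P2PHTTP_PERMISSIONS_alice=readonly
--
-- The recognized permissions values are "readonly" and "appendonly"; a
-- user with a password but no permissions setting gets full read+write
-- access.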
getAuthEnv :: IO (M.Map Auth P2P.ServerMode)
getAuthEnv = do
	environ <- getEnvironment
	let permmap = M.fromList (mapMaybe parseperms environ)
	return $ M.fromList $
		map (addperms permmap) $
			mapMaybe parseusername environ
  where
	parseperms (k, v) = case deprefix "GIT_ANNEX_P2PHTTP_PERMISSIONS_" k of
		Nothing -> Nothing
		Just username -> case v of
			"readonly" -> Just
				(encodeBS username, P2P.ServeReadOnly)
			"appendonly" -> Just
				(encodeBS username, P2P.ServeAppendOnly)
			_ -> Nothing

	parseusername (k, v) = case deprefix "GIT_ANNEX_P2PHTTP_PASSWORD_" k of
		Nothing -> Nothing
		Just username -> Just $ Auth (encodeBS username) (encodeBS v)

	deprefix prefix s
		| prefix `isPrefixOf` s = Just (drop (length prefix) s)
		| otherwise = Nothing

	addperms permmap auth@(Auth user _) =
		case M.lookup user permmap of
			Nothing -> (auth, P2P.ServeReadWrite)
			Just perms -> (auth, perms)