2024-07-07 16:08:10 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
|
|
|
- Copyright 2024 Joey Hess <id@joeyh.name>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE DataKinds #-}
|
|
|
|
{-# LANGUAGE TypeOperators #-}
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
|
|
|
module Command.P2PHttp where
|
|
|
|
|
2024-11-21 18:15:14 +00:00
|
|
|
import Command hiding (jobsOption)
|
2024-07-23 18:12:03 +00:00
|
|
|
import P2P.Http.Server
|
2024-07-23 13:12:21 +00:00
|
|
|
import P2P.Http.Url
|
2024-07-09 17:37:55 +00:00
|
|
|
import qualified P2P.Protocol as P2P
|
2024-07-10 03:44:40 +00:00
|
|
|
import Utility.Env
|
2024-11-21 17:53:23 +00:00
|
|
|
import Annex.UUID
|
|
|
|
import qualified Git
|
|
|
|
import qualified Git.Construct
|
|
|
|
import qualified Annex
|
2024-11-21 18:15:14 +00:00
|
|
|
import Types.Concurrency
|
2024-07-07 16:08:10 +00:00
|
|
|
|
2024-07-24 13:45:14 +00:00
|
|
|
import Servant
|
convert lockcontent api to http long polling
Websockets would work, but the problem with using them for this is that
each lockcontent call is a separate websocket connection. And that's an
actual TCP connection. One TCP connection per file dropped would be too
expensive. With http long polling, regular http pipelining can be used,
so it will reuse a TCP connection.
Unfortunately, at least with servant, bi-directional streams with long
polling don't result in true bidirectional full duplex communication.
Servant processes the whole client body stream before generating the server
body stream. I think it's entirely possible to do full bi-directional
communication over http, but it would need changes to servant.
And, there's no way for the client to tell if the server successfully
locked the content, since the server will keep processing the client
stream no matter what.:
So, added a new api endpoint, keeplocked. lockcontent will lock the key
for 10 minutes with retention lock, and then a call to keeplocked will
keep it locked for as long as needed. This does mean that there will
need to be a Map of locks by key, and I will probably want to add
some kind of lock identifier that lockcontent returns.
2024-07-08 14:40:38 +00:00
|
|
|
import qualified Network.Wai.Handler.Warp as Warp
|
2024-07-23 19:19:56 +00:00
|
|
|
import qualified Network.Wai.Handler.WarpTLS as Warp
|
2024-07-10 03:44:40 +00:00
|
|
|
import Network.Socket (PortNumber)
|
|
|
|
import qualified Data.Map as M
|
2024-07-23 19:19:56 +00:00
|
|
|
import Data.String
|
2024-11-21 19:09:12 +00:00
|
|
|
import Control.Concurrent.STM
|
convert lockcontent api to http long polling
Websockets would work, but the problem with using them for this is that
each lockcontent call is a separate websocket connection. And that's an
actual TCP connection. One TCP connection per file dropped would be too
expensive. With http long polling, regular http pipelining can be used,
so it will reuse a TCP connection.
Unfortunately, at least with servant, bi-directional streams with long
polling don't result in true bidirectional full duplex communication.
Servant processes the whole client body stream before generating the server
body stream. I think it's entirely possible to do full bi-directional
communication over http, but it would need changes to servant.
And, there's no way for the client to tell if the server successfully
locked the content, since the server will keep processing the client
stream no matter what.:
So, added a new api endpoint, keeplocked. lockcontent will lock the key
for 10 minutes with retention lock, and then a call to keeplocked will
keep it locked for as long as needed. This does mean that there will
need to be a Map of locks by key, and I will probably want to add
some kind of lock identifier that lockcontent returns.
2024-07-08 14:40:38 +00:00
|
|
|
|
2024-07-07 16:08:10 +00:00
|
|
|
cmd :: Command
|
2024-11-21 18:15:14 +00:00
|
|
|
cmd = noMessages $ dontCheck repoExists $
|
|
|
|
noRepo (startIO <$$> optParser) $
|
|
|
|
command "p2phttp" SectionPlumbing
|
|
|
|
"communicate in P2P protocol over http"
|
|
|
|
paramNothing (startAnnex <$$> optParser)
|
2024-07-09 21:30:55 +00:00
|
|
|
|
|
|
|
data Options = Options
|
2024-07-10 03:44:40 +00:00
|
|
|
{ portOption :: Maybe PortNumber
|
2024-07-23 19:19:56 +00:00
|
|
|
, bindOption :: Maybe String
|
2024-07-23 19:37:36 +00:00
|
|
|
, certFileOption :: Maybe FilePath
|
|
|
|
, privateKeyFileOption :: Maybe FilePath
|
|
|
|
, chainFileOption :: [FilePath]
|
2024-07-09 21:30:55 +00:00
|
|
|
, authEnvOption :: Bool
|
|
|
|
, authEnvHttpOption :: Bool
|
2024-07-10 03:44:40 +00:00
|
|
|
, unauthReadOnlyOption :: Bool
|
|
|
|
, unauthAppendOnlyOption :: Bool
|
2024-10-21 14:02:12 +00:00
|
|
|
, unauthNoLockingOption :: Bool
|
2024-07-09 21:30:55 +00:00
|
|
|
, wideOpenOption :: Bool
|
implement proxy connection pool
removeOldestProxyConnectionPool will be innefficient the larger the pool
is. A better data structure could be more efficient. Eg, make each value
in the pool include the timestamp of its oldest element, then the oldest
value can be found and modified, rather than rebuilding the whole Map.
But, for pools of a few hundred items, this should be fine. It's O(n*n log n)
or so.
Also, when more than 1 connection with the same pool key exists,
it's efficient even for larger pools, since removeOldestProxyConnectionPool
is not needed.
The default of 1 idle connection could perhaps be larger.. like the
number of jobs? Otoh, it seems good to ramp up and down the number of
connections, which does happen. With 1, there is at most one stale
connection, which might cause a request to fail.
2024-07-26 20:34:08 +00:00
|
|
|
, proxyConnectionsOption :: Maybe Integer
|
2024-11-21 18:15:14 +00:00
|
|
|
, jobsOption :: Maybe Concurrency
|
2024-07-28 14:36:22 +00:00
|
|
|
, clusterJobsOption :: Maybe Int
|
2024-11-21 17:53:23 +00:00
|
|
|
, directoryOption :: [FilePath]
|
2024-07-09 21:30:55 +00:00
|
|
|
}
|
2024-07-07 16:08:10 +00:00
|
|
|
|
2024-07-10 03:44:40 +00:00
|
|
|
optParser :: CmdParamsDesc -> Parser Options
|
|
|
|
optParser _ = Options
|
|
|
|
<$> optional (option auto
|
|
|
|
( long "port" <> metavar paramNumber
|
|
|
|
<> help "specify port to listen on"
|
|
|
|
))
|
2024-07-23 19:19:56 +00:00
|
|
|
<*> optional (strOption
|
|
|
|
( long "bind" <> metavar paramAddress
|
|
|
|
<> help "specify address to bind to"
|
|
|
|
))
|
2024-07-23 19:37:36 +00:00
|
|
|
<*> optional (strOption
|
|
|
|
( long "certfile" <> metavar paramFile
|
|
|
|
<> help "TLS certificate file for HTTPS"
|
|
|
|
))
|
|
|
|
<*> optional (strOption
|
|
|
|
( long "privatekeyfile" <> metavar paramFile
|
|
|
|
<> help "TLS private key file for HTTPS"
|
|
|
|
))
|
|
|
|
<*> many (strOption
|
|
|
|
( long "chainfile" <> metavar paramFile
|
|
|
|
<> help "TLS chain file"
|
|
|
|
))
|
2024-07-10 03:44:40 +00:00
|
|
|
<*> switch
|
|
|
|
( long "authenv"
|
|
|
|
<> help "authenticate users from environment (https only)"
|
|
|
|
)
|
|
|
|
<*> switch
|
|
|
|
( long "authenv-http"
|
|
|
|
<> help "authenticate users from environment (including http)"
|
|
|
|
)
|
|
|
|
<*> switch
|
|
|
|
( long "unauth-readonly"
|
|
|
|
<> help "allow unauthenticated users to read the repository"
|
|
|
|
)
|
|
|
|
<*> switch
|
|
|
|
( long "unauth-appendonly"
|
|
|
|
<> help "allow unauthenticated users to read and append to the repository"
|
|
|
|
)
|
2024-10-21 14:02:12 +00:00
|
|
|
<*> switch
|
|
|
|
( long "unauth-nolocking"
|
|
|
|
<> help "prevent unauthenticated users from locking content in the repository"
|
|
|
|
)
|
2024-07-10 03:44:40 +00:00
|
|
|
<*> switch
|
|
|
|
( long "wideopen"
|
|
|
|
<> help "give unauthenticated users full read+write access"
|
|
|
|
)
|
implement proxy connection pool
removeOldestProxyConnectionPool will be innefficient the larger the pool
is. A better data structure could be more efficient. Eg, make each value
in the pool include the timestamp of its oldest element, then the oldest
value can be found and modified, rather than rebuilding the whole Map.
But, for pools of a few hundred items, this should be fine. It's O(n*n log n)
or so.
Also, when more than 1 connection with the same pool key exists,
it's efficient even for larger pools, since removeOldestProxyConnectionPool
is not needed.
The default of 1 idle connection could perhaps be larger.. like the
number of jobs? Otoh, it seems good to ramp up and down the number of
connections, which does happen. With 1, there is at most one stale
connection, which might cause a request to fail.
2024-07-26 20:34:08 +00:00
|
|
|
<*> optional (option auto
|
|
|
|
( long "proxyconnections" <> metavar paramNumber
|
|
|
|
<> help "maximum number of idle connections when proxying"
|
|
|
|
))
|
2024-11-21 18:15:14 +00:00
|
|
|
<*> optional jobsOptionParser
|
2024-07-28 14:36:22 +00:00
|
|
|
<*> optional (option auto
|
|
|
|
( long "clusterjobs" <> metavar paramNumber
|
|
|
|
<> help "number of concurrent node accesses per connection"
|
|
|
|
))
|
2024-11-21 17:53:23 +00:00
|
|
|
<*> many (strOption
|
|
|
|
( long "directory" <> metavar paramPath
|
|
|
|
<> help "serve repositories in subdirectories of a directory"
|
|
|
|
))
|
2024-07-10 03:44:40 +00:00
|
|
|
|
2024-11-21 17:53:23 +00:00
|
|
|
startAnnex :: Options -> Annex ()
|
|
|
|
startAnnex o
|
|
|
|
| null (directoryOption o) = ifM ((/=) NoUUID <$> getUUID)
|
|
|
|
( do
|
|
|
|
authenv <- liftIO getAuthEnv
|
|
|
|
st <- mkServerState o authenv
|
|
|
|
liftIO $ runServer o st
|
|
|
|
-- Run in a git repository that is not a git-annex repository.
|
|
|
|
, liftIO $ startIO o
|
|
|
|
)
|
|
|
|
| otherwise = liftIO $ startIO o
|
|
|
|
|
|
|
|
startIO :: Options -> IO ()
|
|
|
|
startIO o
|
|
|
|
| null (directoryOption o) =
|
|
|
|
giveup "Use the --directory option to specify which git-annex repositories to serve."
|
|
|
|
| otherwise = do
|
2024-07-10 03:44:40 +00:00
|
|
|
authenv <- getAuthEnv
|
2024-11-21 19:09:12 +00:00
|
|
|
st <- mkst authenv mempty
|
|
|
|
runServer o st
|
|
|
|
where
|
|
|
|
mkst authenv oldst = do
|
2024-11-21 17:53:23 +00:00
|
|
|
repos <- findRepos o
|
|
|
|
sts <- forM repos $ \r -> do
|
|
|
|
strd <- Annex.new r
|
2024-11-21 19:09:12 +00:00
|
|
|
Annex.eval strd (mkstannex authenv oldst)
|
|
|
|
return (mconcat sts)
|
|
|
|
{ updateRepos = updaterepos authenv
|
|
|
|
}
|
|
|
|
|
|
|
|
mkstannex authenv oldst = do
|
|
|
|
u <- getUUID
|
|
|
|
if u == NoUUID
|
|
|
|
then return mempty
|
|
|
|
else case M.lookup u (servedRepos oldst) of
|
|
|
|
Nothing -> mkServerState o authenv
|
|
|
|
Just old -> return $ P2PHttpServerState
|
|
|
|
{ servedRepos = M.singleton u old
|
|
|
|
, serverShutdownCleanup = mempty
|
|
|
|
, updateRepos = mempty
|
|
|
|
}
|
|
|
|
|
|
|
|
updaterepos authenv oldst = do
|
|
|
|
newst <- mkst authenv oldst
|
|
|
|
return $ newst
|
|
|
|
{ serverShutdownCleanup =
|
|
|
|
serverShutdownCleanup newst
|
|
|
|
<> serverShutdownCleanup oldst
|
|
|
|
}
|
2024-11-21 17:53:23 +00:00
|
|
|
|
|
|
|
runServer :: Options -> P2PHttpServerState -> IO ()
|
|
|
|
runServer o mst = go `finally` serverShutdownCleanup mst
|
|
|
|
where
|
|
|
|
go = do
|
2024-07-23 19:19:56 +00:00
|
|
|
let settings = Warp.setPort port $ Warp.setHost host $
|
|
|
|
Warp.defaultSettings
|
2024-11-21 19:09:12 +00:00
|
|
|
mstv <- newTMVarIO mst
|
2024-07-23 19:37:36 +00:00
|
|
|
case (certFileOption o, privateKeyFileOption o) of
|
2024-11-21 19:09:12 +00:00
|
|
|
(Nothing, Nothing) -> Warp.runSettings settings (p2pHttpApp mstv)
|
2024-07-23 19:37:36 +00:00
|
|
|
(Just certfile, Just privatekeyfile) -> do
|
|
|
|
let tlssettings = Warp.tlsSettingsChain
|
|
|
|
certfile (chainFileOption o) privatekeyfile
|
2024-11-21 19:09:12 +00:00
|
|
|
Warp.runTLS tlssettings settings (p2pHttpApp mstv)
|
2024-07-23 19:37:36 +00:00
|
|
|
_ -> giveup "You must use both --certfile and --privatekeyfile options to enable HTTPS."
|
2024-07-23 19:19:56 +00:00
|
|
|
port = maybe
|
2024-07-23 13:12:21 +00:00
|
|
|
(fromIntegral defaultP2PHttpProtocolPort)
|
2024-07-23 19:19:56 +00:00
|
|
|
fromIntegral
|
2024-07-23 13:12:21 +00:00
|
|
|
(portOption o)
|
2024-07-23 19:19:56 +00:00
|
|
|
host = maybe
|
|
|
|
(fromString "*") -- both ipv4 and ipv6
|
|
|
|
fromString
|
|
|
|
(bindOption o)
|
2024-07-10 03:44:40 +00:00
|
|
|
|
2024-11-21 17:53:23 +00:00
|
|
|
mkServerState :: Options -> M.Map Auth P2P.ServerMode -> Annex P2PHttpServerState
|
|
|
|
mkServerState o authenv =
|
2024-11-21 18:15:14 +00:00
|
|
|
withAnnexWorkerPool (jobsOption o) $
|
2024-11-21 17:53:23 +00:00
|
|
|
mkP2PHttpServerState
|
|
|
|
(mkGetServerMode authenv o)
|
2024-11-21 19:09:12 +00:00
|
|
|
return
|
2024-11-21 17:53:23 +00:00
|
|
|
(fromMaybe 1 $ proxyConnectionsOption o)
|
|
|
|
(fromMaybe 1 $ clusterJobsOption o)
|
|
|
|
|
2024-07-10 03:44:40 +00:00
|
|
|
mkGetServerMode :: M.Map Auth P2P.ServerMode -> Options -> GetServerMode
|
|
|
|
mkGetServerMode _ o _ Nothing
|
2024-10-21 14:02:12 +00:00
|
|
|
| wideOpenOption o = ServerMode
|
|
|
|
{ serverMode = P2P.ServeReadWrite
|
|
|
|
, unauthenticatedLockingAllowed = unauthlock
|
|
|
|
, authenticationAllowed = False
|
|
|
|
}
|
|
|
|
| unauthAppendOnlyOption o = ServerMode
|
|
|
|
{ serverMode = P2P.ServeAppendOnly
|
|
|
|
, unauthenticatedLockingAllowed = unauthlock
|
|
|
|
, authenticationAllowed = canauth
|
|
|
|
}
|
|
|
|
| unauthReadOnlyOption o = ServerMode
|
|
|
|
{ serverMode = P2P.ServeReadOnly
|
|
|
|
, unauthenticatedLockingAllowed = unauthlock
|
|
|
|
, authenticationAllowed = canauth
|
|
|
|
}
|
2024-10-17 15:10:28 +00:00
|
|
|
| otherwise = CannotServeRequests
|
|
|
|
where
|
|
|
|
canauth = authEnvOption o || authEnvHttpOption o
|
2024-10-21 14:02:12 +00:00
|
|
|
unauthlock = not (unauthNoLockingOption o)
|
2024-07-10 03:44:40 +00:00
|
|
|
mkGetServerMode authenv o issecure (Just auth) =
|
|
|
|
case (issecure, authEnvOption o, authEnvHttpOption o) of
|
|
|
|
(Secure, True, _) -> checkauth
|
|
|
|
(NotSecure, _, True) -> checkauth
|
|
|
|
_ -> noauth
|
|
|
|
where
|
|
|
|
checkauth = case M.lookup auth authenv of
|
2024-10-21 14:02:12 +00:00
|
|
|
Just servermode -> ServerMode
|
|
|
|
{ serverMode = servermode
|
|
|
|
, authenticationAllowed = False
|
|
|
|
, unauthenticatedLockingAllowed = False
|
|
|
|
}
|
2024-07-10 03:44:40 +00:00
|
|
|
Nothing -> noauth
|
2024-10-17 15:10:28 +00:00
|
|
|
noauth = mkGetServerMode authenv noautho issecure Nothing
|
|
|
|
noautho = o { authEnvOption = False, authEnvHttpOption = False }
|
2024-07-10 03:44:40 +00:00
|
|
|
|
|
|
|
getAuthEnv :: IO (M.Map Auth P2P.ServerMode)
|
|
|
|
getAuthEnv = do
|
|
|
|
environ <- getEnvironment
|
|
|
|
let permmap = M.fromList (mapMaybe parseperms environ)
|
|
|
|
return $ M.fromList $
|
|
|
|
map (addperms permmap) $
|
|
|
|
mapMaybe parseusername environ
|
|
|
|
where
|
|
|
|
parseperms (k, v) = case deprefix "GIT_ANNEX_P2PHTTP_PERMISSIONS_" k of
|
|
|
|
Nothing -> Nothing
|
|
|
|
Just username -> case v of
|
|
|
|
"readonly" -> Just
|
|
|
|
(encodeBS username, P2P.ServeReadOnly)
|
|
|
|
"appendonly" -> Just
|
|
|
|
(encodeBS username, P2P.ServeAppendOnly)
|
|
|
|
_ -> Nothing
|
|
|
|
|
|
|
|
parseusername (k, v) = case deprefix "GIT_ANNEX_P2PHTTP_PASSWORD_" k of
|
|
|
|
Nothing -> Nothing
|
|
|
|
Just username -> Just $ Auth (encodeBS username) (encodeBS v)
|
|
|
|
|
|
|
|
deprefix prefix s
|
|
|
|
| prefix `isPrefixOf` s = Just (drop (length prefix) s)
|
|
|
|
| otherwise = Nothing
|
|
|
|
|
|
|
|
addperms permmap auth@(Auth user _) =
|
|
|
|
case M.lookup user permmap of
|
|
|
|
Nothing -> (auth, P2P.ServeReadWrite)
|
|
|
|
Just perms -> (auth, perms)
|
2024-11-21 17:53:23 +00:00
|
|
|
|
|
|
|
findRepos :: Options -> IO [Git.Repo]
|
|
|
|
findRepos o = do
|
|
|
|
files <- map toRawFilePath . concat
|
|
|
|
<$> mapM dirContents (directoryOption o)
|
|
|
|
map Git.Construct.newFrom . catMaybes
|
|
|
|
<$> mapM Git.Construct.checkForRepo files
|
|
|
|
|