rethought the async protocol some more

Moving jobid generation to the git-annex side lets it be simplified a
lot.

Note that it will also be possible to generate one jobid per connection,
rather than a new job per request. That will make overflow not an issue,
and will avoid some work, and will simplify some of the code.
This commit is contained in:
Joey Hess 2020-08-13 20:18:06 -04:00
parent 59cbb42ee2
commit 72561563d9
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 104 additions and 182 deletions

View file

@ -26,25 +26,17 @@ import qualified Data.Map.Strict as M
runRelayToExternalAsync :: External -> ExternalState -> IO ExternalAsyncRelay runRelayToExternalAsync :: External -> ExternalState -> IO ExternalAsyncRelay
runRelayToExternalAsync external st = do runRelayToExternalAsync external st = do
jidmap <- newTVarIO M.empty jidmap <- newTVarIO M.empty
mapjid <- newTVarIO M.empty
commcounter <- newTVarIO 0
newconns <- newTVarIO []
sendq <- newSendQueue sendq <- newSendQueue
void $ async $ sendloop st newconns mapjid jidmap sendq nextjid <- newTVarIO (JobId 1)
void $ async $ receiveloop external st newconns jidmap mapjid sendq void $ async $ sendloop st nextjid jidmap sendq
void $ async $ receiveloop external st jidmap sendq
return $ ExternalAsyncRelay $ do return $ ExternalAsyncRelay $ do
n <- atomically $ do jidv <- newTVarIO Nothing
n <- readTVar commcounter
let n' = succ n
writeTVar commcounter n'
return n'
receiveq <- newReceiveQueue receiveq <- newReceiveQueue
return $ ExternalState return $ ExternalState
{ externalSend = \msg -> { externalSend = \msg ->
atomically $ writeTBMChan sendq atomically $ writeTBMChan sendq
( toAsyncWrapped msg (toAsyncWrapped msg, (jidv, receiveq))
, (n, receiveq)
)
, externalReceive = atomically (readTBMChan receiveq) , externalReceive = atomically (readTBMChan receiveq)
-- This shuts down the whole relay. -- This shuts down the whole relay.
, externalShutdown = shutdown external st sendq , externalShutdown = shutdown external st sendq
@ -62,102 +54,64 @@ type ReceiveQueue = TBMChan String
type SendQueue = TBMChan (AsyncWrapped, Conn) type SendQueue = TBMChan (AsyncWrapped, Conn)
type ConnNum = Integer type Conn = (TVar (Maybe JobId), ReceiveQueue)
type Conn = (ConnNum, ReceiveQueue)
type NewConns = TVar [Conn]
type MapJid = TVar (M.Map ConnNum JobId)
type JidMap = TVar (M.Map JobId Conn) type JidMap = TVar (M.Map JobId Conn)
type NextJid = TVar JobId
newReceiveQueue :: IO ReceiveQueue newReceiveQueue :: IO ReceiveQueue
newReceiveQueue = newTBMChanIO 10 newReceiveQueue = newTBMChanIO 10
newSendQueue :: IO SendQueue newSendQueue :: IO SendQueue
newSendQueue = newTBMChanIO 10 newSendQueue = newTBMChanIO 10
receiveloop :: External -> ExternalState -> NewConns -> JidMap -> MapJid -> SendQueue -> IO () receiveloop :: External -> ExternalState -> JidMap -> SendQueue -> IO ()
receiveloop external st newconns jidmap mapjid sendq = externalReceive st >>= \case receiveloop external st jidmap sendq = externalReceive st >>= \case
Just l -> case parseMessage l :: Maybe AsyncMessage of Just l -> case parseMessage l :: Maybe AsyncMessage of
Just (RESULT_ASYNC msg) -> getnext newconns >>= \case Just (AsyncMessage jid msg) ->
Just (_n, c) -> do M.lookup jid <$> readTVarIO jidmap >>= \case
relayto c msg Just (_jidv, c) -> do
loop atomically $ writeTBMChan c msg
Nothing -> protoerr "unexpected RESULT-ASYNC" receiveloop external st jidmap sendq
Just (START_ASYNC jid) -> getnext newconns >>= \case Nothing -> protoerr "unknown job number"
Just v@(n, _c) -> do
atomically $ do
modifyTVar' jidmap $ M.insert jid v
modifyTVar' mapjid $ M.insert n jid
loop
Nothing -> protoerr "unexpected START-ASYNC"
Just (ASYNC jid msg) -> getjid jid >>= \case
Just (_n, c) -> do
relayto c msg
loop
Nothing -> protoerr "ASYNC with unknown jobid"
_ -> protoerr "unexpected non-async message" _ -> protoerr "unexpected non-async message"
Nothing -> closeandshutdown Nothing -> closeandshutdown
where where
loop = receiveloop external st newconns jidmap mapjid sendq
relayto q msg = atomically $ writeTBMChan q msg
closerelayto q = atomically $ closeTBMChan q
getnext l = atomically $ readTVar l >>= \case
[] -> return Nothing
(c:rest) -> do
writeTVar l rest
return (Just c)
getjid jid = M.lookup jid <$> readTVarIO jidmap
protoerr s = do protoerr s = do
warningIO $ "async external special remote protocol error: " ++ s warningIO $ "async external special remote protocol error: " ++ s
closeandshutdown closeandshutdown
closeandshutdown = do closeandshutdown = do
shutdown external st sendq True shutdown external st sendq True
(m, l) <- atomically $ (,) m <- atomically $ readTVar jidmap
<$> readTVar jidmap forM_ (M.elems m) (atomically . closeTBMChan . snd)
<*> readTVar newconns
forM_ (M.elems m ++ l) (closerelayto . snd)
sendloop :: ExternalState -> NewConns -> MapJid -> JidMap -> SendQueue -> IO () sendloop :: ExternalState -> NextJid -> JidMap -> SendQueue -> IO ()
sendloop st newconns mapjid jidmap sendq = atomically (readTBMChan sendq) >>= \case sendloop st nextjid jidmap sendq = atomically (readTBMChan sendq) >>= \case
Just (wrappedmsg, c@(n, _)) -> do Just (wrappedmsg, conn@(jidv, _)) -> do
let newconn = atomically $ do
-- This append is not too expensive,
-- because the list length is limited
-- to the maximum number of jobs.
modifyTVar' newconns (++[c])
M.lookup n <$> readTVar mapjid >>= \case
Nothing -> return ()
Just jid -> do
modifyTVar' jidmap (M.delete jid)
modifyTVar' mapjid (M.delete n)
case wrappedmsg of case wrappedmsg of
AsyncWrappedRequest msg -> do AsyncWrappedRequest msg -> do
newconn jid <- atomically $ do
externalSend st msg jid@(JobId n) <- readTVar nextjid
AsyncWrappedExceptionalMessage msg -> do let !jid' = JobId (succ n)
newconn writeTVar nextjid jid'
externalSend st msg writeTVar jidv (Just jid)
modifyTVar' jidmap $ M.insert jid conn
return jid
externalSend st $ wrapjid msg jid
AsyncWrappedRemoteResponse msg -> AsyncWrappedRemoteResponse msg ->
externalSend st =<< wrapremoteresponse msg n readTVarIO jidv >>= \case
AsyncWrappedAsyncReply msg -> Just jid -> externalSend st $ wrapjid msg jid
Nothing -> error "failed to find jid"
AsyncWrappedExceptionalMessage msg ->
externalSend st msg externalSend st msg
sendloop st newconns mapjid jidmap sendq AsyncWrappedAsyncMessage msg ->
externalSend st msg
sendloop st nextjid jidmap sendq
Nothing -> return () Nothing -> return ()
where where
wrapremoteresponse msg n = wrapjid msg jid = AsyncMessage jid $ unwords $ Proto.formatMessage msg
M.lookup n <$> readTVarIO mapjid >>= \case
Just jid -> return $ REPLY_ASYNC jid $
unwords $ Proto.formatMessage msg
Nothing -> error "failed to find jobid"
shutdown :: External -> ExternalState -> SendQueue -> Bool -> IO () shutdown :: External -> ExternalState -> SendQueue -> Bool -> IO ()
shutdown external st sendq b = do shutdown external st sendq b = do

View file

@ -32,10 +32,9 @@ module Remote.External.Types (
RemoteResponse(..), RemoteResponse(..),
ExceptionalMessage(..), ExceptionalMessage(..),
AsyncMessage(..), AsyncMessage(..),
AsyncReply(..),
AsyncWrapped(..), AsyncWrapped(..),
ToAsyncWrapped(..), ToAsyncWrapped(..),
JobId, JobId(..),
ErrorMsg, ErrorMsg,
Setting, Setting,
Description, Description,
@ -59,6 +58,7 @@ import qualified Utility.SimpleProtocol as Proto
import Control.Concurrent.STM import Control.Concurrent.STM
import Network.URI import Network.URI
import Data.Char import Data.Char
import Text.Read
data External = External data External = External
{ externalType :: ExternalType { externalType :: ExternalType
@ -363,35 +363,24 @@ instance Proto.Receivable ExceptionalMessage where
parseCommand "ERROR" = Proto.parse1 ERROR parseCommand "ERROR" = Proto.parse1 ERROR
parseCommand _ = Proto.parseFail parseCommand _ = Proto.parseFail
-- Messages sent by the special remote in the async protocol extension. data AsyncMessage = AsyncMessage JobId WrappedMsg
data AsyncMessage
= START_ASYNC JobId
| ASYNC JobId WrappedMsg
| RESULT_ASYNC WrappedMsg
-- Reply sent in the async protocol extension.
data AsyncReply
= REPLY_ASYNC JobId WrappedMsg
instance Proto.Receivable AsyncMessage where instance Proto.Receivable AsyncMessage where
parseCommand "START-ASYNC" = Proto.parse1 START_ASYNC parseCommand "J" = Proto.parse2 AsyncMessage
parseCommand "ASYNC" = Proto.parse2 ASYNC
parseCommand "RESULT-ASYNC" = Proto.parse1 RESULT_ASYNC
parseCommand _ = Proto.parseFail parseCommand _ = Proto.parseFail
instance Proto.Sendable AsyncReply where instance Proto.Sendable AsyncMessage where
formatMessage (REPLY_ASYNC jid msg) = ["REPLY-ASYNC", jid, msg] formatMessage (AsyncMessage jid msg) = ["J", Proto.serialize jid, msg]
data AsyncWrapped data AsyncWrapped
= AsyncWrappedRemoteResponse RemoteResponse = AsyncWrappedRemoteResponse RemoteResponse
| AsyncWrappedRequest Request | AsyncWrappedRequest Request
| AsyncWrappedExceptionalMessage ExceptionalMessage | AsyncWrappedExceptionalMessage ExceptionalMessage
| AsyncWrappedAsyncReply AsyncReply | AsyncWrappedAsyncMessage AsyncMessage
class ToAsyncWrapped t where class ToAsyncWrapped t where
toAsyncWrapped :: t -> AsyncWrapped toAsyncWrapped :: t -> AsyncWrapped
-- | RemoteResponse is sent wrapped in an async message.
instance ToAsyncWrapped RemoteResponse where instance ToAsyncWrapped RemoteResponse where
toAsyncWrapped = AsyncWrappedRemoteResponse toAsyncWrapped = AsyncWrappedRemoteResponse
@ -401,8 +390,8 @@ instance ToAsyncWrapped Request where
instance ToAsyncWrapped ExceptionalMessage where instance ToAsyncWrapped ExceptionalMessage where
toAsyncWrapped = AsyncWrappedExceptionalMessage toAsyncWrapped = AsyncWrappedExceptionalMessage
instance ToAsyncWrapped AsyncReply where instance ToAsyncWrapped AsyncMessage where
toAsyncWrapped = AsyncWrappedAsyncReply toAsyncWrapped = AsyncWrappedAsyncMessage
-- Data types used for parameters when communicating with the remote. -- Data types used for parameters when communicating with the remote.
-- All are serializable. -- All are serializable.
@ -411,12 +400,17 @@ type Setting = String
type Description = String type Description = String
type ProtocolVersion = Int type ProtocolVersion = Int
type Size = Maybe Integer type Size = Maybe Integer
type JobId = String
type WrappedMsg = String type WrappedMsg = String
newtype JobId = JobId Integer
deriving (Eq, Ord, Show)
supportedProtocolVersions :: [ProtocolVersion] supportedProtocolVersions :: [ProtocolVersion]
supportedProtocolVersions = [1] supportedProtocolVersions = [1]
instance Proto.Serializable JobId where
serialize (JobId n) = show n
deserialize = JobId <$$> readMaybe
instance Proto.Serializable Direction where instance Proto.Serializable Direction where
serialize Upload = "STORE" serialize Upload = "STORE"
serialize Download = "RETRIEVE" serialize Download = "RETRIEVE"

View file

@ -7,11 +7,11 @@ This is an appendix to the [[external_special_remote_protocol]].
## introduction ## introduction
Normally, an external special remote can only be used to do one thing at a Normally, an external special remote can only be used to do one thing at a
time. When git-annex has concurrency enabled, it will start up multiple time, and when git-annex has concurrency enabled, it will start up multiple
processes for the same external special remote. processes for the same external special remote.
This extension lets a single external special remote process handle This extension lets a single external special remote process handle
multiple concurrent requests, which can be useful if multiple processes multiple concurrent jobs, which can be useful if multiple processes
would use too many resources, or if it can be better coordinated using a would use too many resources, or if it can be better coordinated using a
single process. single process.
@ -28,96 +28,70 @@ that includes `ASYNC`, and the external special remote responding in kind.
EXTENSIONS INFO ASYNC EXTENSIONS INFO ASYNC
EXTENSIONS ASYNC EXTENSIONS ASYNC
From this point forward, *everything* that the external special remote From this point forward, every message in the protocol is tagged with a job
has to be wrapped in the async protocol. Messages git-annex sends are number, by prefixing it with "J n".
unchanged.
Generally the first message git-annex sends will be PREPARE. As usual, the first message git-annex sends is generally PREPARE:
PREPARE J 1 PREPARE
Rather than just responding PREPARE-SUCCESS, it has to be wrapped Rather than just responding PREPARE-SUCCESS, the job number has to be
in the async protocol: included in the reply:
RESULT-ASYNC PREPARE-SUCCESS J 1 PREPARE-SUCCESS
Suppose git-annex wants to make some transfers. So it sends: Suppose git-annex wants to make some transfers. It can request several
at the same time, using different job numbers:
TRANSFER RETRIEVE Key1 file1 J 2 TRANSFER RETRIEVE Key1 file1
J 3 TRANSFER RETRIEVE Key2 file2
The special remote should respond with an unique identifier for this The special remote can now perform both transfers at the same time.
async job that it's going to start. The identifier can If it sends PROGRESS messages for these transfers, they have to be tagged
be anything you want to use, but an incrementing number is a with the job number:
reasonable choice. (The Key itself is not a good choice, because git-annex
could make different requests involving the same Key.)
START-ASYNC 1 J 2 PROGRESS 10
J 3 PROGRESS 500
J 2 PROGRESS 20
Once that's sent, git-annex can send its next request immediately, The special remote can also send messages that query git-annex for some
while that transfer is still running. For example, it might request a information. These messages and the reply will also be tagged with a job
second transfer, and the special remote can reply when it's started that number.
transfer too:
TRANSFER RETRIEVE 2 file2 J 2 GETCONFIG url
START-ASYNC 2 J 4 RETRIEVE Key3 file3
J 2 VALUE http://example.com/
When the special remote sends a message, such as PROGRESS, it has to One transfers are done, the special remote sends `TRANSFER-SUCCESS` tagged
wrap it in ASYNC, to specify the job identifier. with the job number.
ASYNC 1 PROGRESS 10 J 3 TRANSFER-SUCCESS RETRIEVE Key2
ASYNC 2 PROGRESS 500 J 2 PROGRESS 100
ASYNC 1 PROGRESS 20 J 2 TRANSFER-SUCCESS RETRIEVE Key1
This can also be used to query git-annex for some information. Lots of different jobs can be requested at the same time.
The reply to a query is eventually sent back wrapped in `REPLY-ASYNC`.
ASYNC 1 GETCONFIG url J 4 CHECKPRESENT Key3
TRANSFER RETRIEVE 3 file3 J 5 CHECKPRESENT Key4
REPLY-ASYNC 1 VALUE http://example.com/ J 6 REMOVE Key3
J 4 CHECKPRESENT-SUCCESS Key3
J 6 REMOVE-SUCCESS Key3
J 5 CHECKPRESENT-FAILURE Key4
Once a transfer is done, the special remote indicates this by A job always starts with a request by git-annex, and once the special
wrapping the usual `TRANSFER-SUCCESS` or remote sends a reply -- or replies -- to that request, the job is done.
`TRANSFER-FAILURE` message in `ASYNC`.
ASYNC 2 TRANSFER-SUCCESS RETRIEVE Key2 An example of sending multiple replies to a request is `LISTCONFIGS`, eg:
ASYNC Key1 PROGRESS 100
ASYNC 1 TRANSFER-SUCCESS RETRIEVE Key1
Not only transfers, but everything the special remote sends to git-annex J 7 LISTCONFIGS
has to be wrapped in the async protocol. J 7 CONFIG foo some config
J 7 CONFIG bar other config
J 7 CONFIGEND
CHECKPRESENT Key3 Job numbers are not reused within a given run of a special remote,
START-ASYNC 3 but once git-annex has seen the last message it expects for a job,
CHECKPRESENT Key4 sending other messages tagged with that job number will be rejected
START-ASYNC 4 as a protocol error.
ASYNC 3 CHECKPRESENT-SUCCESS Key3
REMOVE Key3
ASYNC 4 CHECKPRESENT-FAILURE Key4
START-ASYNC 5
ASYNC 5 REMOVE-SUCCESS Key3
## added messages To avoid overflow, job numbers should be treated as at least 64 bit
values (or as strings) by the special remote.
Here's the details about the additions to the protocol.
* `START-ASYNC JobId`
This (or `RESULT-ASYNC` must be sent in response to all requests
git-annex sends after `EXTENSIONS` has been used to negotiate the
async protocol.
The JobId is a unique value, typically an incrementing number.
This does not need to be sent immediately after git-annex sends a request;
other messages can be sent in between. But the next START-ASYNC git-annex sees
after sending a request tells it the JobId that will be used for that request.
* `ASYNC JobId Msg`
All the usual protocol messages that are sent by the external special
remote must be wrapped in this, to specify which job the message relates
to.
* `RESULT-ASYNC ResultMsg`
This is the same as sending `START-ASYNC` immediately followed by
`ASYNC` with a result message. This is often used to respond to
`PREPARE` other things that are trivial or just don't need to be handled
async.
* `REPLY-ASYNC JobId Reply`
Sent by git-annex when an `ASYNC` requested a reply.
Note that this may not be the next message received from
git-annex immediately after sending an `ASYNC` request.