2016-11-17 21:19:04 +00:00
|
|
|
{- P2P protocol
|
2018-03-07 19:15:23 +00:00
|
|
|
-
|
|
|
|
- See doc/design/p2p_protocol.mdwn
|
2016-11-17 21:19:04 +00:00
|
|
|
-
|
2018-03-12 17:43:19 +00:00
|
|
|
- Copyright 2016-2018 Joey Hess <id@joeyh.name>
|
2016-11-17 21:19:04 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
2016-12-02 20:39:01 +00:00
|
|
|
{-# LANGUAGE DeriveFunctor, TemplateHaskell, FlexibleContexts #-}
|
|
|
|
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, RankNTypes #-}
|
|
|
|
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
2016-11-17 21:19:04 +00:00
|
|
|
|
2016-11-22 18:34:49 +00:00
|
|
|
module P2P.Protocol where
|
2016-11-17 21:19:04 +00:00
|
|
|
|
|
|
|
import qualified Utility.SimpleProtocol as Proto
|
2018-03-13 18:18:30 +00:00
|
|
|
import Types (Annex)
|
2016-11-17 21:19:04 +00:00
|
|
|
import Types.Key
|
|
|
|
import Types.UUID
|
2018-03-13 18:18:30 +00:00
|
|
|
import Types.Remote (Verification(..), unVerified)
|
2016-11-22 18:18:34 +00:00
|
|
|
import Utility.AuthToken
|
2016-11-17 21:19:04 +00:00
|
|
|
import Utility.Applicative
|
|
|
|
import Utility.PartialPrelude
|
2016-12-07 17:37:35 +00:00
|
|
|
import Utility.Metered
|
2016-12-02 20:39:01 +00:00
|
|
|
import Git.FilePath
|
2016-12-09 18:52:38 +00:00
|
|
|
import Annex.ChangedRefs (ChangedRefs)
|
2016-11-17 21:19:04 +00:00
|
|
|
|
|
|
|
import Control.Monad
|
|
|
|
import Control.Monad.Free
|
|
|
|
import Control.Monad.Free.TH
|
Add content locking to P2P protocol
Is content locking needed in the P2P protocol? Based on re-reading
bugs/concurrent_drop--from_presence_checking_failures.mdwn,
I think so: Peers can form cycles, and multiple peers can all be trying
to drop the same content.
So, added content locking to the protocol, with some difficulty.
The implementation is fine as far as it goes, but note the warning
comment for lockContentWhile -- if the connection to the peer is dropped
unexpectedly, the peer will then unlock the content, and yet the local
side will still think it's locked.
To be honest I'm not sure if Remote.Git's lockKey for ssh remotes
doesn't have the same problem. It checks that the
"ssh remote git-annex-shell lockcontent"
process has not exited, but if the connection closes after that check,
the lockcontent command will unlock it, and yet the local side will
still think it's locked.
Probably this needs to be fixed by eg, making lockcontent catch any
exceptions due to the connection closing, and in that case, wait a
significantly long time before dropping the lock.
This commit was sponsored by Anthony DeRobertis on Patreon.
2016-11-18 05:32:24 +00:00
|
|
|
import Control.Monad.Catch
|
2016-12-02 20:39:01 +00:00
|
|
|
import System.FilePath
|
2016-11-19 20:30:57 +00:00
|
|
|
import System.Exit (ExitCode(..))
|
2016-11-20 16:08:16 +00:00
|
|
|
import System.IO
|
2016-11-17 21:19:04 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
2016-12-02 20:39:01 +00:00
|
|
|
import Data.Char
|
2016-12-08 17:58:03 +00:00
|
|
|
import Control.Applicative
|
|
|
|
import Prelude
|
2016-11-17 21:19:04 +00:00
|
|
|
|
|
|
|
-- | A position within a key's content, as carried by the GET and
-- PUT-FROM messages and used as the starting point when reading or
-- storing content.
newtype Offset = Offset Integer
    deriving Show
|
|
|
|
|
|
|
|
-- | A length of data, as announced by the DATA message and as reported
-- by the size queries of the local actions.
newtype Len = Len Integer
    deriving Show
|
|
|
|
|
2018-03-12 17:43:19 +00:00
|
|
|
-- | Version number of the P2P protocol, exchanged in the VERSION message.
-- Versions are ordered by their underlying number.
newtype ProtocolVersion = ProtocolVersion Integer
    deriving (Eq, Ord, Show)
|
|
|
|
|
|
|
|
-- | Protocol version 0.
-- NOTE(review): presumably the version in effect until a different one
-- has been negotiated -- confirm against the code that runs 'NetF'.
defaultProtocolVersion :: ProtocolVersion
defaultProtocolVersion = ProtocolVersion 0
|
|
|
|
|
|
|
|
-- | Protocol version 1.
-- NOTE(review): presumably the newest version this implementation can
-- speak -- confirm against the callers of 'negotiateProtocolVersion'.
maxProtocolVersion :: ProtocolVersion
maxProtocolVersion = ProtocolVersion 1
|
2018-03-12 17:43:19 +00:00
|
|
|
|
2018-03-06 18:58:24 +00:00
|
|
|
-- | A git service that can be requested with the CONNECT message;
-- these are the services named by the connect command in
-- gitremote-helpers(1).
data Service
    = UploadPack
    | ReceivePack
    deriving Show
|
|
|
|
|
2018-03-13 18:18:30 +00:00
|
|
|
-- | Outcome of a validity check on transferred content; sent over the
-- wire as the VALID and INVALID messages.
data Validity
    = Valid
    | Invalid
    deriving Show
|
|
|
|
|
2016-11-17 21:19:04 +00:00
|
|
|
-- | Messages in the protocol. The peer that makes the connection
|
|
|
|
-- always initiates requests, and the other peer makes responses to them.
|
|
|
|
data Message
|
|
|
|
= AUTH UUID AuthToken -- uuid of the peer that is authenticating
|
|
|
|
| AUTH_SUCCESS UUID -- uuid of the remote peer
|
|
|
|
| AUTH_FAILURE
|
2018-03-12 17:43:19 +00:00
|
|
|
| VERSION ProtocolVersion
|
2016-11-19 20:30:57 +00:00
|
|
|
| CONNECT Service
|
|
|
|
| CONNECTDONE ExitCode
|
2016-12-09 18:52:38 +00:00
|
|
|
| NOTIFYCHANGE
|
|
|
|
| CHANGED ChangedRefs
|
2016-11-18 01:56:02 +00:00
|
|
|
| CHECKPRESENT Key
|
Add content locking to P2P protocol
Is content locking needed in the P2P protocol? Based on re-reading
bugs/concurrent_drop--from_presence_checking_failures.mdwn,
I think so: Peers can form cycles, and multiple peers can all be trying
to drop the same content.
So, added content locking to the protocol, with some difficulty.
The implementation is fine as far as it goes, but note the warning
comment for lockContentWhile -- if the connection to the peer is dropped
unexpectedly, the peer will then unlock the content, and yet the local
side will still think it's locked.
To be honest I'm not sure if Remote.Git's lockKey for ssh remotes
doesn't have the same problem. It checks that the
"ssh remote git-annex-shell lockcontent"
process has not exited, but if the connection closes after that check,
the lockcontent command will unlock it, and yet the local side will
still think it's locked.
Probably this needs to be fixed by eg, making lockcontent catch any
exceptions due to the connection closing, and in that case, wait a
significantly long time before dropping the lock.
This commit was sponsored by Anthony DeRobertis on Patreon.
2016-11-18 05:32:24 +00:00
|
|
|
| LOCKCONTENT Key
|
|
|
|
| UNLOCKCONTENT
|
2016-11-18 01:48:59 +00:00
|
|
|
| REMOVE Key
|
2016-12-02 20:39:01 +00:00
|
|
|
| GET Offset AssociatedFile Key
|
|
|
|
| PUT AssociatedFile Key
|
2016-11-17 21:19:04 +00:00
|
|
|
| PUT_FROM Offset
|
2016-11-18 01:37:49 +00:00
|
|
|
| ALREADY_HAVE
|
2016-11-17 21:19:04 +00:00
|
|
|
| SUCCESS
|
|
|
|
| FAILURE
|
2016-11-19 20:30:57 +00:00
|
|
|
| DATA Len -- followed by bytes of data
|
2018-03-13 18:18:30 +00:00
|
|
|
| VALIDITY Validity
|
2016-11-18 02:06:59 +00:00
|
|
|
| ERROR String
|
2016-11-17 21:19:04 +00:00
|
|
|
deriving (Show)
|
|
|
|
|
2016-11-20 16:08:16 +00:00
|
|
|
-- | Wire format of each 'Message': the command word followed by its
-- serialized parameters. Constructor names written with an underscore
-- use a hyphen on the wire, and VALIDITY is sent as a bare VALID or
-- INVALID word.
instance Proto.Sendable Message where
    formatMessage msg = case msg of
        AUTH uuid authtoken -> ["AUTH", Proto.serialize uuid, Proto.serialize authtoken]
        AUTH_SUCCESS uuid -> ["AUTH-SUCCESS", Proto.serialize uuid]
        AUTH_FAILURE -> ["AUTH-FAILURE"]
        VERSION v -> ["VERSION", Proto.serialize v]
        CONNECT service -> ["CONNECT", Proto.serialize service]
        CONNECTDONE exitcode -> ["CONNECTDONE", Proto.serialize exitcode]
        NOTIFYCHANGE -> ["NOTIFYCHANGE"]
        CHANGED refs -> ["CHANGED", Proto.serialize refs]
        CHECKPRESENT key -> ["CHECKPRESENT", Proto.serialize key]
        LOCKCONTENT key -> ["LOCKCONTENT", Proto.serialize key]
        UNLOCKCONTENT -> ["UNLOCKCONTENT"]
        REMOVE key -> ["REMOVE", Proto.serialize key]
        GET offset af key -> ["GET", Proto.serialize offset, Proto.serialize af, Proto.serialize key]
        PUT af key -> ["PUT", Proto.serialize af, Proto.serialize key]
        PUT_FROM offset -> ["PUT-FROM", Proto.serialize offset]
        ALREADY_HAVE -> ["ALREADY-HAVE"]
        SUCCESS -> ["SUCCESS"]
        FAILURE -> ["FAILURE"]
        VALIDITY Valid -> ["VALID"]
        VALIDITY Invalid -> ["INVALID"]
        DATA len -> ["DATA", Proto.serialize len]
        ERROR err -> ["ERROR", Proto.serialize err]
|
|
|
|
|
|
|
|
-- | Parser selection for incoming messages: the command word picks the
-- 'Message' constructor and the parser for its parameter count. Any
-- unrecognised command word is a parse failure.
instance Proto.Receivable Message where
    parseCommand cmd = case cmd of
        "AUTH" -> Proto.parse2 AUTH
        "AUTH-SUCCESS" -> Proto.parse1 AUTH_SUCCESS
        "AUTH-FAILURE" -> Proto.parse0 AUTH_FAILURE
        "VERSION" -> Proto.parse1 VERSION
        "CONNECT" -> Proto.parse1 CONNECT
        "CONNECTDONE" -> Proto.parse1 CONNECTDONE
        "NOTIFYCHANGE" -> Proto.parse0 NOTIFYCHANGE
        "CHANGED" -> Proto.parse1 CHANGED
        "CHECKPRESENT" -> Proto.parse1 CHECKPRESENT
        "LOCKCONTENT" -> Proto.parse1 LOCKCONTENT
        "UNLOCKCONTENT" -> Proto.parse0 UNLOCKCONTENT
        "REMOVE" -> Proto.parse1 REMOVE
        "GET" -> Proto.parse3 GET
        "PUT" -> Proto.parse2 PUT
        "PUT-FROM" -> Proto.parse1 PUT_FROM
        "ALREADY-HAVE" -> Proto.parse0 ALREADY_HAVE
        "SUCCESS" -> Proto.parse0 SUCCESS
        "FAILURE" -> Proto.parse0 FAILURE
        "DATA" -> Proto.parse1 DATA
        "ERROR" -> Proto.parse1 ERROR
        "VALID" -> Proto.parse0 (VALIDITY Valid)
        "INVALID" -> Proto.parse0 (VALIDITY Invalid)
        _ -> Proto.parseFail
|
|
|
|
|
2018-03-12 17:43:19 +00:00
|
|
|
-- | A protocol version is sent as the 'show' of its number and parsed
-- back with 'readish'.
instance Proto.Serializable ProtocolVersion where
    serialize (ProtocolVersion n) = show n
    deserialize str = ProtocolVersion <$> readish str
|
|
|
|
|
2016-11-20 16:08:16 +00:00
|
|
|
-- | An offset is sent as the 'show' of its number and parsed back with
-- 'readish'.
instance Proto.Serializable Offset where
    serialize (Offset n) = show n
    deserialize str = Offset <$> readish str
|
|
|
|
|
|
|
|
-- | A length is sent as the 'show' of its number and parsed back with
-- 'readish'.
instance Proto.Serializable Len where
    serialize (Len n) = show n
    deserialize str = Len <$> readish str
|
|
|
|
|
|
|
|
-- | Services are named on the wire by the git command that provides
-- them. Any other name fails to deserialize.
instance Proto.Serializable Service where
    serialize svc = case svc of
        UploadPack -> "git-upload-pack"
        ReceivePack -> "git-receive-pack"
    deserialize str = case str of
        "git-upload-pack" -> Just UploadPack
        "git-receive-pack" -> Just ReceivePack
        _ -> Nothing
|
|
|
|
|
2016-12-02 20:39:01 +00:00
|
|
|
-- | An AssociatedFile is followed by further parameters on a protocol
-- line, so its serialized form must not contain whitespace. Each
-- whitespace character is therefore sent as a single '%', and a literal
-- '%' is sent as "%%".
--
-- Deserialization does not restore the original whitespace: a lone '%'
-- becomes '_', as does any control character, so that a name received
-- from a peer contains nothing unusual when it is displayed to the
-- user. A name that is not a relative path is rejected.
--
-- This lossy treatment is acceptable because an AssociatedFile is only
-- ever displayed to the user and does not need to match a file on disk.
instance Proto.Serializable AssociatedFile where
    serialize (AssociatedFile maf) = case maf of
        Nothing -> ""
        Just af -> toInternalGitPath (concatMap escape af)
      where
        escape c
            | c == '%' = "%%"
            | isSpace c = "%"
            | otherwise = [c]

    deserialize s
        | null path = Just (AssociatedFile Nothing)
        | isRelative path = Just (AssociatedFile (Just path))
        | otherwise = Nothing
      where
        path = fromInternalGitPath (unescape s)
        -- "%%" must be tried before the lone '%' case.
        unescape [] = []
        unescape ('%':'%':rest) = '%' : unescape rest
        unescape ('%':rest) = '_' : unescape rest
        unescape (c:rest)
            | isControl c = '_' : unescape rest
            | otherwise = c : unescape rest
|
|
|
|
|
2016-11-20 16:08:16 +00:00
|
|
|
-- | Base functor of the protocol's free monad: each step is either a
-- network operation ('NetF') or a local action ('LocalF').
data ProtoF c
    = Net (NetF c)
    | Local (LocalF c)
    deriving Functor
|
|
|
|
|
|
|
|
-- | The protocol monad: the free monad over 'ProtoF'.
type Proto = Free ProtoF
|
|
|
|
|
|
|
|
-- | Lifts a network-level action into 'Proto' by wrapping each of its
-- steps in the 'Net' constructor.
net :: Net a -> Proto a
net action = hoistFree Net action
|
|
|
|
|
|
|
|
-- | Lifts a local action into 'Proto' by wrapping each of its steps in
-- the 'Local' constructor.
local :: Local a -> Proto a
local action = hoistFree Local action
|
|
|
|
|
|
|
|
-- | Network-level operations of the protocol: exchanging messages and
-- raw bytes with the peer, checking auth tokens, relaying a service,
-- and tracking the negotiated protocol version.
data NetF c
    = SendMessage Message c
    | ReceiveMessage (Maybe Message -> c)
    | SendBytes Len L.ByteString MeterUpdate c
    -- ^ Sends exactly Len bytes of data. (Any more or less will
    -- confuse the receiver.)
    | ReceiveBytes Len MeterUpdate (L.ByteString -> c)
    -- ^ Lazily reads bytes from peer. Stops once Len are read,
    -- or if connection is lost, and in either case returns the bytes
    -- that were read. This allows resuming interrupted transfers.
    | CheckAuthToken UUID AuthToken (Bool -> c)
    | RelayService Service c
    -- ^ Runs a service, relays its output to the peer, and data
    -- from the peer to it.
    | Relay RelayHandle RelayHandle (ExitCode -> c)
    -- ^ Reads from the first RelayHandle, and sends the data to a
    -- peer, while at the same time accepting input from the peer
    -- which is sent to the second RelayHandle. Continues until
    -- the peer sends an ExitCode.
    | SetProtocolVersion ProtocolVersion c
    -- ^ Called when a new protocol version has been negotiated.
    | GetProtocolVersion (ProtocolVersion -> c)
    deriving (Functor)
|
|
|
|
|
|
|
|
-- | Monad of network-level actions: the free monad over 'NetF'.
type Net = Free NetF
|
|
|
|
|
|
|
|
-- | A 'Handle' wrapped for use as one end of the 'Relay' operation.
newtype RelayHandle = RelayHandle Handle
|
|
|
|
|
|
|
|
data LocalF c
|
2016-12-02 17:47:42 +00:00
|
|
|
= TmpContentSize Key (Len -> c)
|
|
|
|
-- ^ Gets size of the temp file where received content may have
|
|
|
|
-- been stored. If not present, returns 0.
|
2016-12-06 19:05:44 +00:00
|
|
|
| FileSize FilePath (Len -> c)
|
|
|
|
-- ^ Gets size of the content of a file. If not present, returns 0.
|
2016-12-02 17:47:42 +00:00
|
|
|
| ContentSize Key (Maybe Len -> c)
|
|
|
|
-- ^ Gets size of the content of a key, when the full content is
|
|
|
|
-- present.
|
2018-03-13 18:18:30 +00:00
|
|
|
| ReadContent Key AssociatedFile Offset (L.ByteString -> Proto Validity -> Proto Bool) (Bool -> c)
|
2016-12-08 23:56:02 +00:00
|
|
|
-- ^ Reads the content of a key and sends it to the callback.
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certainly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the connection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
-- Must run the callback, or terminate the protocol connection.
|
|
|
|
--
|
Fix a P2P protocol hang
When readContent got Nothing from prepSendAnnex, it did not run its
callback, and the callback is what sends the DATA reply.
sendContent checks with contentSize that the object file is present, but
that doesn't really guarantee that prepSendAnnex won't return Nothing.
So, it was possible for a P2P protocol GET to not receive a response,
and appear to hang. When what it's really doing is waiting for the next
protocol command.
This seems most likely to happen when the annex is in direct mode, and the
file being requested has been modified. It could also happen in an indirect
mode repository if genInodeCache somehow failed. Perhaps due to a race
with a drop of the content file.
Fixed by making readContent behave the way its spec said it should,
and run the callback with L.empty in this case.
Note that it's fine for readContent to send any amount of data
to the callback, including L.empty. sendBytes deals with that
by making sure it sends exactly the specified number of bytes,
aborting the protocol if it's too short. So, when L.empty is sent,
the protocol will end up aborting.
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-02 17:41:50 +00:00
|
|
|
-- May send any amount of data, including L.empty if the content is
|
|
|
|
-- not available. The callback must deal with that.
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certainly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the connection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
--
|
Fix a P2P protocol hang
When readContent got Nothing from prepSendAnnex, it did not run its
callback, and the callback is what sends the DATA reply.
sendContent checks with contentSize that the object file is present, but
that doesn't really guarantee that prepSendAnnex won't return Nothing.
So, it was possible for a P2P protocol GET to not receive a response,
and appear to hang. When what it's really doing is waiting for the next
protocol command.
This seems most likely to happen when the annex is in direct mode, and the
file being requested has been modified. It could also happen in an indirect
mode repository if genInodeCache somehow failed. Perhaps due to a race
with a drop of the content file.
Fixed by making readContent behave the way its spec said it should,
and run the callback with L.empty in this case.
Note that it's fine for readContent to send any amount of data
to the callback, including L.empty. sendBytes deals with that
by making sure it sends exactly the specified number of bytes,
aborting the protocol if it's too short. So, when L.empty is sent,
the protocol will end up aborting.
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-02 17:41:50 +00:00
|
|
|
-- And the content may change while it's being sent.
|
2018-03-13 18:18:30 +00:00
|
|
|
-- The callback is passed a validity check that it can run after
|
|
|
|
-- sending the content to detect when this happened.
|
|
|
|
| StoreContent Key AssociatedFile Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) (Bool -> c)
|
2016-12-06 19:05:44 +00:00
|
|
|
-- ^ Stores content to the key's temp file starting at an offset.
|
2016-12-02 17:47:42 +00:00
|
|
|
-- Once the whole content of the key has been stored, moves the
|
2016-12-06 19:05:44 +00:00
|
|
|
-- temp file into place as the content of the key, and returns True.
|
|
|
|
--
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certainly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the connection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
-- Must consume the whole lazy ByteString, or if unable to do
|
|
|
|
-- so, terminate the protocol connection.
|
|
|
|
--
|
2018-03-13 18:18:30 +00:00
|
|
|
-- If the validity check is provided and fails, the content was
|
|
|
|
-- changed while it was being sent, so verification of the
|
|
|
|
-- received content should be forced.
|
2016-11-18 01:27:16 +00:00
|
|
|
--
|
|
|
|
-- Note: The ByteString may not contain the entire remaining content
|
2016-12-02 17:47:42 +00:00
|
|
|
-- of the key. Only once the temp file size == Len has the whole
|
2016-11-18 01:27:16 +00:00
|
|
|
-- content been transferred.
|
2018-03-13 18:18:30 +00:00
|
|
|
| StoreContentTo FilePath Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c)
|
|
|
|
-- ^ Like StoreContent, but stores the content to a temp file.
|
2016-11-20 16:08:16 +00:00
|
|
|
| SetPresent Key UUID c
|
|
|
|
| CheckContentPresent Key (Bool -> c)
|
2016-11-18 01:37:49 +00:00
|
|
|
-- ^ Checks if the whole content of the key is locally present.
|
2016-12-02 17:47:42 +00:00
|
|
|
| RemoveContent Key (Bool -> c)
|
|
|
|
-- ^ If the content is not present, still succeeds.
|
2016-11-18 01:48:59 +00:00
|
|
|
-- May fail if not enough copies to safely drop, etc.
|
2016-11-20 16:08:16 +00:00
|
|
|
| TryLockContent Key (Bool -> Proto ()) c
|
Add content locking to P2P protocol
Is content locking needed in the P2P protocol? Based on re-reading
bugs/concurrent_drop--from_presence_checking_failures.mdwn,
I think so: Peers can form cycles, and multiple peers can all be trying
to drop the same content.
So, added content locking to the protocol, with some difficulty.
The implementation is fine as far as it goes, but note the warning
comment for lockContentWhile -- if the connection to the peer is dropped
unexpectedly, the peer will then unlock the content, and yet the local
side will still think it's locked.
To be honest I'm not sure if Remote.Git's lockKey for ssh remotes
doesn't have the same problem. It checks that the
"ssh remote git-annex-shell lockcontent"
process has not exited, but if the connection closes after that check,
the lockcontent command will unlock it, and yet the local side will
still think it's locked.
Probably this needs to be fixed by eg, making lockcontent catch any
exceptions due to the connection closing, and in that case, wait a
significantly long time before dropping the lock.
This commit was sponsored by Anthony DeRobertis on Patreon.
2016-11-18 05:32:24 +00:00
|
|
|
-- ^ Try to lock the content of a key, preventing it
|
2016-12-02 18:49:22 +00:00
|
|
|
-- from being deleted, while running the provided protocol
|
make sure that lockContentShared is always paired with an inAnnex check
lockContentShared had a screwy caveat that it didn't verify that the content
was present when locking it, but in the most common case, eg indirect mode,
it failed to lock when the content is not present.
That led to a few callers forgetting to check inAnnex when using it,
but the potential data loss was unlikely to be noticed because it only
affected direct mode I think.
Fix data loss bug when the local repository uses direct mode, and a
locally modified file is dropped from a remote repository. The bug
caused the modified file to be counted as a copy of the original file.
(This is not a severe bug because in such a situation, dropping
from the remote and then modifying the file is allowed and has the same
end result.)
And, in content locking over tor, when the remote repository is
in direct mode, it neglected to check that the content was actually
present when locking it. This could cause git annex drop to remove
the only copy of a file when it thought the tor remote had a copy.
So, make lockContentShared do its own inAnnex check. This could perhaps
be optimised for direct mode, to avoid the check then, since locking
the content necessarily verifies it exists there, but I have not bothered
with that.
This commit was sponsored by Jeff Goeke-Smith on Patreon.
2018-03-07 18:13:02 +00:00
|
|
|
-- action. If unable to lock the content, or the content is not
|
|
|
|
-- present, runs the protocol action with False.
|
2016-12-09 18:52:38 +00:00
|
|
|
| WaitRefChange (ChangedRefs -> c)
|
2018-03-13 01:46:58 +00:00
|
|
|
-- ^ Waits for one or more git refs to change and returns them.
|
|
|
|
| UpdateMeterTotalSize Meter Integer c
|
|
|
|
-- ^ Updates the total size of a Meter, for cases where the size is
|
|
|
|
-- not known until the data is being received.
|
2018-03-13 18:18:30 +00:00
|
|
|
| RunValidityCheck (Annex Validity) (Validity -> c)
|
|
|
|
-- ^ Runs a deferred validity check.
|
2016-11-17 21:19:04 +00:00
|
|
|
deriving (Functor)
|
|
|
|
|
2016-11-20 16:08:16 +00:00
|
|
|
-- | The free monad built from the 'LocalF' functor.
type Local = Free LocalF
|
2016-11-17 21:19:04 +00:00
|
|
|
|
2016-11-20 16:08:16 +00:00
|
|
|
-- Template Haskell: derive a lower-cased function (sendMessage etc.)
-- for every constructor of the two free monad functors.
$(makeFree ''NetF)
$(makeFree ''LocalF)
|
2016-11-17 21:19:04 +00:00
|
|
|
|
2018-03-12 17:43:19 +00:00
|
|
|
-- | Client side of authentication: sends AUTH with our UUID and the
-- auth token, then hands over to 'postAuth' to process the reply.
auth :: UUID -> AuthToken -> Proto () -> Proto (Maybe UUID)
auth myuuid t a = net (sendMessage (AUTH myuuid t)) >> postAuth a
|
2018-03-08 18:02:18 +00:00
|
|
|
|
2018-03-12 17:43:19 +00:00
|
|
|
-- | Waits for the peer's reply to an authentication attempt.
--
-- On AUTH_SUCCESS the supplied action is run and the peer's UUID is
-- returned. On AUTH_FAILURE the result is Nothing. Any other reply is
-- answered with an ERROR, and the result is also Nothing.
postAuth :: Proto () -> Proto (Maybe UUID)
postAuth a = net receiveMessage >>= handleReply
  where
    handleReply (Just (AUTH_SUCCESS theiruuid)) = a >> return (Just theiruuid)
    handleReply (Just AUTH_FAILURE) = return Nothing
    handleReply _ = do
        net $ sendMessage (ERROR "auth failed")
        return Nothing
|
|
|
|
|
2018-03-12 17:43:19 +00:00
|
|
|
-- | Proposes a protocol version to the peer and adopts whatever version
-- the peer answers with.
negotiateProtocolVersion :: ProtocolVersion -> Proto ()
negotiateProtocolVersion preferredversion = do
    net $ sendMessage (VERSION preferredversion)
    net receiveMessage >>= \reply -> case reply of
        Just (VERSION v) -> net (setProtocolVersion v)
        -- Old server doesn't know about the VERSION command;
        -- the protocol version is left as it was.
        Just (ERROR _) -> return ()
        _ -> net (sendMessage (ERROR "expected VERSION"))
|
|
|
|
|
2016-11-18 01:56:02 +00:00
|
|
|
-- | Sends CHECKPRESENT for the key and returns the peer's
-- SUCCESS/FAILURE answer as a Bool.
checkPresent :: Key -> Proto Bool
checkPresent key = net (sendMessage (CHECKPRESENT key)) >> checkSuccess
|
|
|
|
|
Add content locking to P2P protocol
Is content locking needed in the P2P protocol? Based on re-reading
bugs/concurrent_drop--from_presence_checking_failures.mdwn,
I think so: Peers can form cycles, and multiple peers can all be trying
to drop the same content.
So, added content locking to the protocol, with some difficulty.
The implementation is fine as far as it goes, but note the warning
comment for lockContentWhile -- if the connection to the peer is dropped
unexpectedly, the peer will then unlock the content, and yet the local
side will still think it's locked.
To be honest I'm not sure if Remote.Git's lockKey for ssh remotes
doesn't have the same problem. It checks that the
"ssh remote git-annex-shell lockcontent"
process has not exited, but if the connection closes after that check,
the lockcontent command will unlock it, and yet the local side will
still think it's locked.
Probably this needs to be fixed by eg, making lockcontent catch any
exceptions due to the connection closing, and in that case, wait a
significantly long time before dropping the lock.
This commit was sponsored by Anthony DeRobertis on Patreon.
2016-11-18 05:32:24 +00:00
|
|
|
{- Asks the peer to lock the content, preventing it from being dropped,
 - for as long as the action runs.
 -
 - The action is passed the result of the lock attempt: True when the
 - peer answered LOCKCONTENT with SUCCESS. runproto is handed False as its
 - first argument when locking (presumably the value it yields when the
 - protocol action cannot be completed -- confirm against its callers).
 - UNLOCKCONTENT is only sent afterwards when the lock was obtained.
 -
 - Note that this only guarantees that the content is locked as long as the
 - connection to the peer remains up. If the connection is unexpectedly
 - dropped, the peer will then unlock the content.
 -}
lockContentWhile
    :: MonadMask m
    => (forall r. r -> Proto r -> m r)
    -> Key
    -> (Bool -> m a)
    -> m a
lockContentWhile runproto key a = bracket acquire release a
  where
    acquire = runproto False $
        net (sendMessage (LOCKCONTENT key)) >> checkSuccess
    release locked
        | locked = runproto () $ net $ sendMessage UNLOCKCONTENT
        | otherwise = return ()
|
|
|
|
|
2016-11-18 01:48:59 +00:00
|
|
|
-- | Sends REMOVE for the key and returns the peer's SUCCESS/FAILURE
-- answer as a Bool.
remove :: Key -> Proto Bool
remove key = net (sendMessage (REMOVE key)) >> checkSuccess
|
|
|
|
|
2018-03-13 18:18:30 +00:00
|
|
|
-- | Requests the content of a key from the peer with GET, storing what is
-- received to the destination file. The offset sent in the GET message is
-- the current size of the destination file, as reported by 'fileSize'.
get :: FilePath -> Key -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification)
get dest key af m p =
    receiveContent (Just m) p (fileSize dest) (storeContentTo dest) request
  where
    request offset = GET offset af key
|
2016-11-17 21:19:04 +00:00
|
|
|
|
2016-12-07 17:37:35 +00:00
|
|
|
-- | Offers the content of a key to the peer with PUT.
--
-- If the peer replies PUT_FROM, the content is sent starting at the
-- offset it asked for. If it replies ALREADY_HAVE, nothing is sent and
-- the result is True. Any other reply is answered with an ERROR and the
-- result is False.
put :: Key -> AssociatedFile -> MeterUpdate -> Proto Bool
put key af p = do
    net $ sendMessage (PUT af key)
    net receiveMessage >>= respond
  where
    respond (Just (PUT_FROM offset)) = sendContent key af offset p
    respond (Just ALREADY_HAVE) = return True
    respond _ = do
        net $ sendMessage (ERROR "expected PUT_FROM or ALREADY_HAVE")
        return False
|
|
|
|
|
2016-12-02 19:34:15 +00:00
|
|
|
-- | What a message handler tells 'serverLoop' to do next.
data ServerHandler a
    = ServerGot a
    -- ^ Stop looping and yield this result.
    | ServerContinue
    -- ^ Go on to the next message.
    | ServerUnexpected
    -- ^ The message was not expected; the loop sends an ERROR to the
    -- peer and then goes on to the next message.
|
|
|
|
|
|
|
|
-- Server loop: receives messages from the client one at a time and
-- passes them to the handler, until the handler produces a result
-- (Just) or the client sends ERROR (Nothing).
serverLoop :: (Message -> Proto (ServerHandler a)) -> Proto (Maybe a)
serverLoop a = net receiveMessage >>= dispatch
  where
    again = serverLoop a

    -- When the client sends ERROR to the server, the server
    -- gives up, since it's not clear what state the client
    -- is in, and so not possible to recover.
    dispatch (Just (ERROR _)) = return Nothing
    -- When the client sends an unparseable message, the server
    -- responds with an error message, and loops. This allows
    -- expanding the protocol with new messages.
    dispatch Nothing = complain "unknown command"
    dispatch (Just cmd) = a cmd >>= react

    react (ServerGot r) = return (Just r)
    react ServerContinue = again
    -- If the client sends an unexpected message,
    -- the server will respond with ERROR, and
    -- always continues processing messages.
    --
    -- Since the protocol is not versioned, this
    -- is necessary to handle protocol changes
    -- robustly, since the client can detect when
    -- it's talking to a server that does not
    -- support some new feature, and fall back.
    react ServerUnexpected = complain "unexpected command"

    -- Report an error to the client and keep serving.
    complain msg = do
        net $ sendMessage (ERROR msg)
        again
|
|
|
|
|
|
|
|
-- | Serve the protocol, with an unauthenticated peer. Only AUTH is
-- accepted; a failed attempt is answered with AUTH_FAILURE and the peer
-- may try again. Once the peer successfully authenticates, returns
-- their UUID.
serveAuth :: UUID -> Proto (Maybe UUID)
serveAuth myuuid = serverLoop handler
  where
    handler (AUTH theiruuid authtoken) =
        net (checkAuthToken theiruuid authtoken) >>= respond theiruuid
    handler _ = return ServerUnexpected

    respond theiruuid True = do
        net $ sendMessage (AUTH_SUCCESS myuuid)
        return (ServerGot theiruuid)
    respond _ False = do
        net $ sendMessage AUTH_FAILURE
        return ServerContinue
|
|
|
|
|
2018-05-25 17:17:56 +00:00
|
|
|
-- | How much access an authenticated peer is given when being served.
-- The constructors are listed from least to most permissive, which is
-- also the order the derived 'Ord' instance uses.
data ServerMode
    = ServeReadOnly
    -- ^ Allow reading, but not writing.
    | ServeAppendOnly
    -- ^ Allow reading, and storing new objects, but not deleting objects.
    | ServeReadWrite
    -- ^ Full read and write access.
    deriving (Eq, Ord)
|
2018-03-07 17:15:55 +00:00
|
|
|
|
2016-12-02 19:34:15 +00:00
|
|
|
-- | Serve the protocol, with a peer that has authenticated.
--
-- The 'ServerMode' limits what the peer may do: REMOVE needs
-- 'ServeReadWrite'; PUT needs 'ServeReadWrite' or 'ServeAppendOnly';
-- CONNECT to the ReceivePack service needs 'ServeReadWrite'. A denied
-- request is answered with an ERROR message.
--
-- Serving stops when the client sends ERROR, or after a CONNECT has been
-- handled.
serveAuthed :: ServerMode -> UUID -> Proto ()
serveAuthed servermode myuuid = void $ serverLoop handler
  where
    readonlyerror = net $ sendMessage (ERROR "this repository is read-only; write access denied")
    appendonlyerror = net $ sendMessage (ERROR "this repository is append-only; removal denied")

    -- Run an action, then tell serverLoop to wait for the next message.
    andContinue act = act >> return ServerContinue

    -- Settle on the lower of the client's version and the highest
    -- version this server speaks.
    handler (VERSION theirversion) = andContinue $ do
        let v = min theirversion maxProtocolVersion
        net $ setProtocolVersion v
        net $ sendMessage (VERSION v)
    -- While the content is locked, the only message accepted from the
    -- client is UNLOCKCONTENT.
    handler (LOCKCONTENT key) = andContinue $
        local $ tryLockContent key $ \locked -> do
            sendSuccess locked
            when locked $ do
                r' <- net receiveMessage
                case r' of
                    Just UNLOCKCONTENT -> return ()
                    _ -> net $ sendMessage (ERROR "expected UNLOCKCONTENT")
    handler (CHECKPRESENT key) = andContinue $
        sendSuccess =<< local (checkContentPresent key)
    handler (REMOVE key) = andContinue $ case servermode of
        ServeReadWrite -> sendSuccess =<< local (removeContent key)
        ServeAppendOnly -> appendonlyerror
        ServeReadOnly -> readonlyerror
    handler (PUT af key) = case servermode of
        ServeReadWrite -> handleput af key
        ServeAppendOnly -> handleput af key
        ServeReadOnly -> andContinue readonlyerror
    -- The fields are bound in the same order the client builds the
    -- message in (offset, associated file, key).
    handler (GET offset af key) = andContinue $
        -- setPresent not called because the peer may have
        -- requested the data but not permanently stored it.
        void $ sendContent key af offset nullMeterUpdate
    handler (CONNECT service) = do
        let goahead = net $ relayService service
        case (servermode, service) of
            (ServeReadWrite, _) -> goahead
            (ServeAppendOnly, UploadPack) -> goahead
            -- git protocol could be used to overwrite
            -- refs or something, so don't allow
            (ServeAppendOnly, ReceivePack) -> readonlyerror
            (ServeReadOnly, UploadPack) -> goahead
            (ServeReadOnly, ReceivePack) -> readonlyerror
        -- After connecting to git, there may be unconsumed data
        -- from the git processes hanging around (even if they
        -- exited successfully), so stop serving this connection.
        return $ ServerGot ()
    handler NOTIFYCHANGE = andContinue $ do
        refs <- local waitRefChange
        net $ sendMessage (CHANGED refs)
    handler _ = return ServerUnexpected

    -- Receive content from the client, unless it is already present.
    -- The key is only recorded as present once it was stored
    -- successfully.
    handleput af key = andContinue $ do
        have <- local $ checkContentPresent key
        if have
            then net $ sendMessage ALREADY_HAVE
            else do
                let sizer = tmpContentSize key
                let storer = \o l b v -> unVerified $
                        storeContent key af o l b v
                (ok, _v) <- receiveContent Nothing nullMeterUpdate sizer storer PUT_FROM
                when ok $
                    local $ setPresent key myuuid
|
|
|
|
|
2016-12-07 17:37:35 +00:00
|
|
|
-- | Sends the content of a key to the peer, starting at the given
-- offset, and returns the peer's SUCCESS/FAILURE answer.
--
-- When the content size is not available, or the offset is at or past
-- the end of the content, an empty DATA message is sent.
sendContent :: Key -> AssociatedFile -> Offset -> MeterUpdate -> Proto Bool
sendContent key af offset@(Offset n) p = local (contentSize key) >>= go
  where
    go Nothing = sendnothing
    go (Just (Len totallen))
        | remaining <= 0 = sendnothing
        | otherwise = local $ readContent key af offset $
            sender (Len remaining)
      where
        remaining = totallen - n

    sendnothing = sender (Len 0) L.empty (return Valid)

    -- The meter is offset by the bytes the peer already has.
    p' = offsetMeterUpdate p (toBytesProcessed n)

    sender len content validitycheck = do
        net $ sendMessage (DATA len)
        net $ sendBytes len content p'
        ver <- net getProtocolVersion
        -- The VALIDITY message is only sent from protocol version 1 on.
        when (ver >= ProtocolVersion 1) $
            validitycheck >>= net . sendMessage . VALIDITY
        checkSuccess
|
2016-11-17 21:19:04 +00:00
|
|
|
|
2018-03-13 18:18:30 +00:00
|
|
|
-- | Receives content from the peer.
--
-- The sizer reports how much has been received so far; that is sent to
-- the peer as the offset, using the supplied message constructor. The
-- peer is expected to answer with DATA, whose bytes are passed to the
-- storer, along with an action that obtains the peer's validity verdict
-- (only available from protocol version 1 on; otherwise it yields
-- Nothing). Whether storing worked is reported back to the peer with
-- SUCCESS/FAILURE.
--
-- When a 'Meter' is provided, its total size is set to the offset plus
-- the length the peer announced.
receiveContent
    :: Maybe Meter
    -> MeterUpdate
    -> Local Len
    -> (Offset -> Len -> Proto L.ByteString -> Proto (Maybe Validity) -> Local (Bool, Verification))
    -> (Offset -> Message)
    -> Proto (Bool, Verification)
receiveContent mm p sizer storer mkmsg = do
    Len n <- local sizer
    let offset = Offset n
    net $ sendMessage (mkmsg offset)
    net receiveMessage >>= \r -> case r of
        Just (DATA len@(Len l)) -> do
            local $ maybe (return ()) (\m -> updateMeterTotalSize m (n + l)) mm
            ver <- net getProtocolVersion
            let p' = offsetMeterUpdate p (toBytesProcessed n)
            let validitycheck
                    | ver >= ProtocolVersion 1 = receiveValidity
                    | otherwise = return Nothing
            (ok, v) <- local $
                storer offset len (net (receiveBytes len p')) validitycheck
            sendSuccess ok
            return (ok, v)
        _ -> do
            net $ sendMessage (ERROR "expected DATA")
            return (False, UnVerified)
  where
    -- Reads the VALIDITY message that follows the data.
    receiveValidity = net receiveMessage >>= \r -> case r of
        Just (VALIDITY v) -> return (Just v)
        _ -> do
            net $ sendMessage (ERROR "expected VALID or INVALID")
            return Nothing
|
2016-11-17 21:19:04 +00:00
|
|
|
|
2016-11-18 01:48:59 +00:00
|
|
|
-- | Reads the peer's answer: True for SUCCESS, False for FAILURE.
-- Anything else is answered with an ERROR and treated as False.
checkSuccess :: Proto Bool
checkSuccess = net receiveMessage >>= interpret
  where
    interpret (Just SUCCESS) = return True
    interpret (Just FAILURE) = return False
    interpret _ = do
        net $ sendMessage (ERROR "expected SUCCESS or FAILURE")
        return False
|
|
|
|
|
2016-11-18 02:06:59 +00:00
|
|
|
-- | Tells the peer the outcome of an operation: SUCCESS for True,
-- FAILURE for False.
sendSuccess :: Bool -> Proto ()
sendSuccess ok = net $ sendMessage $ if ok then SUCCESS else FAILURE
|
2016-11-18 02:06:59 +00:00
|
|
|
|
2016-12-09 20:02:43 +00:00
|
|
|
-- | Sends NOTIFYCHANGE and waits for the peer's CHANGED reply, returning
-- the refs it carries. Any other reply is answered with an ERROR and
-- the result is Nothing.
notifyChange :: Proto (Maybe ChangedRefs)
notifyChange = net (sendMessage NOTIFYCHANGE) >> net receiveMessage >>= interpret
  where
    interpret (Just (CHANGED rs)) = return (Just rs)
    interpret _ = do
        net $ sendMessage (ERROR "expected CHANGED")
        return Nothing
|
|
|
|
|
2016-11-20 16:08:16 +00:00
|
|
|
-- | Sends CONNECT for the service, then relays between the peer and the
-- two handles, returning the resulting exit code.
connect :: Service -> Handle -> Handle -> Proto ExitCode
connect service hin hout =
    net (sendMessage (CONNECT service))
        >> net (relay (RelayHandle hin) (RelayHandle hout))
|
|
|
|
|
|
|
|
-- | A unit of traffic passing through a relay.
data RelayData
    = RelayToPeer L.ByteString
    -- ^ Bytes to be sent to the peer.
    | RelayFromPeer L.ByteString
    -- ^ Bytes that were received from the peer.
    | RelayDone ExitCode
    -- ^ The relay is finished, with this exit code.
    deriving (Show)
|
|
|
|
|
|
|
|
-- | Reads the next relay message from the peer: DATA yields the received
-- bytes, CONNECTDONE yields the exit code. Anything else is answered
-- with an ERROR and ends the relay with a failure exit code.
relayFromPeer :: Net RelayData
relayFromPeer = receiveMessage >>= go
  where
    go (Just (CONNECTDONE exitcode)) = return (RelayDone exitcode)
    go (Just (DATA len)) = RelayFromPeer <$> receiveBytes len nullMeterUpdate
    go _ = do
        sendMessage $ ERROR "expected DATA or CONNECTDONE"
        return $ RelayDone $ ExitFailure 1
|
|
|
|
|
|
|
|
-- | Forwards relay traffic to the peer: bytes go out as a DATA message
-- followed by the bytes themselves, and completion goes out as
-- CONNECTDONE. Data that came from the peer is not sent back.
relayToPeer :: RelayData -> Net ()
relayToPeer (RelayToPeer b) = do
    sendMessage (DATA len)
    sendBytes len b nullMeterUpdate
  where
    len = Len (fromIntegral (L.length b))
relayToPeer (RelayFromPeer _) = return ()
relayToPeer (RelayDone exitcode) = sendMessage (CONNECTDONE exitcode)
|