2016-12-02 17:50:56 +00:00
|
|
|
{- P2P protocol, Annex implementation
|
|
|
|
-
|
2022-01-07 17:17:43 +00:00
|
|
|
- Copyright 2016-2022 Joey Hess <id@joeyh.name>
|
2016-12-02 17:50:56 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2016-12-02 17:50:56 +00:00
|
|
|
-}
|
|
|
|
|
2018-03-06 19:14:53 +00:00
|
|
|
{-# LANGUAGE RankNTypes, FlexibleContexts #-}
|
2016-12-02 17:50:56 +00:00
|
|
|
|
|
|
|
module P2P.Annex
|
2018-03-12 17:43:19 +00:00
|
|
|
( RunState(..)
|
|
|
|
, mkRunState
|
2016-12-06 19:40:31 +00:00
|
|
|
, P2PConnection(..)
|
2016-12-02 17:50:56 +00:00
|
|
|
, runFullProto
|
|
|
|
) where
|
|
|
|
|
|
|
|
import Annex.Common
|
|
|
|
import Annex.Content
|
2016-12-02 20:39:01 +00:00
|
|
|
import Annex.Transfer
|
2016-12-09 18:52:38 +00:00
|
|
|
import Annex.ChangedRefs
|
2016-12-02 17:50:56 +00:00
|
|
|
import P2P.Protocol
|
|
|
|
import P2P.IO
|
2016-12-02 18:49:22 +00:00
|
|
|
import Logs.Location
|
|
|
|
import Types.NumCopies
|
2016-12-08 23:56:02 +00:00
|
|
|
import Utility.Metered
|
2021-07-27 18:07:23 +00:00
|
|
|
import Annex.Verify
|
2016-12-02 17:50:56 +00:00
|
|
|
|
|
|
|
import Control.Monad.Free
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
import Control.Concurrent.STM
|
2021-02-09 21:03:27 +00:00
|
|
|
import qualified Data.ByteString as S
|
2016-12-02 19:34:15 +00:00
|
|
|
|
2016-12-02 17:50:56 +00:00
|
|
|
-- Full interpreter for Proto, that can receive and send objects.
|
2018-09-25 20:49:59 +00:00
|
|
|
runFullProto :: RunState -> P2PConnection -> Proto a -> Annex (Either ProtoFailure a)
|
2018-03-12 17:43:19 +00:00
|
|
|
runFullProto runst conn = go
|
2016-12-02 17:50:56 +00:00
|
|
|
where
|
|
|
|
go :: RunProto Annex
|
2016-12-10 15:12:18 +00:00
|
|
|
go (Pure v) = return (Right v)
|
2018-03-12 19:19:40 +00:00
|
|
|
go (Free (Net n)) = runNet runst conn go n
|
2018-03-12 17:43:19 +00:00
|
|
|
go (Free (Local l)) = runLocal runst go l
|
2016-12-02 17:50:56 +00:00
|
|
|
|
2018-09-25 20:49:59 +00:00
|
|
|
runLocal :: RunState -> RunProto Annex -> LocalF (Proto a) -> Annex (Either ProtoFailure a)
|
2018-03-12 17:43:19 +00:00
|
|
|
runLocal runst runner a = case a of
|
2016-12-02 17:50:56 +00:00
|
|
|
TmpContentSize k next -> do
|
|
|
|
tmp <- fromRepo $ gitAnnexTmpObjectLocation k
|
2020-11-05 15:26:34 +00:00
|
|
|
size <- liftIO $ catchDefaultIO 0 $ getFileSize tmp
|
2016-12-02 17:50:56 +00:00
|
|
|
runner (next (Len size))
|
2016-12-06 19:05:44 +00:00
|
|
|
FileSize f next -> do
|
2020-11-05 15:26:34 +00:00
|
|
|
size <- liftIO $ catchDefaultIO 0 $ getFileSize (toRawFilePath f)
|
2016-12-06 19:05:44 +00:00
|
|
|
runner (next (Len size))
|
2016-12-02 18:49:22 +00:00
|
|
|
ContentSize k next -> do
|
2020-11-05 15:26:34 +00:00
|
|
|
let getsize = liftIO . catchMaybeIO . getFileSize
|
2016-12-02 18:49:22 +00:00
|
|
|
size <- inAnnex' isJust Nothing getsize k
|
|
|
|
runner (next (Len <$> size))
|
2024-07-01 14:42:27 +00:00
|
|
|
ReadContent k af o offset sender next -> do
|
Fix a P2P protocol hang
When readContent got Nothing from prepSendAnnex, it did not run its
callback, and the callback is what sends the DATA reply.
sendContent checks with contentSize that the object file is present, but
that doesn't really guarantee that prepSendAnnex won't return Nothing.
So, it was possible for a P2P protocol GET to not receive a response,
and appear to hang. When what it's really doing is waiting for the next
protocol command.
This seems most likely to happen when the annex is in direct mode, and the
file being requested has been modified. It could also happen in an indirect
mode repository if genInodeCache somehow failed. Perhaps due to a race
with a drop of the content file.
Fixed by making readContent behave the way its spec said it should,
and run the callback with L.empty in this case.
Note that, it's finee for readContent to send any amount of data
to the callback, including L.empty. sendBytes deals with that
by making sure it sends exactly the specified number of bytes,
aborting the protocol if it's too short. So, when L.empty is sent,
the protocol will end up aborting.
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-02 17:41:50 +00:00
|
|
|
let proceed c = do
|
|
|
|
r <- tryNonAsync c
|
|
|
|
case r of
|
|
|
|
Left e -> return $ Left $ ProtoFailureException e
|
|
|
|
Right (Left e) -> return $ Left e
|
|
|
|
Right (Right ok) -> runner (next ok)
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
-- If the content is not present, or the transfer doesn't
|
2018-11-12 15:53:44 +00:00
|
|
|
-- run for any other reason, the sender action still must
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
-- be run, so is given empty and Invalid data.
|
|
|
|
let fallback = runner (sender mempty (return Invalid))
|
2024-07-01 14:42:27 +00:00
|
|
|
v <- tryNonAsync $ prepSendAnnex k o
|
2016-12-02 18:49:22 +00:00
|
|
|
case v of
|
disk free checking for unsized keys
Improve disk free space checking when transferring unsized keys to
local git remotes. Since the size of the object file is known, can
check that instead.
Getting unsized keys from local git remotes does not check the actual
object size. It would be harder to handle that direction because the size
check is run locally, before anything involving the remote is done. So it
doesn't know the size of the file on the remote.
Also, transferring unsized keys to other remotes, including ssh remotes and
p2p remotes don't do disk size checking for unsized keys. This would need a
change in protocol.
(It does seem like it would be possible to implement the same thing for
directory special remotes though.)
In some sense, it might be better to not ever do disk free checking for
unsized keys, than to do it only sometimes. A user might notice this
direction working and consider it a bug that the other direction does not.
On the other hand, disk reserve checking is not implemented for most
special remotes at all, and yet it is implemented for a few, which is also
inconsistent, but best effort. And so doing this best effort seems to make
some sense. Fundamentally, if the user wants the size to always be checked,
they should not use unsized keys.
Sponsored-by: Brock Spratlen on Patreon
2024-01-16 18:29:10 +00:00
|
|
|
Right (Just (f, _sz, checkchanged)) -> proceed $ do
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
-- alwaysUpload to allow multiple uploads of the same key.
|
2021-02-03 19:35:32 +00:00
|
|
|
let runtransfer ti = transfer alwaysUpload k af Nothing $ \p ->
|
2024-07-01 14:42:27 +00:00
|
|
|
sinkfile f offset checkchanged sender p ti
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
checktransfer runtransfer fallback
|
|
|
|
Right Nothing -> proceed fallback
|
2018-09-25 20:49:59 +00:00
|
|
|
Left e -> return $ Left $ ProtoFailureException e
|
2018-03-13 18:18:30 +00:00
|
|
|
StoreContent k af o l getb validitycheck next -> do
|
2018-06-21 17:34:11 +00:00
|
|
|
-- This is the same as the retrievalSecurityPolicy of
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
-- Remote.P2P and Remote.Git.
|
2018-06-21 17:34:11 +00:00
|
|
|
let rsp = RetrievalAllKeysSecure
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
v <- tryNonAsync $ do
|
2021-02-10 16:41:05 +00:00
|
|
|
iv <- startVerifyKeyContentIncrementally DefaultVerify k
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
let runtransfer ti =
|
2021-02-03 19:35:32 +00:00
|
|
|
Right <$> transfer download' k af Nothing (\p ->
|
disk free checking for unsized keys
Improve disk free space checking when transferring unsized keys to
local git remotes. Since the size of the object file is known, can
check that instead.
Getting unsized keys from local git remotes does not check the actual
object size. It would be harder to handle that direction because the size
check is run locally, before anything involving the remote is done. So it
doesn't know the size of the file on the remote.
Also, transferring unsized keys to other remotes, including ssh remotes and
p2p remotes don't do disk size checking for unsized keys. This would need a
change in protocol.
(It does seem like it would be possible to implement the same thing for
directory special remotes though.)
In some sense, it might be better to not ever do disk free checking for
unsized keys, than to do it only sometimes. A user might notice this
direction working and consider it a bug that the other direction does not.
On the other hand, disk reserve checking is not implemented for most
special remotes at all, and yet it is implemented for a few, which is also
inconsistent, but best effort. And so doing this best effort seems to make
some sense. Fundamentally, if the user wants the size to always be checked,
they should not use unsized keys.
Sponsored-by: Brock Spratlen on Patreon
2024-01-16 18:29:10 +00:00
|
|
|
logStatusAfter k $ getViaTmp rsp DefaultVerify k af Nothing $ \tmp ->
|
2021-02-10 16:41:05 +00:00
|
|
|
storefile (fromRawFilePath tmp) o l getb iv validitycheck p ti)
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
let fallback = return $ Left $
|
|
|
|
ProtoFailureMessage "transfer already in progress, or unable to take transfer lock"
|
|
|
|
checktransfer runtransfer fallback
|
|
|
|
case v of
|
|
|
|
Left e -> return $ Left $ ProtoFailureException e
|
|
|
|
Right (Left e) -> return $ Left e
|
|
|
|
Right (Right ok) -> runner (next ok)
|
2021-02-10 16:41:05 +00:00
|
|
|
StoreContentTo dest iv o l getb validitycheck next -> do
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
v <- tryNonAsync $ do
|
|
|
|
let runtransfer ti = Right
|
2021-02-10 16:41:05 +00:00
|
|
|
<$> storefile dest o l getb iv validitycheck nullMeterUpdate ti
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
let fallback = return $ Left $
|
2021-03-06 21:47:05 +00:00
|
|
|
ProtoFailureMessage "Transfer failed"
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
checktransfer runtransfer fallback
|
|
|
|
case v of
|
|
|
|
Left e -> return $ Left $ ProtoFailureException e
|
|
|
|
Right (Left e) -> return $ Left e
|
|
|
|
Right (Right ok) -> runner (next ok)
|
2016-12-02 18:49:22 +00:00
|
|
|
SetPresent k u next -> do
|
|
|
|
v <- tryNonAsync $ logChange k u InfoPresent
|
|
|
|
case v of
|
2018-09-25 20:49:59 +00:00
|
|
|
Left e -> return $ Left $ ProtoFailureException e
|
2016-12-02 18:49:22 +00:00
|
|
|
Right () -> runner next
|
|
|
|
CheckContentPresent k next -> do
|
|
|
|
v <- tryNonAsync $ inAnnex k
|
|
|
|
case v of
|
2018-09-25 20:49:59 +00:00
|
|
|
Left e -> return $ Left $ ProtoFailureException e
|
2016-12-02 18:49:22 +00:00
|
|
|
Right result -> runner (next result)
|
|
|
|
RemoveContent k next -> do
|
2020-07-25 15:54:34 +00:00
|
|
|
let cleanup = do
|
|
|
|
logStatus k InfoMissing
|
|
|
|
return True
|
2016-12-09 16:47:57 +00:00
|
|
|
v <- tryNonAsync $
|
2016-12-09 16:54:12 +00:00
|
|
|
ifM (Annex.Content.inAnnex k)
|
2020-07-25 15:54:34 +00:00
|
|
|
( lockContentForRemoval k cleanup $ \contentlock -> do
|
2016-12-09 16:47:57 +00:00
|
|
|
removeAnnex contentlock
|
2020-07-25 15:54:34 +00:00
|
|
|
cleanup
|
2016-12-09 16:47:57 +00:00
|
|
|
, return True
|
|
|
|
)
|
2016-12-02 18:49:22 +00:00
|
|
|
case v of
|
2018-09-25 20:49:59 +00:00
|
|
|
Left e -> return $ Left $ ProtoFailureException e
|
2016-12-02 18:49:22 +00:00
|
|
|
Right result -> runner (next result)
|
|
|
|
TryLockContent k protoaction next -> do
|
|
|
|
v <- tryNonAsync $ lockContentShared k $ \verifiedcopy ->
|
|
|
|
case verifiedcopy of
|
|
|
|
LockedCopy _ -> runner (protoaction True)
|
|
|
|
_ -> runner (protoaction False)
|
|
|
|
-- If locking fails, lockContentShared throws an exception.
|
|
|
|
-- Let the peer know it failed.
|
|
|
|
case v of
|
|
|
|
Left _ -> runner $ do
|
|
|
|
protoaction False
|
|
|
|
next
|
|
|
|
Right _ -> runner next
|
2018-03-12 17:43:19 +00:00
|
|
|
WaitRefChange next -> case runst of
|
|
|
|
Serving _ (Just h) _ -> do
|
2016-12-09 19:08:54 +00:00
|
|
|
v <- tryNonAsync $ liftIO $ waitChangedRefs h
|
|
|
|
case v of
|
2018-09-25 20:49:59 +00:00
|
|
|
Left e -> return $ Left $ ProtoFailureException e
|
2016-12-09 19:08:54 +00:00
|
|
|
Right changedrefs -> runner (next changedrefs)
|
2018-09-25 20:49:59 +00:00
|
|
|
_ -> return $ Left $
|
|
|
|
ProtoFailureMessage "change notification not available"
|
2018-03-13 01:46:58 +00:00
|
|
|
UpdateMeterTotalSize m sz next -> do
|
|
|
|
liftIO $ setMeterTotalSize m sz
|
|
|
|
runner next
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
RunValidityCheck checkaction next -> runner . next =<< checkaction
|
2016-12-02 20:39:01 +00:00
|
|
|
where
|
2021-02-03 19:35:32 +00:00
|
|
|
transfer mk k af sd ta = case runst of
|
2016-12-02 20:39:01 +00:00
|
|
|
-- Update transfer logs when serving.
|
2018-03-29 17:04:07 +00:00
|
|
|
-- Using noRetry because we're the sender.
|
2018-03-12 17:43:19 +00:00
|
|
|
Serving theiruuid _ _ ->
|
2021-02-03 19:35:32 +00:00
|
|
|
mk theiruuid k af sd noRetry ta noNotification
|
2016-12-02 20:39:01 +00:00
|
|
|
-- Transfer logs are updated higher in the stack when
|
|
|
|
-- a client.
|
2018-03-12 17:43:19 +00:00
|
|
|
Client _ -> ta nullMeterUpdate
|
2021-02-09 21:03:27 +00:00
|
|
|
|
|
|
|
resumefromoffset o incrementalverifier p h
|
|
|
|
| o /= 0 = do
|
|
|
|
p' <- case incrementalverifier of
|
|
|
|
Just iv -> do
|
|
|
|
go iv o
|
|
|
|
return p
|
|
|
|
_ -> return $ offsetMeterUpdate p (toBytesProcessed o)
|
|
|
|
-- Make sure the handle is seeked to the offset.
|
|
|
|
-- (Reading the file probably left it there
|
|
|
|
-- when that was done, but let's be sure.)
|
|
|
|
hSeek h AbsoluteSeek o
|
|
|
|
return p'
|
|
|
|
| otherwise = return p
|
|
|
|
where
|
|
|
|
go iv n
|
|
|
|
| n == 0 = return ()
|
2021-02-10 02:15:33 +00:00
|
|
|
| otherwise = do
|
|
|
|
let c = if n > fromIntegral defaultChunkSize
|
|
|
|
then defaultChunkSize
|
|
|
|
else fromIntegral n
|
|
|
|
b <- S.hGet h c
|
2021-11-09 16:29:09 +00:00
|
|
|
updateIncrementalVerifier iv b
|
2021-02-10 02:15:33 +00:00
|
|
|
unless (b == S.empty) $
|
|
|
|
go iv (n - fromIntegral (S.length b))
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
|
2021-02-09 21:03:27 +00:00
|
|
|
storefile dest (Offset o) (Len l) getb incrementalverifier validitycheck p ti = do
|
2016-12-08 22:26:03 +00:00
|
|
|
v <- runner getb
|
|
|
|
case v of
|
2018-03-13 18:18:30 +00:00
|
|
|
Right b -> do
|
|
|
|
liftIO $ withBinaryFile dest ReadWriteMode $ \h -> do
|
2021-02-09 21:03:27 +00:00
|
|
|
p' <- resumefromoffset o incrementalverifier p h
|
|
|
|
let writechunk = case incrementalverifier of
|
|
|
|
Nothing -> \c -> S.hPut h c
|
|
|
|
Just iv -> \c -> do
|
|
|
|
S.hPut h c
|
2021-11-09 16:29:09 +00:00
|
|
|
updateIncrementalVerifier iv c
|
2021-02-09 21:03:27 +00:00
|
|
|
meteredWrite p' writechunk b
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
indicatetransferred ti
|
|
|
|
|
2018-03-13 18:18:30 +00:00
|
|
|
rightsize <- do
|
2020-11-05 15:26:34 +00:00
|
|
|
sz <- liftIO $ getFileSize (toRawFilePath dest)
|
2018-03-13 18:18:30 +00:00
|
|
|
return (toInteger sz == l + o)
|
|
|
|
|
|
|
|
runner validitycheck >>= \case
|
2021-02-09 21:03:27 +00:00
|
|
|
Right (Just Valid) -> case incrementalverifier of
|
2021-08-18 17:54:40 +00:00
|
|
|
Just iv
|
2021-11-09 16:29:09 +00:00
|
|
|
| rightsize -> liftIO (finalizeIncrementalVerifier iv) >>= \case
|
2021-08-18 17:54:40 +00:00
|
|
|
Nothing -> return (True, UnVerified)
|
|
|
|
Just True -> return (True, Verified)
|
2022-01-07 17:17:43 +00:00
|
|
|
Just False -> do
|
|
|
|
verificationOfContentFailed (toRawFilePath dest)
|
|
|
|
return (False, UnVerified)
|
2021-08-18 17:54:40 +00:00
|
|
|
| otherwise -> return (False, UnVerified)
|
2021-02-09 21:03:27 +00:00
|
|
|
Nothing -> return (rightsize, UnVerified)
|
2020-12-01 20:05:55 +00:00
|
|
|
Right (Just Invalid) | l == 0 ->
|
|
|
|
-- Special case, for when
|
|
|
|
-- content was not
|
|
|
|
-- available to send,
|
|
|
|
-- which is indicated by
|
|
|
|
-- sending 0 bytes and
|
|
|
|
-- Invalid.
|
|
|
|
return (False, UnVerified)
|
2018-03-13 18:18:30 +00:00
|
|
|
_ -> do
|
|
|
|
-- Invalid, or old protocol
|
|
|
|
-- version. Validity is not
|
|
|
|
-- known. Force content
|
|
|
|
-- verification.
|
|
|
|
return (rightsize, MustVerify)
|
2023-04-10 17:38:14 +00:00
|
|
|
Left e -> giveup $ describeProtoFailure e
|
2016-12-08 23:56:02 +00:00
|
|
|
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
sinkfile f (Offset o) checkchanged sender p ti = bracket setup cleanup go
|
2016-12-08 23:56:02 +00:00
|
|
|
where
|
|
|
|
setup = liftIO $ openBinaryFile f ReadMode
|
|
|
|
cleanup = liftIO . hClose
|
|
|
|
go h = do
|
|
|
|
let p' = offsetMeterUpdate p (toBytesProcessed o)
|
|
|
|
when (o /= 0) $
|
|
|
|
liftIO $ hSeek h AbsoluteSeek o
|
|
|
|
b <- liftIO $ hGetContentsMetered h p'
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
|
2018-03-13 18:18:30 +00:00
|
|
|
let validitycheck = local $ runValidityCheck $
|
|
|
|
checkchanged >>= return . \case
|
|
|
|
False -> Invalid
|
|
|
|
True -> Valid
|
Fixed some other potential hangs in the P2P protocol
Finishes the start made in 983c9d5a53189f71797591692c0ed675f5bd1c16, by
handling the case where `transfer` fails for some other reason, and so the
ReadContent callback does not get run. I don't know of a case where
`transfer` does fail other than the locking dealt with in that commit, but
it's good to have a guarantee.
StoreContent and StoreContentTo had a similar problem.
Things like `getViaTmp` may decide not to run the transfer action.
And `transfer` could certianly fail, if another transfer of the same
object was in progress. (Or a different object when annex.pidlock is set.)
If the transfer action was not run, the content of the object would
not all get consumed, and so would get interpreted as protocol commands,
which would not go well.
My approach to fixing all of these things is to set a TVar only
once all the data in the transfer is known to have been read/written.
This way the internals of `transfer`, `getViaTmp` etc don't matter.
So in ReadContent, it checks if the transfer completed.
If not, as long as it didn't throw an exception, send empty and Invalid
data to the callback. On an exception the state of the protocol is unknown
so it has to raise ProtoFailureException and close the connection,
same as before.
In StoreContent, if the transfer did not complete
some portion of the DATA has been read, so the protocol is in an unknown
state and it has to close the conection as well.
(The ProtoFailureMessage used here matches the one in Annex.Transfer, which
is the most likely reason. Not ideal to duplicate it..)
StoreContent did not ever close the protocol connection before. So this is
a protocol change, but only in an exceptional circumstance, and it's not
going to break anything, because clients already need to deal with the
connection breaking at any point.
The way this new behavior looks (here origin has annex.pidlock = true so will
only accept one upload to it at a time):
git annex copy --to origin -J2
copy x (to origin...) ok
copy y (to origin...)
Lost connection (fd:25: hGetChar: end of file)
This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
2018-11-06 18:44:00 +00:00
|
|
|
r <- runner (sender b validitycheck)
|
|
|
|
indicatetransferred ti
|
|
|
|
return r
|
|
|
|
|
|
|
|
-- This allows using actions like download and viaTmp
|
|
|
|
-- that may abort a transfer, and clean up the protocol after them.
|
|
|
|
--
|
|
|
|
-- Runs an action that may make a transfer, passing a transfer
|
|
|
|
-- indicator. The action should call indicatetransferred on it,
|
|
|
|
-- only after it's actually sent/received the all data.
|
|
|
|
--
|
|
|
|
-- If the action ends without having called indicatetransferred,
|
|
|
|
-- runs the fallback action, which can close the protoocol
|
|
|
|
-- connection or otherwise clean up after the transfer not having
|
|
|
|
-- occurred.
|
|
|
|
--
|
|
|
|
-- If the action throws an exception, the fallback is not run.
|
|
|
|
checktransfer ta fallback = do
|
|
|
|
ti <- liftIO $ newTVarIO False
|
|
|
|
r <- ta ti
|
|
|
|
ifM (liftIO $ atomically $ readTVar ti)
|
|
|
|
( return r
|
|
|
|
, fallback
|
|
|
|
)
|
|
|
|
|
|
|
|
indicatetransferred ti = liftIO $ atomically $ writeTVar ti True
|