2020-12-09 17:21:20 +00:00
|
|
|
{- A pool of "git-annex transferrer" processes
|
2013-03-19 22:46:29 +00:00
|
|
|
-
|
2022-09-23 19:55:40 +00:00
|
|
|
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
|
2013-03-19 22:46:29 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2013-03-19 22:46:29 +00:00
|
|
|
-}
|
|
|
|
|
2021-04-05 17:40:31 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2020-12-07 16:50:48 +00:00
|
|
|
{-# LANGUAGE RankNTypes #-}
|
2020-12-11 19:28:58 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
2013-03-19 22:46:29 +00:00
|
|
|
|
2020-12-07 16:50:48 +00:00
|
|
|
module Annex.TransferrerPool where
|
|
|
|
|
|
|
|
import Annex.Common
|
2020-12-07 20:11:29 +00:00
|
|
|
import qualified Annex
|
2020-12-07 16:50:48 +00:00
|
|
|
import Types.TransferrerPool
|
2020-12-09 17:28:16 +00:00
|
|
|
import Types.Transferrer
|
2016-08-03 16:37:12 +00:00
|
|
|
import Types.Transfer
|
2020-12-07 20:11:29 +00:00
|
|
|
import qualified Types.Remote as Remote
|
|
|
|
import Types.Messages
|
2020-12-11 19:28:58 +00:00
|
|
|
import Types.CleanupActions
|
2020-12-04 17:50:03 +00:00
|
|
|
import Messages.Serialized
|
2020-12-07 20:11:29 +00:00
|
|
|
import Annex.Path
|
2021-02-03 17:19:47 +00:00
|
|
|
import Annex.StallDetection
|
2022-09-23 19:55:40 +00:00
|
|
|
import Annex.Link
|
2020-12-07 20:11:29 +00:00
|
|
|
import Utility.Batch
|
2020-12-08 19:22:18 +00:00
|
|
|
import Utility.Metered
|
2020-12-09 19:44:00 +00:00
|
|
|
import qualified Utility.SimpleProtocol as Proto
|
2013-03-19 22:46:29 +00:00
|
|
|
|
|
|
|
import Control.Concurrent
|
2020-12-08 19:22:18 +00:00
|
|
|
import Control.Concurrent.Async
|
|
|
|
import Control.Concurrent.STM hiding (check)
|
2020-12-07 16:50:48 +00:00
|
|
|
import Control.Monad.IO.Class (MonadIO)
|
2020-12-11 19:28:58 +00:00
|
|
|
import qualified Data.Map as M
|
|
|
|
#ifndef mingw32_HOST_OS
|
|
|
|
import System.Posix.Signals
|
|
|
|
import System.Posix.Process (getProcessGroupIDOf)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
type SignalActionsVar = TVar (M.Map SignalAction (Int -> IO ()))
|
2013-03-19 22:46:29 +00:00
|
|
|
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
data RunTransferrer = RunTransferrer String [CommandParam] BatchCommandMaker
|
|
|
|
|
|
|
|
mkRunTransferrer :: BatchCommandMaker -> Annex RunTransferrer
|
|
|
|
mkRunTransferrer batchmaker = RunTransferrer
|
|
|
|
<$> liftIO programPath
|
|
|
|
<*> gitAnnexChildProcessParams "transferrer" []
|
|
|
|
<*> pure batchmaker
|
|
|
|
|
2020-12-07 20:11:29 +00:00
|
|
|
{- Runs an action with a Transferrer from the pool. -}
|
|
|
|
withTransferrer :: (Transferrer -> Annex a) -> Annex a
|
|
|
|
withTransferrer a = do
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
rt <- mkRunTransferrer nonBatchCommandMaker
|
2021-04-02 19:26:21 +00:00
|
|
|
pool <- Annex.getRead Annex.transferrerpool
|
2020-12-07 20:11:29 +00:00
|
|
|
let nocheck = pure (pure True)
|
2021-04-02 19:26:21 +00:00
|
|
|
signalactonsvar <- Annex.getRead Annex.signalactions
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
withTransferrer' False signalactonsvar nocheck rt pool a
|
2020-12-07 20:11:29 +00:00
|
|
|
|
|
|
|
withTransferrer'
|
2020-12-09 16:43:38 +00:00
|
|
|
:: (MonadIO m, MonadMask m)
|
2020-12-07 20:11:29 +00:00
|
|
|
=> Bool
|
|
|
|
-- ^ When minimizeprocesses is True, only one Transferrer is left
|
|
|
|
-- running in the pool at a time. So if this needed to start a
|
|
|
|
-- new Transferrer, it's stopped when done. Otherwise, idle
|
|
|
|
-- processes are left in the pool for use later.
|
2020-12-11 19:28:58 +00:00
|
|
|
-> SignalActionsVar
|
2020-12-07 20:11:29 +00:00
|
|
|
-> MkCheckTransferrer
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
-> RunTransferrer
|
2020-12-07 20:11:29 +00:00
|
|
|
-> TransferrerPool
|
|
|
|
-> (Transferrer -> m a)
|
|
|
|
-> m a
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
withTransferrer' minimizeprocesses signalactonsvar mkcheck rt pool a = do
|
2020-12-07 20:11:29 +00:00
|
|
|
(mi, leftinpool) <- liftIO $ atomically (popTransferrerPool pool)
|
2020-12-09 16:43:38 +00:00
|
|
|
(i@(TransferrerPoolItem _ check), t) <- liftIO $ case mi of
|
|
|
|
Nothing -> do
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
t <- mkTransferrer signalactonsvar rt
|
2020-12-09 16:43:38 +00:00
|
|
|
i <- mkTransferrerPoolItem mkcheck t
|
|
|
|
return (i, t)
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
Just i -> checkTransferrerPoolItem signalactonsvar rt i
|
2020-12-07 16:50:48 +00:00
|
|
|
a t `finally` returntopool leftinpool check t i
|
|
|
|
where
|
|
|
|
returntopool leftinpool check t i
|
|
|
|
| not minimizeprocesses || leftinpool == 0 =
|
2020-12-08 19:22:18 +00:00
|
|
|
-- If the transferrer got killed, the handles will
|
|
|
|
-- be closed, so it should not be returned to the
|
|
|
|
-- pool.
|
|
|
|
liftIO $ whenM (hIsOpen (transferrerWrite t)) $
|
|
|
|
liftIO $ atomically $ pushTransferrerPool pool i
|
2020-12-07 20:11:29 +00:00
|
|
|
| otherwise = liftIO $ do
|
2020-12-11 19:28:58 +00:00
|
|
|
void $ forkIO $ transferrerShutdown t
|
2014-01-06 21:07:08 +00:00
|
|
|
atomically $ pushTransferrerPool pool $ TransferrerPoolItem Nothing check
|
assistant: Start a new git-annex transferkeys process after a network connection change
So that remotes that use a persistent network connection are restarted.
A remote might keep open a long duration network connection, and could
fail to deal well with losing the connection. This is particularly a
concern now that we have external special reotes. An external
special remote that is implemented naively might open the connection only
when PREPARE is sent, and if it loses connection, throw errors on each
request that is made.
(Note that the ssh connection caching should not have this problem; if the
long-duration ssh process loses connection, the named pipe is disconnected
and the next ssh attempt will reconnect. Also, XMPP already deals with
disconnection robustly in its own way.)
There's no way for git-annex to know if a lost network connection actually
affects a given remote, which might have a transfer in process. It does not
make sense to force kill the transferkeys process every time the NetWatcher
detects a change. (Especially because the NetWatcher sometimes polls 1
change per hour.)
In any case, the NetWatcher only detects connection to a network, not
disconnection. So if a transfer is in progress over the network, and the
network goes down, that will need to time out on its own.
An alternate approch that was considered is to use a separate transferkeys
process for each remote, and detect when a request fails, and assume that
means that process is in a failing state and restart it. The problem with
that approach is that if a resource is not available and a remote fails
every time, it degrades to starting a new transferkeys process for every
file transfer, which is too expensive.
Instead, this commit only handles the network reconnection case, and restarts
transferkeys only once the network has reconnected and another transfer needs
to be made. So, a transferkeys process will be reused for 1 hour, or until the
next network connection.
----
The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars,
to allow checking without blocking if a notification has been received.
----
This commit was sponsored by Tobias Brunner.
2014-01-06 20:03:39 +00:00
|
|
|
|
|
|
|
{- Check if a Transferrer from the pool is still ok to be used.
|
|
|
|
- If not, stop it and start a new one. -}
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
checkTransferrerPoolItem :: SignalActionsVar -> RunTransferrer -> TransferrerPoolItem -> IO (TransferrerPoolItem, Transferrer)
|
|
|
|
checkTransferrerPoolItem signalactonsvar rt i = case i of
|
assistant: Start a new git-annex transferkeys process after a network connection change
So that remotes that use a persistent network connection are restarted.
A remote might keep open a long duration network connection, and could
fail to deal well with losing the connection. This is particularly a
concern now that we have external special reotes. An external
special remote that is implemented naively might open the connection only
when PREPARE is sent, and if it loses connection, throw errors on each
request that is made.
(Note that the ssh connection caching should not have this problem; if the
long-duration ssh process loses connection, the named pipe is disconnected
and the next ssh attempt will reconnect. Also, XMPP already deals with
disconnection robustly in its own way.)
There's no way for git-annex to know if a lost network connection actually
affects a given remote, which might have a transfer in process. It does not
make sense to force kill the transferkeys process every time the NetWatcher
detects a change. (Especially because the NetWatcher sometimes polls 1
change per hour.)
In any case, the NetWatcher only detects connection to a network, not
disconnection. So if a transfer is in progress over the network, and the
network goes down, that will need to time out on its own.
An alternate approch that was considered is to use a separate transferkeys
process for each remote, and detect when a request fails, and assume that
means that process is in a failing state and restart it. The problem with
that approach is that if a resource is not available and a remote fails
every time, it degrades to starting a new transferkeys process for every
file transfer, which is too expensive.
Instead, this commit only handles the network reconnection case, and restarts
transferkeys only once the network has reconnected and another transfer needs
to be made. So, a transferkeys process will be reused for 1 hour, or until the
next network connection.
----
The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars,
to allow checking without blocking if a notification has been received.
----
This commit was sponsored by Tobias Brunner.
2014-01-06 20:03:39 +00:00
|
|
|
TransferrerPoolItem (Just t) check -> ifM check
|
2020-12-09 16:43:38 +00:00
|
|
|
( return (i, t)
|
assistant: Start a new git-annex transferkeys process after a network connection change
So that remotes that use a persistent network connection are restarted.
A remote might keep open a long duration network connection, and could
fail to deal well with losing the connection. This is particularly a
concern now that we have external special reotes. An external
special remote that is implemented naively might open the connection only
when PREPARE is sent, and if it loses connection, throw errors on each
request that is made.
(Note that the ssh connection caching should not have this problem; if the
long-duration ssh process loses connection, the named pipe is disconnected
and the next ssh attempt will reconnect. Also, XMPP already deals with
disconnection robustly in its own way.)
There's no way for git-annex to know if a lost network connection actually
affects a given remote, which might have a transfer in process. It does not
make sense to force kill the transferkeys process every time the NetWatcher
detects a change. (Especially because the NetWatcher sometimes polls 1
change per hour.)
In any case, the NetWatcher only detects connection to a network, not
disconnection. So if a transfer is in progress over the network, and the
network goes down, that will need to time out on its own.
An alternate approch that was considered is to use a separate transferkeys
process for each remote, and detect when a request fails, and assume that
means that process is in a failing state and restart it. The problem with
that approach is that if a resource is not available and a remote fails
every time, it degrades to starting a new transferkeys process for every
file transfer, which is too expensive.
Instead, this commit only handles the network reconnection case, and restarts
transferkeys only once the network has reconnected and another transfer needs
to be made. So, a transferkeys process will be reused for 1 hour, or until the
next network connection.
----
The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars,
to allow checking without blocking if a notification has been received.
----
This commit was sponsored by Tobias Brunner.
2014-01-06 20:03:39 +00:00
|
|
|
, do
|
2020-12-11 19:28:58 +00:00
|
|
|
transferrerShutdown t
|
assistant: Start a new git-annex transferkeys process after a network connection change
So that remotes that use a persistent network connection are restarted.
A remote might keep open a long duration network connection, and could
fail to deal well with losing the connection. This is particularly a
concern now that we have external special reotes. An external
special remote that is implemented naively might open the connection only
when PREPARE is sent, and if it loses connection, throw errors on each
request that is made.
(Note that the ssh connection caching should not have this problem; if the
long-duration ssh process loses connection, the named pipe is disconnected
and the next ssh attempt will reconnect. Also, XMPP already deals with
disconnection robustly in its own way.)
There's no way for git-annex to know if a lost network connection actually
affects a given remote, which might have a transfer in process. It does not
make sense to force kill the transferkeys process every time the NetWatcher
detects a change. (Especially because the NetWatcher sometimes polls 1
change per hour.)
In any case, the NetWatcher only detects connection to a network, not
disconnection. So if a transfer is in progress over the network, and the
network goes down, that will need to time out on its own.
An alternate approch that was considered is to use a separate transferkeys
process for each remote, and detect when a request fails, and assume that
means that process is in a failing state and restart it. The problem with
that approach is that if a resource is not available and a remote fails
every time, it degrades to starting a new transferkeys process for every
file transfer, which is too expensive.
Instead, this commit only handles the network reconnection case, and restarts
transferkeys only once the network has reconnected and another transfer needs
to be made. So, a transferkeys process will be reused for 1 hour, or until the
next network connection.
----
The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars,
to allow checking without blocking if a notification has been received.
----
This commit was sponsored by Tobias Brunner.
2014-01-06 20:03:39 +00:00
|
|
|
new check
|
2013-03-19 22:46:29 +00:00
|
|
|
)
|
assistant: Start a new git-annex transferkeys process after a network connection change
So that remotes that use a persistent network connection are restarted.
A remote might keep open a long duration network connection, and could
fail to deal well with losing the connection. This is particularly a
concern now that we have external special reotes. An external
special remote that is implemented naively might open the connection only
when PREPARE is sent, and if it loses connection, throw errors on each
request that is made.
(Note that the ssh connection caching should not have this problem; if the
long-duration ssh process loses connection, the named pipe is disconnected
and the next ssh attempt will reconnect. Also, XMPP already deals with
disconnection robustly in its own way.)
There's no way for git-annex to know if a lost network connection actually
affects a given remote, which might have a transfer in process. It does not
make sense to force kill the transferkeys process every time the NetWatcher
detects a change. (Especially because the NetWatcher sometimes polls 1
change per hour.)
In any case, the NetWatcher only detects connection to a network, not
disconnection. So if a transfer is in progress over the network, and the
network goes down, that will need to time out on its own.
An alternate approch that was considered is to use a separate transferkeys
process for each remote, and detect when a request fails, and assume that
means that process is in a failing state and restart it. The problem with
that approach is that if a resource is not available and a remote fails
every time, it degrades to starting a new transferkeys process for every
file transfer, which is too expensive.
Instead, this commit only handles the network reconnection case, and restarts
transferkeys only once the network has reconnected and another transfer needs
to be made. So, a transferkeys process will be reused for 1 hour, or until the
next network connection.
----
The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars,
to allow checking without blocking if a notification has been received.
----
This commit was sponsored by Tobias Brunner.
2014-01-06 20:03:39 +00:00
|
|
|
TransferrerPoolItem Nothing check -> new check
|
|
|
|
where
|
|
|
|
new check = do
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
t <- mkTransferrer signalactonsvar rt
|
2020-12-09 16:43:38 +00:00
|
|
|
return (TransferrerPoolItem (Just t) check, t)
|
2013-03-19 22:46:29 +00:00
|
|
|
|
2020-12-09 19:44:00 +00:00
|
|
|
data TransferRequestLevel = AnnexLevel | AssistantLevel
|
|
|
|
deriving (Show)
|
|
|
|
|
2013-03-19 22:46:29 +00:00
|
|
|
{- Requests that a Transferrer perform a Transfer, and waits for it to
|
2020-12-08 19:22:18 +00:00
|
|
|
- finish.
|
|
|
|
-
|
|
|
|
- When a stall is detected, kills the Transferrer.
|
|
|
|
-
|
|
|
|
- If the transfer failed or stalled, returns TransferInfo with an
|
|
|
|
- updated bytesComplete reflecting how much data has been transferred.
|
|
|
|
-}
|
2020-12-07 16:50:48 +00:00
|
|
|
performTransfer
|
|
|
|
:: (Monad m, MonadIO m, MonadMask m)
|
2020-12-08 19:22:18 +00:00
|
|
|
=> Maybe StallDetection
|
2020-12-07 20:11:29 +00:00
|
|
|
-> TransferRequestLevel
|
2020-12-07 16:50:48 +00:00
|
|
|
-> (forall a. Annex a -> m a)
|
|
|
|
-- ^ Run an annex action in the monad. Will not be used with
|
|
|
|
-- actions that block for a long time.
|
2020-12-08 19:22:18 +00:00
|
|
|
-> Maybe Remote
|
|
|
|
-> Transfer
|
|
|
|
-> TransferInfo
|
|
|
|
-> Transferrer
|
|
|
|
-> m (Either TransferInfo ())
|
|
|
|
performTransfer stalldetection level runannex r t info transferrer = do
|
|
|
|
bpv <- liftIO $ newTVarIO zeroBytesProcessed
|
|
|
|
ifM (catchBoolIO $ bracket setup cleanup (go bpv))
|
|
|
|
( return (Right ())
|
|
|
|
, do
|
avoid using temp file size when deciding whether to retry failed transfer
When stall detection is enabled, and a transfer is in progress,
it would display a doubled message:
(transfer already in progress, or unable to take transfer lock) (transfer already in progress, or unable to take transfer lock)
That happened because the forward retry decider had a start size of 0,
and an end size of whatever amount of the object the other process had
downloaded. So it incorrectly thought that the transferrer process had
made progress, when it had in fact immediately given up with that
message.
Instead, use the reported value from the progress meter. If a remote
does not report progress, this will mean it doesn't forward retry, in a
situation where it used to. But most remotes do report progress, and any
remote that does not can be fixed to, by using watchFileSize when
downloading. Also, some remotes might preallocate the temp file (eg
bittorrent), so relying on statting its size at this level to get
progress is dubious.
The same change was made to Annex/Transfer.hs, although only
Annex/TransferrerPool.hs needed to be changed to avoid the duplicate
message.
(An alternate fix would have been to start the retry decider with the
size of the object file before downloading begins, rather than 0.)
Sponsored-by: Brett Eisenberg on Patreon
2021-06-25 15:53:28 +00:00
|
|
|
n <- liftIO $ atomically $
|
|
|
|
fromBytesProcessed <$> readTVar bpv
|
2020-12-08 19:22:18 +00:00
|
|
|
return $ Left $ info { bytesComplete = Just n }
|
|
|
|
)
|
|
|
|
where
|
|
|
|
setup = do
|
|
|
|
liftIO $ sendRequest level t r
|
|
|
|
(associatedFile info)
|
|
|
|
(transferrerWrite transferrer)
|
2020-12-11 22:26:30 +00:00
|
|
|
metervar <- liftIO $ newTVarIO Nothing
|
2020-12-08 19:22:18 +00:00
|
|
|
stalledvar <- liftIO $ newTVarIO False
|
|
|
|
tid <- liftIO $ async $
|
|
|
|
detectStalls stalldetection metervar $ do
|
|
|
|
atomically $ writeTVar stalledvar True
|
|
|
|
killTransferrer transferrer
|
|
|
|
return (metervar, tid, stalledvar)
|
|
|
|
|
|
|
|
cleanup (_, tid, stalledvar) = do
|
|
|
|
liftIO $ uninterruptibleCancel tid
|
|
|
|
whenM (liftIO $ atomically $ readTVar stalledvar) $ do
|
|
|
|
runannex $ showLongNote "Transfer stalled"
|
|
|
|
-- Close handles, to prevent the transferrer being
|
|
|
|
-- reused since the process was killed.
|
|
|
|
liftIO $ hClose $ transferrerRead transferrer
|
|
|
|
liftIO $ hClose $ transferrerWrite transferrer
|
|
|
|
|
|
|
|
go bpv (metervar, _, _) = relaySerializedOutput
|
2020-12-07 20:11:29 +00:00
|
|
|
(liftIO $ readResponse (transferrerRead transferrer))
|
|
|
|
(liftIO . sendSerializedOutputResponse (transferrerWrite transferrer))
|
2020-12-08 19:22:18 +00:00
|
|
|
(updatemeter bpv metervar)
|
2020-12-07 16:50:48 +00:00
|
|
|
runannex
|
2020-12-08 19:22:18 +00:00
|
|
|
|
|
|
|
updatemeter bpv metervar (Just n) = liftIO $ do
|
2020-12-11 22:26:30 +00:00
|
|
|
atomically $ writeTVar metervar (Just n)
|
2020-12-08 19:22:18 +00:00
|
|
|
atomically $ writeTVar bpv n
|
|
|
|
updatemeter _bpv metervar Nothing = liftIO $
|
2020-12-11 22:26:30 +00:00
|
|
|
atomically $ writeTVar metervar Nothing
|
2020-12-08 19:22:18 +00:00
|
|
|
|
2020-12-09 16:32:29 +00:00
|
|
|
{- Starts a new git-annex transfer process, setting up handles
|
2013-03-19 22:46:29 +00:00
|
|
|
- that will be used to communicate with it. -}
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
mkTransferrer :: SignalActionsVar -> RunTransferrer -> IO Transferrer
|
|
|
|
mkTransferrer signalactonsvar (RunTransferrer program params batchmaker) = do
|
2013-12-01 18:56:37 +00:00
|
|
|
{- It runs as a batch job. -}
|
propagate git-annex -c on to transferrer child process
git -c was already propagated via environment, but need this for
consistency.
Also, notice it does not use gitAnnexChildProcess to run the
transferrer. So nothing is done about avoid it taking the
pid lock. It's possible that the caller is already doing something that
took the pid lock, and if so, the transferrer will certianly fail,
since it needs to take the pid lock too. This may prevent combining
annex.stalldetection with annex.pidlock, but I have not verified it's
really a problem. If it was, it seems git-annex would have to take
the pid lock when starting a transferrer, and hold it until shutdown,
or would need to take pid lock when starting to use a transferrer,
and hold it until done with a transfer and then drop it. The latter
would require starting the transferrer with pid locking disabled for the
child process, so assumes that the transferrer does not do anyting that
needs locking when not running a transfer.
2020-12-15 15:36:25 +00:00
|
|
|
let (program', params') = batchmaker (program, params)
|
2013-03-19 22:46:29 +00:00
|
|
|
{- It's put into its own group so that the whole group can be
|
|
|
|
- killed to stop a transfer. -}
|
2020-12-11 19:28:58 +00:00
|
|
|
(Just writeh, Just readh, _, ph) <- createProcess
|
2013-12-11 03:19:18 +00:00
|
|
|
(proc program' $ toCommand params')
|
|
|
|
{ create_group = True
|
|
|
|
, std_in = CreatePipe
|
|
|
|
, std_out = CreatePipe
|
|
|
|
}
|
2020-12-11 19:28:58 +00:00
|
|
|
|
|
|
|
{- Set up signal propagation, so eg ctrl-c will also interrupt
|
|
|
|
- the processes in the transferrer's process group.
|
|
|
|
-
|
|
|
|
- There is a race between the process being created and this point.
|
|
|
|
- If a signal is received before this can run, it is not sent to
|
|
|
|
- the transferrer. This leaves the transferrer waiting for the
|
|
|
|
- first message on stdin to tell what to do. If the signal kills
|
|
|
|
- this parent process, the transferrer will then get a sigpipe
|
|
|
|
- and die too. If the signal suspends this parent process,
|
|
|
|
- it's ok to leave the transferrer running, as it's waiting on
|
|
|
|
- the pipe until this process wakes back up.
|
|
|
|
-}
|
|
|
|
#ifndef mingw32_HOST_OS
|
|
|
|
pid <- getPid ph
|
|
|
|
unregistersignalprop <- case pid of
|
|
|
|
Just p -> getProcessGroupIDOf p >>= \pgrp -> do
|
|
|
|
atomically $ modifyTVar' signalactonsvar $
|
|
|
|
M.insert (PropagateSignalProcessGroup p) $ \sig ->
|
|
|
|
signalProcessGroup (fromIntegral sig) pgrp
|
|
|
|
return $ atomically $ modifyTVar' signalactonsvar $
|
|
|
|
M.delete (PropagateSignalProcessGroup p)
|
|
|
|
Nothing -> return noop
|
|
|
|
#else
|
|
|
|
let unregistersignalprop = noop
|
|
|
|
#endif
|
|
|
|
|
2013-03-19 22:46:29 +00:00
|
|
|
return $ Transferrer
|
2013-12-11 03:19:18 +00:00
|
|
|
{ transferrerRead = readh
|
|
|
|
, transferrerWrite = writeh
|
2020-12-11 19:28:58 +00:00
|
|
|
, transferrerHandle = ph
|
|
|
|
, transferrerShutdown = do
|
2022-09-22 18:35:20 +00:00
|
|
|
-- The transferrer may write to stdout
|
|
|
|
-- as it's shutting down, so don't close
|
|
|
|
-- the readh right away. Instead, drain
|
|
|
|
-- anything sent to it.
|
|
|
|
drainer <- async $ void $ hGetContents readh
|
2020-12-11 19:28:58 +00:00
|
|
|
hClose writeh
|
|
|
|
void $ waitForProcess ph
|
2022-09-22 18:35:20 +00:00
|
|
|
wait drainer
|
|
|
|
hClose readh
|
2020-12-11 19:28:58 +00:00
|
|
|
unregistersignalprop
|
2013-03-19 22:46:29 +00:00
|
|
|
}
|
|
|
|
|
2020-12-07 20:11:29 +00:00
|
|
|
-- | Send a request to perform a transfer.
|
|
|
|
sendRequest :: TransferRequestLevel -> Transfer -> Maybe Remote -> AssociatedFile -> Handle -> IO ()
|
|
|
|
sendRequest level t mremote afile h = do
|
2020-12-09 19:44:00 +00:00
|
|
|
let tr = maybe
|
|
|
|
(TransferRemoteUUID (transferUUID t))
|
|
|
|
(TransferRemoteName . Remote.name)
|
|
|
|
mremote
|
|
|
|
let f = case (level, transferDirection t) of
|
|
|
|
(AnnexLevel, Upload) -> UploadRequest
|
|
|
|
(AnnexLevel, Download) -> DownloadRequest
|
|
|
|
(AssistantLevel, Upload) -> AssistantUploadRequest
|
|
|
|
(AssistantLevel, Download) -> AssistantDownloadRequest
|
|
|
|
let r = f tr (transferKey t) (TransferAssociatedFile afile)
|
|
|
|
let l = unwords $ Proto.formatMessage r
|
2021-04-05 17:40:31 +00:00
|
|
|
debug "Annex.TransferrerPool" ("> " ++ l)
|
2020-12-07 20:11:29 +00:00
|
|
|
hPutStrLn h l
|
|
|
|
hFlush h
|
|
|
|
|
|
|
|
sendSerializedOutputResponse :: Handle -> SerializedOutputResponse -> IO ()
|
2020-12-09 19:44:00 +00:00
|
|
|
sendSerializedOutputResponse h sor = do
|
|
|
|
let l = unwords $ Proto.formatMessage $
|
|
|
|
TransferSerializedOutputResponse sor
|
2021-04-05 17:40:31 +00:00
|
|
|
debug "Annex.TransferrerPool" ("> " ++ show l)
|
2020-12-09 19:44:00 +00:00
|
|
|
hPutStrLn h l
|
|
|
|
hFlush h
|
2020-12-07 20:11:29 +00:00
|
|
|
|
2020-12-11 19:28:58 +00:00
|
|
|
-- | Read a response to a transfer request.
|
2020-12-07 20:11:29 +00:00
|
|
|
--
|
|
|
|
-- Before the final response, this will return whatever SerializedOutput
|
|
|
|
-- should be displayed as the transfer is performed.
|
|
|
|
readResponse :: Handle -> IO (Either SerializedOutput Bool)
|
|
|
|
readResponse h = do
|
|
|
|
l <- liftIO $ hGetLine h
|
2021-04-05 17:40:31 +00:00
|
|
|
debug "Annex.TransferrerPool" ("< " ++ l)
|
2020-12-09 19:44:00 +00:00
|
|
|
case Proto.parseMessage l of
|
2020-12-07 20:11:29 +00:00
|
|
|
Just (TransferOutput so) -> return (Left so)
|
|
|
|
Just (TransferResult r) -> return (Right r)
|
2020-12-09 17:21:20 +00:00
|
|
|
Nothing -> transferrerProtocolError l
|
2020-12-07 20:11:29 +00:00
|
|
|
|
2020-12-09 17:21:20 +00:00
|
|
|
transferrerProtocolError :: String -> a
|
2020-12-09 19:44:00 +00:00
|
|
|
transferrerProtocolError l = giveup $ "transferrer protocol error: " ++ show l
|
2020-12-08 15:43:06 +00:00
|
|
|
|
|
|
|
{- Kill the transferrer, and all its child processes. -}
|
|
|
|
killTransferrer :: Transferrer -> IO ()
|
|
|
|
killTransferrer t = do
|
|
|
|
interruptProcessGroupOf $ transferrerHandle t
|
|
|
|
threadDelay 50000 -- 0.05 second grace period
|
|
|
|
terminateProcess $ transferrerHandle t
|
2020-12-09 17:10:35 +00:00
|
|
|
|
|
|
|
{- Stop all transferrers in the pool. -}
|
|
|
|
emptyTransferrerPool :: Annex ()
|
|
|
|
emptyTransferrerPool = do
|
2021-04-02 19:26:21 +00:00
|
|
|
poolvar <- Annex.getRead Annex.transferrerpool
|
2020-12-09 17:10:35 +00:00
|
|
|
pool <- liftIO $ atomically $ swapTVar poolvar []
|
|
|
|
liftIO $ forM_ pool $ \case
|
2020-12-11 19:28:58 +00:00
|
|
|
TransferrerPoolItem (Just t) _ -> transferrerShutdown t
|
2020-12-09 17:10:35 +00:00
|
|
|
TransferrerPoolItem Nothing _ -> noop
|
2022-09-23 19:55:40 +00:00
|
|
|
-- Transferrers usually restage pointer files themselves,
|
|
|
|
-- but when killTransferrer is used, a transferrer may have
|
|
|
|
-- pointer files it has not gotten around to restaging yet.
|
|
|
|
-- So, restage pointer files here in clean up from such killed
|
|
|
|
-- transferrers.
|
|
|
|
unless (null pool) $
|
|
|
|
restagePointerFiles =<< Annex.gitRepo
|