stall detection is working
New config annex.stalldetection, remote.name.annex-stalldetection, which can be used to deal with remotes that stall during transfers, or are sometimes too slow to want to use. This commit was sponsored by Luke Shumaker on Patreon.
This commit is contained in:
parent
09ed9f7d1f
commit
41f2c308ff
11 changed files with 213 additions and 46 deletions
|
@ -19,6 +19,7 @@ module Annex.Transfer (
|
||||||
noRetry,
|
noRetry,
|
||||||
stdRetry,
|
stdRetry,
|
||||||
pickRemote,
|
pickRemote,
|
||||||
|
stallDetection,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
|
@ -40,6 +41,7 @@ import Types.WorkerPool
|
||||||
import Annex.WorkerPool
|
import Annex.WorkerPool
|
||||||
import Annex.TransferrerPool
|
import Annex.TransferrerPool
|
||||||
import Backend (isCryptographicallySecure)
|
import Backend (isCryptographicallySecure)
|
||||||
|
import Types.StallDetection
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
|
@ -47,19 +49,15 @@ import qualified Data.Map.Strict as M
|
||||||
import qualified System.FilePath.ByteString as P
|
import qualified System.FilePath.ByteString as P
|
||||||
import Data.Ord
|
import Data.Ord
|
||||||
|
|
||||||
|
-- Upload, supporting stall detection.
|
||||||
upload :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool
|
upload :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool
|
||||||
upload r key f d _witness =
|
upload r key f d witness = stallDetection r >>= \case
|
||||||
-- TODO: use this when not handling timeouts
|
Nothing -> upload' (Remote.uuid r) key f d go witness
|
||||||
--upload' (Remote.uuid r) key f d $
|
Just sd -> runTransferrer sd r key f d Upload witness
|
||||||
-- action . Remote.storeKey r key f
|
where
|
||||||
|
go = action . Remote.storeKey r key f
|
||||||
-- TODO: RetryDecider
|
|
||||||
-- TODO: Handle timeouts
|
|
||||||
withTransferrer $ \transferrer ->
|
|
||||||
performTransfer transferrer AnnexLevel
|
|
||||||
(Transfer Upload (Remote.uuid r) (fromKey id key))
|
|
||||||
(Just r) f id
|
|
||||||
|
|
||||||
|
-- Upload, not supporting stall detection.
|
||||||
upload' :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
|
upload' :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
|
||||||
upload' u key f d a _witness = guardHaveUUID u $
|
upload' u key f d a _witness = guardHaveUUID u $
|
||||||
runTransfer (Transfer Upload u (fromKey id key)) f d a
|
runTransfer (Transfer Upload u (fromKey id key)) f d a
|
||||||
|
@ -68,22 +66,17 @@ alwaysUpload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider ->
|
||||||
alwaysUpload u key f d a _witness = guardHaveUUID u $
|
alwaysUpload u key f d a _witness = guardHaveUUID u $
|
||||||
alwaysRunTransfer (Transfer Upload u (fromKey id key)) f d a
|
alwaysRunTransfer (Transfer Upload u (fromKey id key)) f d a
|
||||||
|
|
||||||
|
-- Download, supporting stall detection.
|
||||||
download :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool
|
download :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool
|
||||||
download r key f d witness =
|
download r key f d witness = stallDetection r >>= \case
|
||||||
-- TODO: use this when not handling timeouts
|
Nothing -> getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) key f $ \dest ->
|
||||||
--getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) key f $ \dest ->
|
download' (Remote.uuid r) key f d (go dest) witness
|
||||||
-- download' (Remote.uuid r) key f d (go dest) witness
|
Just sd -> runTransferrer sd r key f d Download witness
|
||||||
|
|
||||||
-- TODO: RetryDecider
|
|
||||||
-- TODO: Handle timeouts
|
|
||||||
withTransferrer $ \transferrer ->
|
|
||||||
performTransfer transferrer AnnexLevel
|
|
||||||
(Transfer Download (Remote.uuid r) (fromKey id key))
|
|
||||||
(Just r) f id
|
|
||||||
where
|
where
|
||||||
go dest p = verifiedAction $
|
go dest p = verifiedAction $
|
||||||
Remote.retrieveKeyFile r key f (fromRawFilePath dest) p
|
Remote.retrieveKeyFile r key f (fromRawFilePath dest) p
|
||||||
|
|
||||||
|
-- Download, not supporting stall detection.
|
||||||
download' :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
|
download' :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
|
||||||
download' u key f d a _witness = guardHaveUUID u $
|
download' u key f d a _witness = guardHaveUUID u $
|
||||||
runTransfer (Transfer Download u (fromKey id key)) f d a
|
runTransfer (Transfer Download u (fromKey id key)) f d a
|
||||||
|
@ -115,7 +108,7 @@ alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider
|
||||||
alwaysRunTransfer = runTransfer' True
|
alwaysRunTransfer = runTransfer' True
|
||||||
|
|
||||||
runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
|
runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
|
||||||
runTransfer' ignorelock t afile retrydecider transferaction = enteringStage TransferStage $ debugLocks $ preCheckSecureHashes t $ do
|
runTransfer' ignorelock t afile retrydecider transferaction = enteringStage TransferStage $ debugLocks $ preCheckSecureHashes (transferKey t) $ do
|
||||||
info <- liftIO $ startTransferInfo afile
|
info <- liftIO $ startTransferInfo afile
|
||||||
(meter, tfile, createtfile, metervar) <- mkProgressUpdater t info
|
(meter, tfile, createtfile, metervar) <- mkProgressUpdater t info
|
||||||
mode <- annexFileMode
|
mode <- annexFileMode
|
||||||
|
@ -202,6 +195,31 @@ runTransfer' ignorelock t afile retrydecider transferaction = enteringStage Tran
|
||||||
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
|
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
|
||||||
liftIO $ catchDefaultIO 0 $ getFileSize f
|
liftIO $ catchDefaultIO 0 $ getFileSize f
|
||||||
|
|
||||||
|
runTransferrer
|
||||||
|
:: StallDetection
|
||||||
|
-> Remote
|
||||||
|
-> Key
|
||||||
|
-> AssociatedFile
|
||||||
|
-> RetryDecider
|
||||||
|
-> Direction
|
||||||
|
-> NotifyWitness
|
||||||
|
-> Annex Bool
|
||||||
|
runTransferrer sd r k afile retrydecider direction _witness =
|
||||||
|
enteringStage TransferStage $ preCheckSecureHashes k $ do
|
||||||
|
info <- liftIO $ startTransferInfo afile
|
||||||
|
go 0 info
|
||||||
|
where
|
||||||
|
go numretries info =
|
||||||
|
withTransferrer (performTransfer (Just sd) AnnexLevel id (Just r) t info) >>= \case
|
||||||
|
Right () -> return True
|
||||||
|
Left newinfo -> do
|
||||||
|
let !numretries' = succ numretries
|
||||||
|
ifM (retrydecider numretries' info newinfo)
|
||||||
|
( go numretries' newinfo
|
||||||
|
, return False
|
||||||
|
)
|
||||||
|
t = Transfer direction (Remote.uuid r) (fromKey id k)
|
||||||
|
|
||||||
{- Avoid download and upload of keys with insecure content when
|
{- Avoid download and upload of keys with insecure content when
|
||||||
- annex.securehashesonly is configured.
|
- annex.securehashesonly is configured.
|
||||||
-
|
-
|
||||||
|
@ -214,8 +232,8 @@ runTransfer' ignorelock t afile retrydecider transferaction = enteringStage Tran
|
||||||
- still contains content using an insecure hash, remotes will likewise
|
- still contains content using an insecure hash, remotes will likewise
|
||||||
- tend to be configured to reject it, so Upload is also prevented.
|
- tend to be configured to reject it, so Upload is also prevented.
|
||||||
-}
|
-}
|
||||||
preCheckSecureHashes :: Observable v => Transfer -> Annex v -> Annex v
|
preCheckSecureHashes :: Observable v => Key -> Annex v -> Annex v
|
||||||
preCheckSecureHashes t a = ifM (isCryptographicallySecure (transferKey t))
|
preCheckSecureHashes k a = ifM (isCryptographicallySecure k)
|
||||||
( a
|
( a
|
||||||
, ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
|
, ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
|
||||||
( do
|
( do
|
||||||
|
@ -225,7 +243,7 @@ preCheckSecureHashes t a = ifM (isCryptographicallySecure (transferKey t))
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
variety = fromKey keyVariety (transferKey t)
|
variety = fromKey keyVariety k
|
||||||
|
|
||||||
type NumRetries = Integer
|
type NumRetries = Integer
|
||||||
|
|
||||||
|
@ -348,3 +366,9 @@ lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
|
||||||
lessActiveFirst active a b
|
lessActiveFirst active a b
|
||||||
| Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b
|
| Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b
|
||||||
| otherwise = comparing Remote.cost a b
|
| otherwise = comparing Remote.cost a b
|
||||||
|
|
||||||
|
stallDetection :: Remote -> Annex (Maybe StallDetection)
|
||||||
|
stallDetection r = maybe globalcfg (pure . Just) remotecfg
|
||||||
|
where
|
||||||
|
globalcfg = annexStallDetection <$> Annex.getGitConfig
|
||||||
|
remotecfg = remoteAnnexStallDetection $ Remote.gitconfig r
|
||||||
|
|
|
@ -16,15 +16,21 @@ import Types.Transfer
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import qualified Types.Remote as Remote
|
import qualified Types.Remote as Remote
|
||||||
import Git.Types (RemoteName)
|
import Git.Types (RemoteName)
|
||||||
|
import Types.StallDetection
|
||||||
import Types.Messages
|
import Types.Messages
|
||||||
import Messages.Serialized
|
import Messages.Serialized
|
||||||
import Annex.Path
|
import Annex.Path
|
||||||
import Utility.Batch
|
import Utility.Batch
|
||||||
|
import Utility.Metered
|
||||||
|
import Utility.HumanTime
|
||||||
|
import Utility.ThreadScheduler
|
||||||
|
|
||||||
import Control.Concurrent.STM hiding (check)
|
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
|
import Control.Concurrent.Async
|
||||||
|
import Control.Concurrent.STM hiding (check)
|
||||||
import Control.Monad.IO.Class (MonadIO)
|
import Control.Monad.IO.Class (MonadIO)
|
||||||
import Text.Read (readMaybe)
|
import Text.Read (readMaybe)
|
||||||
|
import Data.Time.Clock.POSIX
|
||||||
import System.Log.Logger (debugM)
|
import System.Log.Logger (debugM)
|
||||||
|
|
||||||
data TransferRequest = TransferRequest TransferRequestLevel Direction (Either UUID RemoteName) KeyData AssociatedFile
|
data TransferRequest = TransferRequest TransferRequestLevel Direction (Either UUID RemoteName) KeyData AssociatedFile
|
||||||
|
@ -68,7 +74,11 @@ withTransferrer' minimizeprocesses mkcheck program batchmaker pool a = do
|
||||||
where
|
where
|
||||||
returntopool leftinpool check t i
|
returntopool leftinpool check t i
|
||||||
| not minimizeprocesses || leftinpool == 0 =
|
| not minimizeprocesses || leftinpool == 0 =
|
||||||
liftIO $ atomically $ pushTransferrerPool pool i
|
-- If the transferrer got killed, the handles will
|
||||||
|
-- be closed, so it should not be returned to the
|
||||||
|
-- pool.
|
||||||
|
liftIO $ whenM (hIsOpen (transferrerWrite t)) $
|
||||||
|
liftIO $ atomically $ pushTransferrerPool pool i
|
||||||
| otherwise = liftIO $ do
|
| otherwise = liftIO $ do
|
||||||
void $ forkIO $ shutdownTransferrer t
|
void $ forkIO $ shutdownTransferrer t
|
||||||
atomically $ pushTransferrerPool pool $ TransferrerPoolItem Nothing check
|
atomically $ pushTransferrerPool pool $ TransferrerPoolItem Nothing check
|
||||||
|
@ -90,24 +100,102 @@ checkTransferrerPoolItem program batchmaker i = case i of
|
||||||
return $ TransferrerPoolItem (Just t) check
|
return $ TransferrerPoolItem (Just t) check
|
||||||
|
|
||||||
{- Requests that a Transferrer perform a Transfer, and waits for it to
|
{- Requests that a Transferrer perform a Transfer, and waits for it to
|
||||||
- finish. -}
|
- finish.
|
||||||
|
-
|
||||||
|
- When a stall is detected, kills the Transferrer.
|
||||||
|
-
|
||||||
|
- If the transfer failed or stalled, returns TransferInfo with an
|
||||||
|
- updated bytesComplete reflecting how much data has been transferred.
|
||||||
|
-}
|
||||||
performTransfer
|
performTransfer
|
||||||
:: (Monad m, MonadIO m, MonadMask m)
|
:: (Monad m, MonadIO m, MonadMask m)
|
||||||
=> Transferrer
|
=> Maybe StallDetection
|
||||||
-> TransferRequestLevel
|
-> TransferRequestLevel
|
||||||
-> Transfer
|
|
||||||
-> Maybe Remote
|
|
||||||
-> AssociatedFile
|
|
||||||
-> (forall a. Annex a -> m a)
|
-> (forall a. Annex a -> m a)
|
||||||
-- ^ Run an annex action in the monad. Will not be used with
|
-- ^ Run an annex action in the monad. Will not be used with
|
||||||
-- actions that block for a long time.
|
-- actions that block for a long time.
|
||||||
-> m Bool
|
-> Maybe Remote
|
||||||
performTransfer transferrer level t mremote afile runannex = catchBoolIO $ do
|
-> Transfer
|
||||||
(liftIO $ sendRequest level t mremote afile (transferrerWrite transferrer))
|
-> TransferInfo
|
||||||
relaySerializedOutput
|
-> Transferrer
|
||||||
|
-> m (Either TransferInfo ())
|
||||||
|
performTransfer stalldetection level runannex r t info transferrer = do
|
||||||
|
bpv <- liftIO $ newTVarIO zeroBytesProcessed
|
||||||
|
ifM (catchBoolIO $ bracket setup cleanup (go bpv))
|
||||||
|
( return (Right ())
|
||||||
|
, do
|
||||||
|
n <- case transferDirection t of
|
||||||
|
Upload -> liftIO $ atomically $
|
||||||
|
fromBytesProcessed <$> readTVar bpv
|
||||||
|
Download -> do
|
||||||
|
f <- runannex $ fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
|
||||||
|
liftIO $ catchDefaultIO 0 $ getFileSize f
|
||||||
|
return $ Left $ info { bytesComplete = Just n }
|
||||||
|
)
|
||||||
|
where
|
||||||
|
setup = do
|
||||||
|
liftIO $ sendRequest level t r
|
||||||
|
(associatedFile info)
|
||||||
|
(transferrerWrite transferrer)
|
||||||
|
metervar <- liftIO $ newEmptyTMVarIO
|
||||||
|
stalledvar <- liftIO $ newTVarIO False
|
||||||
|
tid <- liftIO $ async $
|
||||||
|
detectStalls stalldetection metervar $ do
|
||||||
|
atomically $ writeTVar stalledvar True
|
||||||
|
killTransferrer transferrer
|
||||||
|
return (metervar, tid, stalledvar)
|
||||||
|
|
||||||
|
cleanup (_, tid, stalledvar) = do
|
||||||
|
liftIO $ uninterruptibleCancel tid
|
||||||
|
whenM (liftIO $ atomically $ readTVar stalledvar) $ do
|
||||||
|
runannex $ showLongNote "Transfer stalled"
|
||||||
|
-- Close handles, to prevent the transferrer being
|
||||||
|
-- reused since the process was killed.
|
||||||
|
liftIO $ hClose $ transferrerRead transferrer
|
||||||
|
liftIO $ hClose $ transferrerWrite transferrer
|
||||||
|
|
||||||
|
go bpv (metervar, _, _) = relaySerializedOutput
|
||||||
(liftIO $ readResponse (transferrerRead transferrer))
|
(liftIO $ readResponse (transferrerRead transferrer))
|
||||||
(liftIO . sendSerializedOutputResponse (transferrerWrite transferrer))
|
(liftIO . sendSerializedOutputResponse (transferrerWrite transferrer))
|
||||||
|
(updatemeter bpv metervar)
|
||||||
runannex
|
runannex
|
||||||
|
|
||||||
|
updatemeter bpv metervar (Just n) = liftIO $ do
|
||||||
|
atomically $ do
|
||||||
|
void $ tryTakeTMVar metervar
|
||||||
|
putTMVar metervar n
|
||||||
|
atomically $ writeTVar bpv n
|
||||||
|
updatemeter _bpv metervar Nothing = liftIO $
|
||||||
|
atomically $ void $ tryTakeTMVar metervar
|
||||||
|
|
||||||
|
detectStalls :: Maybe StallDetection -> TMVar BytesProcessed -> IO () -> IO ()
|
||||||
|
detectStalls Nothing _ _ = noop
|
||||||
|
detectStalls (Just (StallDetection minsz duration)) metervar onstall = go Nothing
|
||||||
|
where
|
||||||
|
go st = do
|
||||||
|
starttm <- getPOSIXTime
|
||||||
|
threadDelaySeconds (Seconds (fromIntegral (durationSeconds duration)))
|
||||||
|
-- Get whatever progress value was reported most recently, or
|
||||||
|
-- if none were reported since last time, wait until one is
|
||||||
|
-- reported.
|
||||||
|
sofar <- atomically $ fromBytesProcessed <$> takeTMVar metervar
|
||||||
|
case st of
|
||||||
|
Nothing -> go (Just sofar)
|
||||||
|
Just prev
|
||||||
|
-- Just in case a progress meter somehow runs
|
||||||
|
-- backwards, or a second progress meter was
|
||||||
|
-- started and is at a smaller value than
|
||||||
|
-- the previous one.
|
||||||
|
| prev > sofar -> go (Just sofar)
|
||||||
|
| otherwise -> do
|
||||||
|
endtm <- getPOSIXTime
|
||||||
|
let actualduration = endtm - starttm
|
||||||
|
let sz = sofar - prev
|
||||||
|
let expectedsz = (minsz * durationSeconds duration)
|
||||||
|
`div` max 1 (ceiling actualduration)
|
||||||
|
if sz < expectedsz
|
||||||
|
then onstall
|
||||||
|
else go (Just sofar)
|
||||||
|
|
||||||
{- Starts a new git-annex transferkeys process, setting up handles
|
{- Starts a new git-annex transferkeys process, setting up handles
|
||||||
- that will be used to communicate with it. -}
|
- that will be used to communicate with it. -}
|
||||||
|
@ -171,8 +259,6 @@ shutdownTransferrer t = do
|
||||||
{- Kill the transferrer, and all its child processes. -}
|
{- Kill the transferrer, and all its child processes. -}
|
||||||
killTransferrer :: Transferrer -> IO ()
|
killTransferrer :: Transferrer -> IO ()
|
||||||
killTransferrer t = do
|
killTransferrer t = do
|
||||||
hClose $ transferrerRead t
|
|
||||||
hClose $ transferrerWrite t
|
|
||||||
interruptProcessGroupOf $ transferrerHandle t
|
interruptProcessGroupOf $ transferrerHandle t
|
||||||
threadDelay 50000 -- 0.05 second grace period
|
threadDelay 50000 -- 0.05 second grace period
|
||||||
terminateProcess $ transferrerHandle t
|
terminateProcess $ transferrerHandle t
|
||||||
|
|
|
@ -24,6 +24,7 @@ import Assistant.Alert
|
||||||
import Assistant.Alert.Utility
|
import Assistant.Alert.Utility
|
||||||
import Assistant.Commits
|
import Assistant.Commits
|
||||||
import Assistant.Drop
|
import Assistant.Drop
|
||||||
|
import Annex.Transfer (stallDetection)
|
||||||
import Types.Transfer
|
import Types.Transfer
|
||||||
import Logs.Transfer
|
import Logs.Transfer
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
|
@ -37,6 +38,7 @@ import Annex.Path
|
||||||
import Utility.Batch
|
import Utility.Batch
|
||||||
import Types.NumCopies
|
import Types.NumCopies
|
||||||
|
|
||||||
|
import Data.Either
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import qualified Control.Exception as E
|
import qualified Control.Exception as E
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
|
@ -123,7 +125,8 @@ genTransfer t info = case transferRemote info of
|
||||||
( do
|
( do
|
||||||
debug [ "Transferring:" , describeTransfer t info ]
|
debug [ "Transferring:" , describeTransfer t info ]
|
||||||
notifyTransfer
|
notifyTransfer
|
||||||
return $ Just (t, info, go remote)
|
sd <- liftAnnex $ stallDetection remote
|
||||||
|
return $ Just (t, info, go remote sd)
|
||||||
, do
|
, do
|
||||||
debug [ "Skipping unnecessary transfer:",
|
debug [ "Skipping unnecessary transfer:",
|
||||||
describeTransfer t info ]
|
describeTransfer t info ]
|
||||||
|
@ -162,7 +165,7 @@ genTransfer t info = case transferRemote info of
|
||||||
- usual cleanup. However, first check if something else is
|
- usual cleanup. However, first check if something else is
|
||||||
- running the transfer, to avoid removing active transfers.
|
- running the transfer, to avoid removing active transfers.
|
||||||
-}
|
-}
|
||||||
go remote transferrer = ifM (performTransfer transferrer AssistantLevel t (transferRemote info) (associatedFile info) liftAnnex)
|
go remote sd transferrer = ifM (isRight <$> performTransfer sd AssistantLevel liftAnnex (transferRemote info) t info transferrer)
|
||||||
( do
|
( do
|
||||||
case associatedFile info of
|
case associatedFile info of
|
||||||
AssociatedFile Nothing -> noop
|
AssociatedFile Nothing -> noop
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
git-annex (8.20201128) UNRELEASED; urgency=medium
|
git-annex (8.20201128) UNRELEASED; urgency=medium
|
||||||
|
|
||||||
|
* New config annex.stalldetection, remote.name.annex-stalldetection,
|
||||||
|
which can be used to deal with remotes that stall during transfers,
|
||||||
|
or are sometimes too slow to want to use.
|
||||||
* Fix hang on shutdown of external special remote using ASYNC protocol
|
* Fix hang on shutdown of external special remote using ASYNC protocol
|
||||||
extension. (Reversion introduced in version 8.20201007.)
|
extension. (Reversion introduced in version 8.20201007.)
|
||||||
* Fix bug that made the next download after an empty file from a ssh
|
* Fix bug that made the next download after an empty file from a ssh
|
||||||
|
|
|
@ -20,6 +20,7 @@ import Messages
|
||||||
import Messages.Internal
|
import Messages.Internal
|
||||||
import Messages.Progress
|
import Messages.Progress
|
||||||
import qualified Messages.JSON as JSON
|
import qualified Messages.JSON as JSON
|
||||||
|
import Utility.Metered (BytesProcessed)
|
||||||
|
|
||||||
import Control.Monad.IO.Class (MonadIO)
|
import Control.Monad.IO.Class (MonadIO)
|
||||||
|
|
||||||
|
@ -29,11 +30,16 @@ relaySerializedOutput
|
||||||
=> m (Either SerializedOutput r)
|
=> m (Either SerializedOutput r)
|
||||||
-- ^ Get next serialized output, or final value to return.
|
-- ^ Get next serialized output, or final value to return.
|
||||||
-> (SerializedOutputResponse -> m ())
|
-> (SerializedOutputResponse -> m ())
|
||||||
|
-- ^ Send response to child process.
|
||||||
|
-> (Maybe BytesProcessed -> m ())
|
||||||
|
-- ^ When a progress meter is running, is updated with
|
||||||
|
-- progress meter values sent by the process.
|
||||||
|
-- When a progress meter is stopped, Nothing is sent.
|
||||||
-> (forall a. Annex a -> m a)
|
-> (forall a. Annex a -> m a)
|
||||||
-- ^ Run an annex action in the monad. Will not be used with
|
-- ^ Run an annex action in the monad. Will not be used with
|
||||||
-- actions that block for a long time.
|
-- actions that block for a long time.
|
||||||
-> m r
|
-> m r
|
||||||
relaySerializedOutput getso sendsor runannex = go Nothing
|
relaySerializedOutput getso sendsor meterreport runannex = go Nothing
|
||||||
where
|
where
|
||||||
go st = loop st >>= \case
|
go st = loop st >>= \case
|
||||||
Right r -> return r
|
Right r -> return r
|
||||||
|
@ -69,10 +75,14 @@ relaySerializedOutput getso sendsor runannex = go Nothing
|
||||||
-- output after the progress meter
|
-- output after the progress meter
|
||||||
-- is done.
|
-- is done.
|
||||||
Left _st' -> loop Nothing
|
Left _st' -> loop Nothing
|
||||||
Left EndProgressMeter -> return (Left st)
|
Left EndProgressMeter -> do
|
||||||
|
meterreport Nothing
|
||||||
|
return (Left st)
|
||||||
Left (UpdateProgressMeter n) -> do
|
Left (UpdateProgressMeter n) -> do
|
||||||
case st of
|
case st of
|
||||||
Just meterupdate -> liftIO $ meterupdate n
|
Just meterupdate -> do
|
||||||
|
meterreport (Just n)
|
||||||
|
liftIO $ meterupdate n
|
||||||
Nothing -> noop
|
Nothing -> noop
|
||||||
loop st
|
loop st
|
||||||
Left StartPrompt -> do
|
Left StartPrompt -> do
|
||||||
|
|
|
@ -41,6 +41,7 @@ import Types.NumCopies
|
||||||
import Types.Difference
|
import Types.Difference
|
||||||
import Types.RefSpec
|
import Types.RefSpec
|
||||||
import Types.RepoVersion
|
import Types.RepoVersion
|
||||||
|
import Types.StallDetection
|
||||||
import Config.DynamicConfig
|
import Config.DynamicConfig
|
||||||
import Utility.HumanTime
|
import Utility.HumanTime
|
||||||
import Utility.Gpg (GpgCmd, mkGpgCmd)
|
import Utility.Gpg (GpgCmd, mkGpgCmd)
|
||||||
|
@ -116,6 +117,7 @@ data GitConfig = GitConfig
|
||||||
, annexRetry :: Maybe Integer
|
, annexRetry :: Maybe Integer
|
||||||
, annexForwardRetry :: Maybe Integer
|
, annexForwardRetry :: Maybe Integer
|
||||||
, annexRetryDelay :: Maybe Seconds
|
, annexRetryDelay :: Maybe Seconds
|
||||||
|
, annexStallDetection :: Maybe StallDetection
|
||||||
, annexAllowedUrlSchemes :: S.Set Scheme
|
, annexAllowedUrlSchemes :: S.Set Scheme
|
||||||
, annexAllowedIPAddresses :: String
|
, annexAllowedIPAddresses :: String
|
||||||
, annexAllowUnverifiedDownloads :: Bool
|
, annexAllowUnverifiedDownloads :: Bool
|
||||||
|
@ -202,6 +204,9 @@ extractGitConfig configsource r = GitConfig
|
||||||
, annexForwardRetry = getmayberead (annexConfig "forward-retry")
|
, annexForwardRetry = getmayberead (annexConfig "forward-retry")
|
||||||
, annexRetryDelay = Seconds
|
, annexRetryDelay = Seconds
|
||||||
<$> getmayberead (annexConfig "retrydelay")
|
<$> getmayberead (annexConfig "retrydelay")
|
||||||
|
, annexStallDetection =
|
||||||
|
either (const Nothing) Just . parseStallDetection
|
||||||
|
=<< getmaybe (annexConfig "stalldetection")
|
||||||
, annexAllowedUrlSchemes = S.fromList $ map mkScheme $
|
, annexAllowedUrlSchemes = S.fromList $ map mkScheme $
|
||||||
maybe ["http", "https", "ftp"] words $
|
maybe ["http", "https", "ftp"] words $
|
||||||
getmaybe (annexConfig "security.allowed-url-schemes")
|
getmaybe (annexConfig "security.allowed-url-schemes")
|
||||||
|
@ -306,6 +311,7 @@ data RemoteGitConfig = RemoteGitConfig
|
||||||
, remoteAnnexRetry :: Maybe Integer
|
, remoteAnnexRetry :: Maybe Integer
|
||||||
, remoteAnnexForwardRetry :: Maybe Integer
|
, remoteAnnexForwardRetry :: Maybe Integer
|
||||||
, remoteAnnexRetryDelay :: Maybe Seconds
|
, remoteAnnexRetryDelay :: Maybe Seconds
|
||||||
|
, remoteAnnexStallDetection :: Maybe StallDetection
|
||||||
, remoteAnnexAllowUnverifiedDownloads :: Bool
|
, remoteAnnexAllowUnverifiedDownloads :: Bool
|
||||||
, remoteAnnexConfigUUID :: Maybe UUID
|
, remoteAnnexConfigUUID :: Maybe UUID
|
||||||
|
|
||||||
|
@ -369,6 +375,9 @@ extractRemoteGitConfig r remotename = do
|
||||||
, remoteAnnexForwardRetry = getmayberead "forward-retry"
|
, remoteAnnexForwardRetry = getmayberead "forward-retry"
|
||||||
, remoteAnnexRetryDelay = Seconds
|
, remoteAnnexRetryDelay = Seconds
|
||||||
<$> getmayberead "retrydelay"
|
<$> getmayberead "retrydelay"
|
||||||
|
, remoteAnnexStallDetection =
|
||||||
|
either (const Nothing) Just . parseStallDetection
|
||||||
|
=<< getmaybe "stalldetection"
|
||||||
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
||||||
getmaybe ("security-allow-unverified-downloads")
|
getmaybe ("security-allow-unverified-downloads")
|
||||||
, remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid"
|
, remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid"
|
||||||
|
|
29
Types/StallDetection.hs
Normal file
29
Types/StallDetection.hs
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
{- types for stall detection
|
||||||
|
-
|
||||||
|
- Copyright 2020 Joey Hess <id@joeyh.name>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Types.StallDetection where
|
||||||
|
|
||||||
|
import Utility.DataUnits
|
||||||
|
import Utility.HumanTime
|
||||||
|
import Utility.Misc
|
||||||
|
|
||||||
|
-- Unless the given number of bytes have been sent over the given
|
||||||
|
-- amount of time, there's a stall.
|
||||||
|
data StallDetection = StallDetection ByteSize Duration
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
-- Parse eg, "0KiB/60s"
|
||||||
|
parseStallDetection :: String -> Either String StallDetection
|
||||||
|
parseStallDetection s =
|
||||||
|
let (bs, ds) = separate (== '/') s
|
||||||
|
in do
|
||||||
|
b <- maybe
|
||||||
|
(Left $ "Unable to parse stall detection amount " ++ bs)
|
||||||
|
Right
|
||||||
|
(readSize dataUnits bs)
|
||||||
|
d <- parseDuration ds
|
||||||
|
return (StallDetection b d)
|
|
@ -34,7 +34,7 @@ module Utility.Process (
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import qualified Utility.Process.Shim
|
import qualified Utility.Process.Shim
|
||||||
import Utility.Process.Shim as X (CreateProcess(..), ProcessHandle, StdStream(..), CmdSpec(..), proc, getPid, getProcessExitCode, shell, terminateProcess)
|
import Utility.Process.Shim as X (CreateProcess(..), ProcessHandle, StdStream(..), CmdSpec(..), proc, getPid, getProcessExitCode, shell, terminateProcess, interruptProcessGroupOf)
|
||||||
import Utility.Misc
|
import Utility.Misc
|
||||||
import Utility.Exception
|
import Utility.Exception
|
||||||
import Utility.Monad
|
import Utility.Monad
|
||||||
|
|
|
@ -1410,7 +1410,7 @@ Remotes are configured using these settings in `.git/config`.
|
||||||
The format is "$amount/$timeperiod".
|
The format is "$amount/$timeperiod".
|
||||||
|
|
||||||
For example, to detect outright stalls where no data has been transferred
|
For example, to detect outright stalls where no data has been transferred
|
||||||
after 30 seconds: `git config annex.stalldetection "0KiB/60s"`
|
after 30 seconds: `git config annex.stalldetection "0/30s"`
|
||||||
|
|
||||||
Or, if you have a remote on a USB drive that is normally capable of
|
Or, if you have a remote on a USB drive that is normally capable of
|
||||||
several megabytes per second, but has bad sectors where it gets
|
several megabytes per second, but has bad sectors where it gets
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
In a number of scenarios (e.g. [[bugs/still_seeing_errors_with_parallel_git-annex-add]], [[bugs/parallel_copy_fails]], [[git-annex-sync/#comment-aceb18109c0a536e04bcdd3aa04bda29]]), `git-annex` operations may fail or hang due to transient conditions. It would help a lot if `git-annex` could be configured to fail timed-out operations, and to retry failed operations after a delay. This would especially help when using `git-annex` in a script or a higher-level tool. I've tried wrapping some retry logic around `git-annex` calls, but it seems `git-annex` itself is in the best position to do that sensibly (e.g. only retrying idempotent operations, or capping retries per remote). This would be a catch-all fix for unusual conditions that are hard to test for.
|
In a number of scenarios (e.g. [[bugs/still_seeing_errors_with_parallel_git-annex-add]], [[bugs/parallel_copy_fails]], [[git-annex-sync/#comment-aceb18109c0a536e04bcdd3aa04bda29]]), `git-annex` operations may fail or hang due to transient conditions. It would help a lot if `git-annex` could be configured to fail timed-out operations, and to retry failed operations after a delay. This would especially help when using `git-annex` in a script or a higher-level tool. I've tried wrapping some retry logic around `git-annex` calls, but it seems `git-annex` itself is in the best position to do that sensibly (e.g. only retrying idempotent operations, or capping retries per remote). This would be a catch-all fix for unusual conditions that are hard to test for.
|
||||||
|
|
||||||
`git-annex` already has config options `annex.retry` and `annex.retry-delay`, but it seems that they don't cover all failure types.
|
`git-annex` already has config options `annex.retry` and `annex.retry-delay`, but it seems that they don't cover all failure types.
|
||||||
|
|
||||||
|
> Added annex.stalldetection, [[done]] --[[Joey]]
|
||||||
|
|
|
@ -1023,6 +1023,7 @@ Executable git-annex
|
||||||
Types.RepoVersion
|
Types.RepoVersion
|
||||||
Types.ScheduledActivity
|
Types.ScheduledActivity
|
||||||
Types.StandardGroups
|
Types.StandardGroups
|
||||||
|
Types.StallDetection
|
||||||
Types.StoreRetrieve
|
Types.StoreRetrieve
|
||||||
Types.Test
|
Types.Test
|
||||||
Types.Transfer
|
Types.Transfer
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue