stall detection is working

New config annex.stalldetection, remote.name.annex-stalldetection, which
can be used to deal with remotes that stall during transfers, or are
sometimes too slow to want to use.

This commit was sponsored by Luke Shumaker on Patreon.
This commit is contained in:
Joey Hess 2020-12-08 15:22:18 -04:00
parent 09ed9f7d1f
commit 41f2c308ff
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 213 additions and 46 deletions

View file

@ -19,6 +19,7 @@ module Annex.Transfer (
noRetry, noRetry,
stdRetry, stdRetry,
pickRemote, pickRemote,
stallDetection,
) where ) where
import Annex.Common import Annex.Common
@ -40,6 +41,7 @@ import Types.WorkerPool
import Annex.WorkerPool import Annex.WorkerPool
import Annex.TransferrerPool import Annex.TransferrerPool
import Backend (isCryptographicallySecure) import Backend (isCryptographicallySecure)
import Types.StallDetection
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import Control.Concurrent import Control.Concurrent
@ -47,19 +49,15 @@ import qualified Data.Map.Strict as M
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import Data.Ord import Data.Ord
-- Upload, supporting stall detection.
upload :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool upload :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool
upload r key f d _witness = upload r key f d witness = stallDetection r >>= \case
-- TODO: use this when not handling timeouts Nothing -> upload' (Remote.uuid r) key f d go witness
--upload' (Remote.uuid r) key f d $ Just sd -> runTransferrer sd r key f d Upload witness
-- action . Remote.storeKey r key f where
go = action . Remote.storeKey r key f
-- TODO: RetryDecider
-- TODO: Handle timeouts
withTransferrer $ \transferrer ->
performTransfer transferrer AnnexLevel
(Transfer Upload (Remote.uuid r) (fromKey id key))
(Just r) f id
-- Upload, not supporting stall detection.
upload' :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v upload' :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
upload' u key f d a _witness = guardHaveUUID u $ upload' u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Upload u (fromKey id key)) f d a runTransfer (Transfer Upload u (fromKey id key)) f d a
@ -68,22 +66,17 @@ alwaysUpload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider ->
alwaysUpload u key f d a _witness = guardHaveUUID u $ alwaysUpload u key f d a _witness = guardHaveUUID u $
alwaysRunTransfer (Transfer Upload u (fromKey id key)) f d a alwaysRunTransfer (Transfer Upload u (fromKey id key)) f d a
-- Download, supporting stall detection.
download :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool download :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool
download r key f d witness = download r key f d witness = stallDetection r >>= \case
-- TODO: use this when not handling timeouts Nothing -> getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) key f $ \dest ->
--getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) key f $ \dest -> download' (Remote.uuid r) key f d (go dest) witness
-- download' (Remote.uuid r) key f d (go dest) witness Just sd -> runTransferrer sd r key f d Download witness
-- TODO: RetryDecider
-- TODO: Handle timeouts
withTransferrer $ \transferrer ->
performTransfer transferrer AnnexLevel
(Transfer Download (Remote.uuid r) (fromKey id key))
(Just r) f id
where where
go dest p = verifiedAction $ go dest p = verifiedAction $
Remote.retrieveKeyFile r key f (fromRawFilePath dest) p Remote.retrieveKeyFile r key f (fromRawFilePath dest) p
-- Download, not supporting stall detection.
download' :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v download' :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
download' u key f d a _witness = guardHaveUUID u $ download' u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Download u (fromKey id key)) f d a runTransfer (Transfer Download u (fromKey id key)) f d a
@ -115,7 +108,7 @@ alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider
alwaysRunTransfer = runTransfer' True alwaysRunTransfer = runTransfer' True
runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
runTransfer' ignorelock t afile retrydecider transferaction = enteringStage TransferStage $ debugLocks $ preCheckSecureHashes t $ do runTransfer' ignorelock t afile retrydecider transferaction = enteringStage TransferStage $ debugLocks $ preCheckSecureHashes (transferKey t) $ do
info <- liftIO $ startTransferInfo afile info <- liftIO $ startTransferInfo afile
(meter, tfile, createtfile, metervar) <- mkProgressUpdater t info (meter, tfile, createtfile, metervar) <- mkProgressUpdater t info
mode <- annexFileMode mode <- annexFileMode
@ -202,6 +195,31 @@ runTransfer' ignorelock t afile retrydecider transferaction = enteringStage Tran
f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t) f <- fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
liftIO $ catchDefaultIO 0 $ getFileSize f liftIO $ catchDefaultIO 0 $ getFileSize f
runTransferrer
:: StallDetection
-> Remote
-> Key
-> AssociatedFile
-> RetryDecider
-> Direction
-> NotifyWitness
-> Annex Bool
runTransferrer sd r k afile retrydecider direction _witness =
enteringStage TransferStage $ preCheckSecureHashes k $ do
info <- liftIO $ startTransferInfo afile
go 0 info
where
go numretries info =
withTransferrer (performTransfer (Just sd) AnnexLevel id (Just r) t info) >>= \case
Right () -> return True
Left newinfo -> do
let !numretries' = succ numretries
ifM (retrydecider numretries' info newinfo)
( go numretries' newinfo
, return False
)
t = Transfer direction (Remote.uuid r) (fromKey id k)
{- Avoid download and upload of keys with insecure content when {- Avoid download and upload of keys with insecure content when
- annex.securehashesonly is configured. - annex.securehashesonly is configured.
- -
@ -214,8 +232,8 @@ runTransfer' ignorelock t afile retrydecider transferaction = enteringStage Tran
- still contains content using an insecure hash, remotes will likewise - still contains content using an insecure hash, remotes will likewise
- tend to be configured to reject it, so Upload is also prevented. - tend to be configured to reject it, so Upload is also prevented.
-} -}
preCheckSecureHashes :: Observable v => Transfer -> Annex v -> Annex v preCheckSecureHashes :: Observable v => Key -> Annex v -> Annex v
preCheckSecureHashes t a = ifM (isCryptographicallySecure (transferKey t)) preCheckSecureHashes k a = ifM (isCryptographicallySecure k)
( a ( a
, ifM (annexSecureHashesOnly <$> Annex.getGitConfig) , ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
( do ( do
@ -225,7 +243,7 @@ preCheckSecureHashes t a = ifM (isCryptographicallySecure (transferKey t))
) )
) )
where where
variety = fromKey keyVariety (transferKey t) variety = fromKey keyVariety k
type NumRetries = Integer type NumRetries = Integer
@ -348,3 +366,9 @@ lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
lessActiveFirst active a b lessActiveFirst active a b
| Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b | Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b
| otherwise = comparing Remote.cost a b | otherwise = comparing Remote.cost a b
stallDetection :: Remote -> Annex (Maybe StallDetection)
stallDetection r = maybe globalcfg (pure . Just) remotecfg
where
globalcfg = annexStallDetection <$> Annex.getGitConfig
remotecfg = remoteAnnexStallDetection $ Remote.gitconfig r

View file

@ -16,15 +16,21 @@ import Types.Transfer
import Types.Key import Types.Key
import qualified Types.Remote as Remote import qualified Types.Remote as Remote
import Git.Types (RemoteName) import Git.Types (RemoteName)
import Types.StallDetection
import Types.Messages import Types.Messages
import Messages.Serialized import Messages.Serialized
import Annex.Path import Annex.Path
import Utility.Batch import Utility.Batch
import Utility.Metered
import Utility.HumanTime
import Utility.ThreadScheduler
import Control.Concurrent.STM hiding (check)
import Control.Concurrent import Control.Concurrent
import Control.Concurrent.Async
import Control.Concurrent.STM hiding (check)
import Control.Monad.IO.Class (MonadIO) import Control.Monad.IO.Class (MonadIO)
import Text.Read (readMaybe) import Text.Read (readMaybe)
import Data.Time.Clock.POSIX
import System.Log.Logger (debugM) import System.Log.Logger (debugM)
data TransferRequest = TransferRequest TransferRequestLevel Direction (Either UUID RemoteName) KeyData AssociatedFile data TransferRequest = TransferRequest TransferRequestLevel Direction (Either UUID RemoteName) KeyData AssociatedFile
@ -68,7 +74,11 @@ withTransferrer' minimizeprocesses mkcheck program batchmaker pool a = do
where where
returntopool leftinpool check t i returntopool leftinpool check t i
| not minimizeprocesses || leftinpool == 0 = | not minimizeprocesses || leftinpool == 0 =
liftIO $ atomically $ pushTransferrerPool pool i -- If the transferrer got killed, the handles will
-- be closed, so it should not be returned to the
-- pool.
liftIO $ whenM (hIsOpen (transferrerWrite t)) $
liftIO $ atomically $ pushTransferrerPool pool i
| otherwise = liftIO $ do | otherwise = liftIO $ do
void $ forkIO $ shutdownTransferrer t void $ forkIO $ shutdownTransferrer t
atomically $ pushTransferrerPool pool $ TransferrerPoolItem Nothing check atomically $ pushTransferrerPool pool $ TransferrerPoolItem Nothing check
@ -90,24 +100,102 @@ checkTransferrerPoolItem program batchmaker i = case i of
return $ TransferrerPoolItem (Just t) check return $ TransferrerPoolItem (Just t) check
{- Requests that a Transferrer perform a Transfer, and waits for it to {- Requests that a Transferrer perform a Transfer, and waits for it to
- finish. -} - finish.
-
- When a stall is detected, kills the Transferrer.
-
- If the transfer failed or stalled, returns TransferInfo with an
- updated bytesComplete reflecting how much data has been transferred.
-}
performTransfer performTransfer
:: (Monad m, MonadIO m, MonadMask m) :: (Monad m, MonadIO m, MonadMask m)
=> Transferrer => Maybe StallDetection
-> TransferRequestLevel -> TransferRequestLevel
-> Transfer
-> Maybe Remote
-> AssociatedFile
-> (forall a. Annex a -> m a) -> (forall a. Annex a -> m a)
-- ^ Run an annex action in the monad. Will not be used with -- ^ Run an annex action in the monad. Will not be used with
-- actions that block for a long time. -- actions that block for a long time.
-> m Bool -> Maybe Remote
performTransfer transferrer level t mremote afile runannex = catchBoolIO $ do -> Transfer
(liftIO $ sendRequest level t mremote afile (transferrerWrite transferrer)) -> TransferInfo
relaySerializedOutput -> Transferrer
-> m (Either TransferInfo ())
performTransfer stalldetection level runannex r t info transferrer = do
bpv <- liftIO $ newTVarIO zeroBytesProcessed
ifM (catchBoolIO $ bracket setup cleanup (go bpv))
( return (Right ())
, do
n <- case transferDirection t of
Upload -> liftIO $ atomically $
fromBytesProcessed <$> readTVar bpv
Download -> do
f <- runannex $ fromRepo $ gitAnnexTmpObjectLocation (transferKey t)
liftIO $ catchDefaultIO 0 $ getFileSize f
return $ Left $ info { bytesComplete = Just n }
)
where
setup = do
liftIO $ sendRequest level t r
(associatedFile info)
(transferrerWrite transferrer)
metervar <- liftIO $ newEmptyTMVarIO
stalledvar <- liftIO $ newTVarIO False
tid <- liftIO $ async $
detectStalls stalldetection metervar $ do
atomically $ writeTVar stalledvar True
killTransferrer transferrer
return (metervar, tid, stalledvar)
cleanup (_, tid, stalledvar) = do
liftIO $ uninterruptibleCancel tid
whenM (liftIO $ atomically $ readTVar stalledvar) $ do
runannex $ showLongNote "Transfer stalled"
-- Close handles, to prevent the transferrer being
-- reused since the process was killed.
liftIO $ hClose $ transferrerRead transferrer
liftIO $ hClose $ transferrerWrite transferrer
go bpv (metervar, _, _) = relaySerializedOutput
(liftIO $ readResponse (transferrerRead transferrer)) (liftIO $ readResponse (transferrerRead transferrer))
(liftIO . sendSerializedOutputResponse (transferrerWrite transferrer)) (liftIO . sendSerializedOutputResponse (transferrerWrite transferrer))
(updatemeter bpv metervar)
runannex runannex
updatemeter bpv metervar (Just n) = liftIO $ do
atomically $ do
void $ tryTakeTMVar metervar
putTMVar metervar n
atomically $ writeTVar bpv n
updatemeter _bpv metervar Nothing = liftIO $
atomically $ void $ tryTakeTMVar metervar
detectStalls :: Maybe StallDetection -> TMVar BytesProcessed -> IO () -> IO ()
detectStalls Nothing _ _ = noop
detectStalls (Just (StallDetection minsz duration)) metervar onstall = go Nothing
where
go st = do
starttm <- getPOSIXTime
threadDelaySeconds (Seconds (fromIntegral (durationSeconds duration)))
-- Get whatever progress value was reported most recently, or
-- if none were reported since last time, wait until one is
-- reported.
sofar <- atomically $ fromBytesProcessed <$> takeTMVar metervar
case st of
Nothing -> go (Just sofar)
Just prev
-- Just in case a progress meter somehow runs
-- backwards, or a second progress meter was
-- started and is at a smaller value than
-- the previous one.
| prev > sofar -> go (Just sofar)
| otherwise -> do
endtm <- getPOSIXTime
let actualduration = endtm - starttm
let sz = sofar - prev
let expectedsz = (minsz * durationSeconds duration)
`div` max 1 (ceiling actualduration)
if sz < expectedsz
then onstall
else go (Just sofar)
{- Starts a new git-annex transferkeys process, setting up handles {- Starts a new git-annex transferkeys process, setting up handles
- that will be used to communicate with it. -} - that will be used to communicate with it. -}
@ -171,8 +259,6 @@ shutdownTransferrer t = do
{- Kill the transferrer, and all its child processes. -} {- Kill the transferrer, and all its child processes. -}
killTransferrer :: Transferrer -> IO () killTransferrer :: Transferrer -> IO ()
killTransferrer t = do killTransferrer t = do
hClose $ transferrerRead t
hClose $ transferrerWrite t
interruptProcessGroupOf $ transferrerHandle t interruptProcessGroupOf $ transferrerHandle t
threadDelay 50000 -- 0.05 second grace period threadDelay 50000 -- 0.05 second grace period
terminateProcess $ transferrerHandle t terminateProcess $ transferrerHandle t

View file

@ -24,6 +24,7 @@ import Assistant.Alert
import Assistant.Alert.Utility import Assistant.Alert.Utility
import Assistant.Commits import Assistant.Commits
import Assistant.Drop import Assistant.Drop
import Annex.Transfer (stallDetection)
import Types.Transfer import Types.Transfer
import Logs.Transfer import Logs.Transfer
import Logs.Location import Logs.Location
@ -37,6 +38,7 @@ import Annex.Path
import Utility.Batch import Utility.Batch
import Types.NumCopies import Types.NumCopies
import Data.Either
import qualified Data.Map as M import qualified Data.Map as M
import qualified Control.Exception as E import qualified Control.Exception as E
import Control.Concurrent import Control.Concurrent
@ -123,7 +125,8 @@ genTransfer t info = case transferRemote info of
( do ( do
debug [ "Transferring:" , describeTransfer t info ] debug [ "Transferring:" , describeTransfer t info ]
notifyTransfer notifyTransfer
return $ Just (t, info, go remote) sd <- liftAnnex $ stallDetection remote
return $ Just (t, info, go remote sd)
, do , do
debug [ "Skipping unnecessary transfer:", debug [ "Skipping unnecessary transfer:",
describeTransfer t info ] describeTransfer t info ]
@ -162,7 +165,7 @@ genTransfer t info = case transferRemote info of
- usual cleanup. However, first check if something else is - usual cleanup. However, first check if something else is
- running the transfer, to avoid removing active transfers. - running the transfer, to avoid removing active transfers.
-} -}
go remote transferrer = ifM (performTransfer transferrer AssistantLevel t (transferRemote info) (associatedFile info) liftAnnex) go remote sd transferrer = ifM (isRight <$> performTransfer sd AssistantLevel liftAnnex (transferRemote info) t info transferrer)
( do ( do
case associatedFile info of case associatedFile info of
AssociatedFile Nothing -> noop AssociatedFile Nothing -> noop

View file

@ -1,5 +1,8 @@
git-annex (8.20201128) UNRELEASED; urgency=medium git-annex (8.20201128) UNRELEASED; urgency=medium
* New config annex.stalldetection, remote.name.annex-stalldetection,
which can be used to deal with remotes that stall during transfers,
or are sometimes too slow to want to use.
* Fix hang on shutdown of external special remote using ASYNC protocol * Fix hang on shutdown of external special remote using ASYNC protocol
extension. (Reversion introduced in version 8.20201007.) extension. (Reversion introduced in version 8.20201007.)
* Fix bug that made the next download after an empty file from a ssh * Fix bug that made the next download after an empty file from a ssh

View file

@ -20,6 +20,7 @@ import Messages
import Messages.Internal import Messages.Internal
import Messages.Progress import Messages.Progress
import qualified Messages.JSON as JSON import qualified Messages.JSON as JSON
import Utility.Metered (BytesProcessed)
import Control.Monad.IO.Class (MonadIO) import Control.Monad.IO.Class (MonadIO)
@ -29,11 +30,16 @@ relaySerializedOutput
=> m (Either SerializedOutput r) => m (Either SerializedOutput r)
-- ^ Get next serialized output, or final value to return. -- ^ Get next serialized output, or final value to return.
-> (SerializedOutputResponse -> m ()) -> (SerializedOutputResponse -> m ())
-- ^ Send response to child process.
-> (Maybe BytesProcessed -> m ())
-- ^ When a progress meter is running, is updated with
-- progress meter values sent by the process.
-- When a progress meter is stopped, Nothing is sent.
-> (forall a. Annex a -> m a) -> (forall a. Annex a -> m a)
-- ^ Run an annex action in the monad. Will not be used with -- ^ Run an annex action in the monad. Will not be used with
-- actions that block for a long time. -- actions that block for a long time.
-> m r -> m r
relaySerializedOutput getso sendsor runannex = go Nothing relaySerializedOutput getso sendsor meterreport runannex = go Nothing
where where
go st = loop st >>= \case go st = loop st >>= \case
Right r -> return r Right r -> return r
@ -69,10 +75,14 @@ relaySerializedOutput getso sendsor runannex = go Nothing
-- output after the progress meter -- output after the progress meter
-- is done. -- is done.
Left _st' -> loop Nothing Left _st' -> loop Nothing
Left EndProgressMeter -> return (Left st) Left EndProgressMeter -> do
meterreport Nothing
return (Left st)
Left (UpdateProgressMeter n) -> do Left (UpdateProgressMeter n) -> do
case st of case st of
Just meterupdate -> liftIO $ meterupdate n Just meterupdate -> do
meterreport (Just n)
liftIO $ meterupdate n
Nothing -> noop Nothing -> noop
loop st loop st
Left StartPrompt -> do Left StartPrompt -> do

View file

@ -41,6 +41,7 @@ import Types.NumCopies
import Types.Difference import Types.Difference
import Types.RefSpec import Types.RefSpec
import Types.RepoVersion import Types.RepoVersion
import Types.StallDetection
import Config.DynamicConfig import Config.DynamicConfig
import Utility.HumanTime import Utility.HumanTime
import Utility.Gpg (GpgCmd, mkGpgCmd) import Utility.Gpg (GpgCmd, mkGpgCmd)
@ -116,6 +117,7 @@ data GitConfig = GitConfig
, annexRetry :: Maybe Integer , annexRetry :: Maybe Integer
, annexForwardRetry :: Maybe Integer , annexForwardRetry :: Maybe Integer
, annexRetryDelay :: Maybe Seconds , annexRetryDelay :: Maybe Seconds
, annexStallDetection :: Maybe StallDetection
, annexAllowedUrlSchemes :: S.Set Scheme , annexAllowedUrlSchemes :: S.Set Scheme
, annexAllowedIPAddresses :: String , annexAllowedIPAddresses :: String
, annexAllowUnverifiedDownloads :: Bool , annexAllowUnverifiedDownloads :: Bool
@ -202,6 +204,9 @@ extractGitConfig configsource r = GitConfig
, annexForwardRetry = getmayberead (annexConfig "forward-retry") , annexForwardRetry = getmayberead (annexConfig "forward-retry")
, annexRetryDelay = Seconds , annexRetryDelay = Seconds
<$> getmayberead (annexConfig "retrydelay") <$> getmayberead (annexConfig "retrydelay")
, annexStallDetection =
either (const Nothing) Just . parseStallDetection
=<< getmaybe (annexConfig "stalldetection")
, annexAllowedUrlSchemes = S.fromList $ map mkScheme $ , annexAllowedUrlSchemes = S.fromList $ map mkScheme $
maybe ["http", "https", "ftp"] words $ maybe ["http", "https", "ftp"] words $
getmaybe (annexConfig "security.allowed-url-schemes") getmaybe (annexConfig "security.allowed-url-schemes")
@ -306,6 +311,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexRetry :: Maybe Integer , remoteAnnexRetry :: Maybe Integer
, remoteAnnexForwardRetry :: Maybe Integer , remoteAnnexForwardRetry :: Maybe Integer
, remoteAnnexRetryDelay :: Maybe Seconds , remoteAnnexRetryDelay :: Maybe Seconds
, remoteAnnexStallDetection :: Maybe StallDetection
, remoteAnnexAllowUnverifiedDownloads :: Bool , remoteAnnexAllowUnverifiedDownloads :: Bool
, remoteAnnexConfigUUID :: Maybe UUID , remoteAnnexConfigUUID :: Maybe UUID
@ -369,6 +375,9 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexForwardRetry = getmayberead "forward-retry" , remoteAnnexForwardRetry = getmayberead "forward-retry"
, remoteAnnexRetryDelay = Seconds , remoteAnnexRetryDelay = Seconds
<$> getmayberead "retrydelay" <$> getmayberead "retrydelay"
, remoteAnnexStallDetection =
either (const Nothing) Just . parseStallDetection
=<< getmaybe "stalldetection"
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ , remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe ("security-allow-unverified-downloads") getmaybe ("security-allow-unverified-downloads")
, remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid" , remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid"

29
Types/StallDetection.hs Normal file
View file

@ -0,0 +1,29 @@
{- types for stall detection
-
- Copyright 2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.StallDetection where
import Utility.DataUnits
import Utility.HumanTime
import Utility.Misc
-- Unless the given number of bytes have been sent over the given
-- amount of time, there's a stall.
data StallDetection = StallDetection ByteSize Duration
deriving (Show)
-- Parse eg, "0KiB/60s"
parseStallDetection :: String -> Either String StallDetection
parseStallDetection s =
let (bs, ds) = separate (== '/') s
in do
b <- maybe
(Left $ "Unable to parse stall detection amount " ++ bs)
Right
(readSize dataUnits bs)
d <- parseDuration ds
return (StallDetection b d)

View file

@ -34,7 +34,7 @@ module Utility.Process (
) where ) where
import qualified Utility.Process.Shim import qualified Utility.Process.Shim
import Utility.Process.Shim as X (CreateProcess(..), ProcessHandle, StdStream(..), CmdSpec(..), proc, getPid, getProcessExitCode, shell, terminateProcess) import Utility.Process.Shim as X (CreateProcess(..), ProcessHandle, StdStream(..), CmdSpec(..), proc, getPid, getProcessExitCode, shell, terminateProcess, interruptProcessGroupOf)
import Utility.Misc import Utility.Misc
import Utility.Exception import Utility.Exception
import Utility.Monad import Utility.Monad

View file

@ -1410,7 +1410,7 @@ Remotes are configured using these settings in `.git/config`.
The format is "$amount/$timeperiod". The format is "$amount/$timeperiod".
For example, to detect outright stalls where no data has been transferred For example, to detect outright stalls where no data has been transferred
after 30 seconds: `git config annex.stalldetection "0KiB/60s"` after 30 seconds: `git config annex.stalldetection "0/30s"`
Or, if you have a remote on a USB drive that is normally capable of Or, if you have a remote on a USB drive that is normally capable of
several megabytes per second, but has bad sectors where it gets several megabytes per second, but has bad sectors where it gets

View file

@ -1,3 +1,5 @@
In a number of scenarios (e.g. [[bugs/still_seeing_errors_with_parallel_git-annex-add]], [[bugs/parallel_copy_fails]], [[git-annex-sync/#comment-aceb18109c0a536e04bcdd3aa04bda29]]), `git-annex` operations may fail or hang due to transient conditions. It would help a lot if `git-annex` could be configured to fail timed-out operations, and to retry failed operations after a delay. This would especially help when using `git-annex` in a script or a higher-level tool. I've tried wrapping some retry logic around `git-annex` calls, but it seems `git-annex` itself is in the best position to do that sensibly (e.g. only retrying idempotent operations, or capping retries per remote). This would be a catch-all fix for unusual conditions that are hard to test for. In a number of scenarios (e.g. [[bugs/still_seeing_errors_with_parallel_git-annex-add]], [[bugs/parallel_copy_fails]], [[git-annex-sync/#comment-aceb18109c0a536e04bcdd3aa04bda29]]), `git-annex` operations may fail or hang due to transient conditions. It would help a lot if `git-annex` could be configured to fail timed-out operations, and to retry failed operations after a delay. This would especially help when using `git-annex` in a script or a higher-level tool. I've tried wrapping some retry logic around `git-annex` calls, but it seems `git-annex` itself is in the best position to do that sensibly (e.g. only retrying idempotent operations, or capping retries per remote). This would be a catch-all fix for unusual conditions that are hard to test for.
`git-annex` already has config options `annex.retry` and `annex.retry-delay`, but it seems that they don't cover all failure types. `git-annex` already has config options `annex.retry` and `annex.retry-delay`, but it seems that they don't cover all failure types.
> Added annex.stalldetection, [[done]] --[[Joey]]

View file

@ -1023,6 +1023,7 @@ Executable git-annex
Types.RepoVersion Types.RepoVersion
Types.ScheduledActivity Types.ScheduledActivity
Types.StandardGroups Types.StandardGroups
Types.StallDetection
Types.StoreRetrieve Types.StoreRetrieve
Types.Test Types.Test
Types.Transfer Types.Transfer