automatic stall detection
annex.stalldetection can now be set to "true" to make git-annex do automatic stall detection when it detects a remote is updating its transfer progress consistently enough. This commit was sponsored by Luke Shumaker on Patreon.
This commit is contained in:
parent
904689f11b
commit
135757d64a
8 changed files with 122 additions and 45 deletions
78
Annex/StallDetection.hs
Normal file
78
Annex/StallDetection.hs
Normal file
|
@ -0,0 +1,78 @@
|
|||
{- Stall detection for transfers.
|
||||
-
|
||||
- Copyright 2020-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.StallDetection (detectStalls, StallDetection) where
|
||||
|
||||
import Annex.Common
|
||||
import Types.StallDetection
|
||||
import Utility.Metered
|
||||
import Utility.HumanTime
|
||||
import Utility.DataUnits
|
||||
import Utility.ThreadScheduler
|
||||
|
||||
import Control.Concurrent.STM
|
||||
|
||||
detectStalls :: Maybe StallDetection -> TVar (Maybe BytesProcessed) -> IO () -> IO ()
|
||||
detectStalls Nothing _ _ = noop
|
||||
detectStalls (Just (StallDetection minsz duration)) metervar onstall =
|
||||
detectStalls' minsz duration metervar onstall Nothing
|
||||
detectStalls (Just ProbeStallDetection) metervar onstall = do
|
||||
-- Only do stall detection once the progress is confirmed to be
|
||||
-- consistently updating. After the first update, it needs to
|
||||
-- advance twice within 30 seconds. With that established,
|
||||
-- if no data at all is sent for a 60 second period, it's
|
||||
-- assumed to be a stall.
|
||||
v <- getval >>= waitforfirstupdate
|
||||
ontimelyadvance v $ \v' -> ontimelyadvance v' $
|
||||
detectStalls' 1 duration metervar onstall
|
||||
where
|
||||
getval = atomically $ fmap fromBytesProcessed
|
||||
<$> readTVar metervar
|
||||
|
||||
duration = Duration 60
|
||||
|
||||
delay = Seconds (fromIntegral (durationSeconds duration) `div` 2)
|
||||
|
||||
waitforfirstupdate startval = do
|
||||
threadDelaySeconds delay
|
||||
v <- getval
|
||||
if v > startval
|
||||
then return v
|
||||
else waitforfirstupdate startval
|
||||
|
||||
ontimelyadvance v cont = do
|
||||
threadDelaySeconds delay
|
||||
v' <- getval
|
||||
when (v' > v) $
|
||||
cont v'
|
||||
|
||||
detectStalls'
|
||||
:: ByteSize
|
||||
-> Duration
|
||||
-> TVar (Maybe BytesProcessed)
|
||||
-> IO ()
|
||||
-> Maybe ByteSize
|
||||
-> IO ()
|
||||
detectStalls' minsz duration metervar onstall st = do
|
||||
threadDelaySeconds delay
|
||||
-- Get whatever progress value was reported most recently, if any.
|
||||
v <- atomically $ fmap fromBytesProcessed
|
||||
<$> readTVar metervar
|
||||
let cont = detectStalls' minsz duration metervar onstall v
|
||||
case (st, v) of
|
||||
(Nothing, _) -> cont
|
||||
(_, Nothing) -> cont
|
||||
(Just prev, Just sofar)
|
||||
-- Just in case a progress meter somehow runs
|
||||
-- backwards, or a second progress meter was
|
||||
-- started and is at a smaller value than
|
||||
-- the previous one.
|
||||
| prev > sofar -> cont
|
||||
| sofar - prev < minsz -> onstall
|
||||
| otherwise -> cont
|
||||
where
|
||||
delay = Seconds (fromIntegral (durationSeconds duration))
|
|
@ -1,6 +1,6 @@
|
|||
{- A pool of "git-annex transferrer" processes
|
||||
-
|
||||
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -16,15 +16,13 @@ import Types.TransferrerPool
|
|||
import Types.Transferrer
|
||||
import Types.Transfer
|
||||
import qualified Types.Remote as Remote
|
||||
import Types.StallDetection
|
||||
import Types.Messages
|
||||
import Types.CleanupActions
|
||||
import Messages.Serialized
|
||||
import Annex.Path
|
||||
import Annex.StallDetection
|
||||
import Utility.Batch
|
||||
import Utility.Metered
|
||||
import Utility.HumanTime
|
||||
import Utility.ThreadScheduler
|
||||
import qualified Utility.SimpleProtocol as Proto
|
||||
|
||||
import Control.Concurrent
|
||||
|
@ -177,28 +175,6 @@ performTransfer stalldetection level runannex r t info transferrer = do
|
|||
updatemeter _bpv metervar Nothing = liftIO $
|
||||
atomically $ writeTVar metervar Nothing
|
||||
|
||||
detectStalls :: Maybe StallDetection -> TVar (Maybe BytesProcessed) -> IO () -> IO ()
|
||||
detectStalls Nothing _ _ = noop
|
||||
detectStalls (Just (StallDetection minsz duration)) metervar onstall = go Nothing
|
||||
where
|
||||
go st = do
|
||||
threadDelaySeconds (Seconds (fromIntegral (durationSeconds duration)))
|
||||
-- Get whatever progress value was reported last, if any.
|
||||
v <- atomically $ fmap fromBytesProcessed
|
||||
<$> readTVar metervar
|
||||
let cont = go v
|
||||
case (st, v) of
|
||||
(Nothing, _) -> cont
|
||||
(_, Nothing) -> cont
|
||||
(Just prev, Just sofar)
|
||||
-- Just in case a progress meter somehow runs
|
||||
-- backwards, or a second progress meter was
|
||||
-- started and is at a smaller value than
|
||||
-- the previous one.
|
||||
| prev > sofar -> cont
|
||||
| sofar - prev < minsz -> onstall
|
||||
| otherwise -> cont
|
||||
|
||||
{- Starts a new git-annex transfer process, setting up handles
|
||||
- that will be used to communicate with it. -}
|
||||
mkTransferrer :: SignalActionsVar -> RunTransferrer -> IO Transferrer
|
||||
|
|
|
@ -15,6 +15,9 @@ git-annex (8.20210128) UNRELEASED; urgency=medium
|
|||
* Fix build on openbsd.
|
||||
Thanks, James Cook for the patch.
|
||||
* Include libkqueue.h file needed to build the assistant on BSDs.
|
||||
* annex.stalldetection can now be set to "true" to make git-annex
|
||||
do automatic stall detection when it detects a remote is updating its
|
||||
transfer progress consistently enough.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2021 12:34:32 -0400
|
||||
|
||||
|
|
|
@ -205,7 +205,7 @@ extractGitConfig configsource r = GitConfig
|
|||
, annexRetryDelay = Seconds
|
||||
<$> getmayberead (annexConfig "retrydelay")
|
||||
, annexStallDetection =
|
||||
either (const Nothing) Just . parseStallDetection
|
||||
either (const Nothing) id . parseStallDetection
|
||||
=<< getmaybe (annexConfig "stalldetection")
|
||||
, annexAllowedUrlSchemes = S.fromList $ map mkScheme $
|
||||
maybe ["http", "https", "ftp"] words $
|
||||
|
@ -377,7 +377,7 @@ extractRemoteGitConfig r remotename = do
|
|||
, remoteAnnexRetryDelay = Seconds
|
||||
<$> getmayberead "retrydelay"
|
||||
, remoteAnnexStallDetection =
|
||||
either (const Nothing) Just . parseStallDetection
|
||||
either (const Nothing) id . parseStallDetection
|
||||
=<< getmaybe "stalldetection"
|
||||
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
||||
getmaybe ("security-allow-unverified-downloads")
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- types for stall detection
|
||||
-
|
||||
- Copyright 2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2020-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -10,20 +10,32 @@ module Types.StallDetection where
|
|||
import Utility.DataUnits
|
||||
import Utility.HumanTime
|
||||
import Utility.Misc
|
||||
import Git.Config
|
||||
|
||||
-- Unless the given number of bytes have been sent over the given
|
||||
-- amount of time, there's a stall.
|
||||
data StallDetection = StallDetection ByteSize Duration
|
||||
data StallDetection
|
||||
= StallDetection ByteSize Duration
|
||||
-- ^ Unless the given number of bytes have been sent over the given
|
||||
-- amount of time, there's a stall.
|
||||
| ProbeStallDetection
|
||||
-- ^ Used when unsure how frequently transfer progress is updated,
|
||||
-- or how fast data can be sent.
|
||||
deriving (Show)
|
||||
|
||||
-- Parse eg, "0KiB/60s"
|
||||
parseStallDetection :: String -> Either String StallDetection
|
||||
parseStallDetection s =
|
||||
--
|
||||
-- Also, it can be set to "true" (or other git config equivilants)
|
||||
-- to enable ProbeStallDetection.
|
||||
-- And "false" (and other git config equivilants) explicitly
|
||||
-- disable stall detection.
|
||||
parseStallDetection :: String -> Either String (Maybe StallDetection)
|
||||
parseStallDetection s = case isTrueFalse s of
|
||||
Nothing -> do
|
||||
let (bs, ds) = separate (== '/') s
|
||||
in do
|
||||
b <- maybe
|
||||
(Left $ "Unable to parse stall detection amount " ++ bs)
|
||||
Right
|
||||
(readSize dataUnits bs)
|
||||
d <- parseDuration ds
|
||||
return (StallDetection b d)
|
||||
return (Just (StallDetection b d))
|
||||
Just True -> Right (Just ProbeStallDetection)
|
||||
Just False -> Right Nothing
|
||||
|
|
|
@ -1427,12 +1427,18 @@ Remotes are configured using these settings in `.git/config`.
|
|||
When this happens, the transfer will be considered to have failed, so
|
||||
settings like annex.retry will control what it does next.
|
||||
|
||||
The value specifies how much data git-annex should expect to see
|
||||
flowing, minimum, when it's not stalled, over a given period of time.
|
||||
The format is "$amount/$timeperiod".
|
||||
Set to "true" to enable automatic stall detection. With this setting,
|
||||
if a remote does not update its progress consistently, no stall detection
|
||||
will be done. And it may take a while for git-annex to decide a remote
|
||||
is really stalled when using automatic stall detection, since it needs
|
||||
to be conservative about what looks like a stall.
|
||||
|
||||
For more fine control over what constitutes a stall, set to a value in
|
||||
the form "$amount/$timeperiod" to specify how much data git-annex should
|
||||
expect to see flowing, minimum, over a given period of time.
|
||||
|
||||
For example, to detect outright stalls where no data has been transferred
|
||||
after 30 seconds: `git config annex.stalldetection "0/30s"`
|
||||
after 30 seconds: `git config annex.stalldetection "1KB/30s"`
|
||||
|
||||
Or, if you have a remote on a USB drive that is normally capable of
|
||||
several megabytes per second, but has bad sectors where it gets
|
||||
|
|
|
@ -3,14 +3,15 @@ to detect and retry stalls. But most users are not going to configure this.
|
|||
Could something be done to dynamically detect a stall, without configuration?
|
||||
|
||||
Eg, wait until data starts to flow, and then check if there's at least some
|
||||
data being sent each minute. If so, the progress display is being updated
|
||||
at least every minute. So then if 2 minutes go by without more data
|
||||
data being sent each half minute. If so, the progress display is being updated
|
||||
at least every minute. So then if 1 minute goes by without more data
|
||||
flowing, it's almost certainly stalled. And if the progress display is
|
||||
updated less frequently, see if it's updated every 2 minutes, etc. Although
|
||||
realistically, progress displays are updated every chunk, and there's
|
||||
typically more than 1 chunk per minute. So longer durations than 1 minute
|
||||
may be an unncessary complication. And a couple of minutes to detect a
|
||||
stall is fine.
|
||||
may be an unncessary complication. And a minute to detect a stall is fine.
|
||||
|
||||
> Implemented this, annex.stalldetection = true enables automatic.
|
||||
|
||||
It may still need a config to turn it on, because running
|
||||
transfers in separate processes can lead to more resource use, or even
|
||||
|
|
|
@ -662,6 +662,7 @@ Executable git-annex
|
|||
Annex.SpecialRemote
|
||||
Annex.SpecialRemote.Config
|
||||
Annex.Ssh
|
||||
Annex.StallDetection
|
||||
Annex.TaggedPush
|
||||
Annex.Tmp
|
||||
Annex.Transfer
|
||||
|
|
Loading…
Reference in a new issue