automatic stall detection
annex.stalldetection can now be set to "true" to make git-annex do automatic stall detection when it detects a remote is updating its transfer progress consistently enough. This commit was sponsored by Luke Shumaker on Patreon.
This commit is contained in:
parent
904689f11b
commit
135757d64a
8 changed files with 122 additions and 45 deletions
78
Annex/StallDetection.hs
Normal file
78
Annex/StallDetection.hs
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
{- Stall detection for transfers.
|
||||||
|
-
|
||||||
|
- Copyright 2020-2021 Joey Hess <id@joeyh.name>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Annex.StallDetection (detectStalls, StallDetection) where
|
||||||
|
|
||||||
|
import Annex.Common
|
||||||
|
import Types.StallDetection
|
||||||
|
import Utility.Metered
|
||||||
|
import Utility.HumanTime
|
||||||
|
import Utility.DataUnits
|
||||||
|
import Utility.ThreadScheduler
|
||||||
|
|
||||||
|
import Control.Concurrent.STM
|
||||||
|
|
||||||
|
detectStalls :: Maybe StallDetection -> TVar (Maybe BytesProcessed) -> IO () -> IO ()
|
||||||
|
detectStalls Nothing _ _ = noop
|
||||||
|
detectStalls (Just (StallDetection minsz duration)) metervar onstall =
|
||||||
|
detectStalls' minsz duration metervar onstall Nothing
|
||||||
|
detectStalls (Just ProbeStallDetection) metervar onstall = do
|
||||||
|
-- Only do stall detection once the progress is confirmed to be
|
||||||
|
-- consistently updating. After the first update, it needs to
|
||||||
|
-- advance twice within 30 seconds. With that established,
|
||||||
|
-- if no data at all is sent for a 60 second period, it's
|
||||||
|
-- assumed to be a stall.
|
||||||
|
v <- getval >>= waitforfirstupdate
|
||||||
|
ontimelyadvance v $ \v' -> ontimelyadvance v' $
|
||||||
|
detectStalls' 1 duration metervar onstall
|
||||||
|
where
|
||||||
|
getval = atomically $ fmap fromBytesProcessed
|
||||||
|
<$> readTVar metervar
|
||||||
|
|
||||||
|
duration = Duration 60
|
||||||
|
|
||||||
|
delay = Seconds (fromIntegral (durationSeconds duration) `div` 2)
|
||||||
|
|
||||||
|
waitforfirstupdate startval = do
|
||||||
|
threadDelaySeconds delay
|
||||||
|
v <- getval
|
||||||
|
if v > startval
|
||||||
|
then return v
|
||||||
|
else waitforfirstupdate startval
|
||||||
|
|
||||||
|
ontimelyadvance v cont = do
|
||||||
|
threadDelaySeconds delay
|
||||||
|
v' <- getval
|
||||||
|
when (v' > v) $
|
||||||
|
cont v'
|
||||||
|
|
||||||
|
detectStalls'
|
||||||
|
:: ByteSize
|
||||||
|
-> Duration
|
||||||
|
-> TVar (Maybe BytesProcessed)
|
||||||
|
-> IO ()
|
||||||
|
-> Maybe ByteSize
|
||||||
|
-> IO ()
|
||||||
|
detectStalls' minsz duration metervar onstall st = do
|
||||||
|
threadDelaySeconds delay
|
||||||
|
-- Get whatever progress value was reported most recently, if any.
|
||||||
|
v <- atomically $ fmap fromBytesProcessed
|
||||||
|
<$> readTVar metervar
|
||||||
|
let cont = detectStalls' minsz duration metervar onstall v
|
||||||
|
case (st, v) of
|
||||||
|
(Nothing, _) -> cont
|
||||||
|
(_, Nothing) -> cont
|
||||||
|
(Just prev, Just sofar)
|
||||||
|
-- Just in case a progress meter somehow runs
|
||||||
|
-- backwards, or a second progress meter was
|
||||||
|
-- started and is at a smaller value than
|
||||||
|
-- the previous one.
|
||||||
|
| prev > sofar -> cont
|
||||||
|
| sofar - prev < minsz -> onstall
|
||||||
|
| otherwise -> cont
|
||||||
|
where
|
||||||
|
delay = Seconds (fromIntegral (durationSeconds duration))
|
|
@ -1,6 +1,6 @@
|
||||||
{- A pool of "git-annex transferrer" processes
|
{- A pool of "git-annex transferrer" processes
|
||||||
-
|
-
|
||||||
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
|
- Copyright 2013-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -16,15 +16,13 @@ import Types.TransferrerPool
|
||||||
import Types.Transferrer
|
import Types.Transferrer
|
||||||
import Types.Transfer
|
import Types.Transfer
|
||||||
import qualified Types.Remote as Remote
|
import qualified Types.Remote as Remote
|
||||||
import Types.StallDetection
|
|
||||||
import Types.Messages
|
import Types.Messages
|
||||||
import Types.CleanupActions
|
import Types.CleanupActions
|
||||||
import Messages.Serialized
|
import Messages.Serialized
|
||||||
import Annex.Path
|
import Annex.Path
|
||||||
|
import Annex.StallDetection
|
||||||
import Utility.Batch
|
import Utility.Batch
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
import Utility.HumanTime
|
|
||||||
import Utility.ThreadScheduler
|
|
||||||
import qualified Utility.SimpleProtocol as Proto
|
import qualified Utility.SimpleProtocol as Proto
|
||||||
|
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
|
@ -177,28 +175,6 @@ performTransfer stalldetection level runannex r t info transferrer = do
|
||||||
updatemeter _bpv metervar Nothing = liftIO $
|
updatemeter _bpv metervar Nothing = liftIO $
|
||||||
atomically $ writeTVar metervar Nothing
|
atomically $ writeTVar metervar Nothing
|
||||||
|
|
||||||
detectStalls :: Maybe StallDetection -> TVar (Maybe BytesProcessed) -> IO () -> IO ()
|
|
||||||
detectStalls Nothing _ _ = noop
|
|
||||||
detectStalls (Just (StallDetection minsz duration)) metervar onstall = go Nothing
|
|
||||||
where
|
|
||||||
go st = do
|
|
||||||
threadDelaySeconds (Seconds (fromIntegral (durationSeconds duration)))
|
|
||||||
-- Get whatever progress value was reported last, if any.
|
|
||||||
v <- atomically $ fmap fromBytesProcessed
|
|
||||||
<$> readTVar metervar
|
|
||||||
let cont = go v
|
|
||||||
case (st, v) of
|
|
||||||
(Nothing, _) -> cont
|
|
||||||
(_, Nothing) -> cont
|
|
||||||
(Just prev, Just sofar)
|
|
||||||
-- Just in case a progress meter somehow runs
|
|
||||||
-- backwards, or a second progress meter was
|
|
||||||
-- started and is at a smaller value than
|
|
||||||
-- the previous one.
|
|
||||||
| prev > sofar -> cont
|
|
||||||
| sofar - prev < minsz -> onstall
|
|
||||||
| otherwise -> cont
|
|
||||||
|
|
||||||
{- Starts a new git-annex transfer process, setting up handles
|
{- Starts a new git-annex transfer process, setting up handles
|
||||||
- that will be used to communicate with it. -}
|
- that will be used to communicate with it. -}
|
||||||
mkTransferrer :: SignalActionsVar -> RunTransferrer -> IO Transferrer
|
mkTransferrer :: SignalActionsVar -> RunTransferrer -> IO Transferrer
|
||||||
|
|
|
@ -15,6 +15,9 @@ git-annex (8.20210128) UNRELEASED; urgency=medium
|
||||||
* Fix build on openbsd.
|
* Fix build on openbsd.
|
||||||
Thanks, James Cook for the patch.
|
Thanks, James Cook for the patch.
|
||||||
* Include libkqueue.h file needed to build the assistant on BSDs.
|
* Include libkqueue.h file needed to build the assistant on BSDs.
|
||||||
|
* annex.stalldetection can now be set to "true" to make git-annex
|
||||||
|
do automatic stall detection when it detects a remote is updating its
|
||||||
|
transfer progress consistently enough.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2021 12:34:32 -0400
|
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2021 12:34:32 -0400
|
||||||
|
|
||||||
|
|
|
@ -205,7 +205,7 @@ extractGitConfig configsource r = GitConfig
|
||||||
, annexRetryDelay = Seconds
|
, annexRetryDelay = Seconds
|
||||||
<$> getmayberead (annexConfig "retrydelay")
|
<$> getmayberead (annexConfig "retrydelay")
|
||||||
, annexStallDetection =
|
, annexStallDetection =
|
||||||
either (const Nothing) Just . parseStallDetection
|
either (const Nothing) id . parseStallDetection
|
||||||
=<< getmaybe (annexConfig "stalldetection")
|
=<< getmaybe (annexConfig "stalldetection")
|
||||||
, annexAllowedUrlSchemes = S.fromList $ map mkScheme $
|
, annexAllowedUrlSchemes = S.fromList $ map mkScheme $
|
||||||
maybe ["http", "https", "ftp"] words $
|
maybe ["http", "https", "ftp"] words $
|
||||||
|
@ -377,7 +377,7 @@ extractRemoteGitConfig r remotename = do
|
||||||
, remoteAnnexRetryDelay = Seconds
|
, remoteAnnexRetryDelay = Seconds
|
||||||
<$> getmayberead "retrydelay"
|
<$> getmayberead "retrydelay"
|
||||||
, remoteAnnexStallDetection =
|
, remoteAnnexStallDetection =
|
||||||
either (const Nothing) Just . parseStallDetection
|
either (const Nothing) id . parseStallDetection
|
||||||
=<< getmaybe "stalldetection"
|
=<< getmaybe "stalldetection"
|
||||||
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
, remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
||||||
getmaybe ("security-allow-unverified-downloads")
|
getmaybe ("security-allow-unverified-downloads")
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- types for stall detection
|
{- types for stall detection
|
||||||
-
|
-
|
||||||
- Copyright 2020 Joey Hess <id@joeyh.name>
|
- Copyright 2020-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -10,20 +10,32 @@ module Types.StallDetection where
|
||||||
import Utility.DataUnits
|
import Utility.DataUnits
|
||||||
import Utility.HumanTime
|
import Utility.HumanTime
|
||||||
import Utility.Misc
|
import Utility.Misc
|
||||||
|
import Git.Config
|
||||||
|
|
||||||
-- Unless the given number of bytes have been sent over the given
|
data StallDetection
|
||||||
-- amount of time, there's a stall.
|
= StallDetection ByteSize Duration
|
||||||
data StallDetection = StallDetection ByteSize Duration
|
-- ^ Unless the given number of bytes have been sent over the given
|
||||||
|
-- amount of time, there's a stall.
|
||||||
|
| ProbeStallDetection
|
||||||
|
-- ^ Used when unsure how frequently transfer progress is updated,
|
||||||
|
-- or how fast data can be sent.
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
-- Parse eg, "0KiB/60s"
|
-- Parse eg, "0KiB/60s"
|
||||||
parseStallDetection :: String -> Either String StallDetection
|
--
|
||||||
parseStallDetection s =
|
-- Also, it can be set to "true" (or other git config equivilants)
|
||||||
let (bs, ds) = separate (== '/') s
|
-- to enable ProbeStallDetection.
|
||||||
in do
|
-- And "false" (and other git config equivilants) explicitly
|
||||||
|
-- disable stall detection.
|
||||||
|
parseStallDetection :: String -> Either String (Maybe StallDetection)
|
||||||
|
parseStallDetection s = case isTrueFalse s of
|
||||||
|
Nothing -> do
|
||||||
|
let (bs, ds) = separate (== '/') s
|
||||||
b <- maybe
|
b <- maybe
|
||||||
(Left $ "Unable to parse stall detection amount " ++ bs)
|
(Left $ "Unable to parse stall detection amount " ++ bs)
|
||||||
Right
|
Right
|
||||||
(readSize dataUnits bs)
|
(readSize dataUnits bs)
|
||||||
d <- parseDuration ds
|
d <- parseDuration ds
|
||||||
return (StallDetection b d)
|
return (Just (StallDetection b d))
|
||||||
|
Just True -> Right (Just ProbeStallDetection)
|
||||||
|
Just False -> Right Nothing
|
||||||
|
|
|
@ -1427,12 +1427,18 @@ Remotes are configured using these settings in `.git/config`.
|
||||||
When this happens, the transfer will be considered to have failed, so
|
When this happens, the transfer will be considered to have failed, so
|
||||||
settings like annex.retry will control what it does next.
|
settings like annex.retry will control what it does next.
|
||||||
|
|
||||||
The value specifies how much data git-annex should expect to see
|
Set to "true" to enable automatic stall detection. With this setting,
|
||||||
flowing, minimum, when it's not stalled, over a given period of time.
|
if a remote does not update its progress consistently, no stall detection
|
||||||
The format is "$amount/$timeperiod".
|
will be done. And it may take a while for git-annex to decide a remote
|
||||||
|
is really stalled when using automatic stall detection, since it needs
|
||||||
|
to be conservative about what looks like a stall.
|
||||||
|
|
||||||
|
For more fine control over what constitutes a stall, set to a value in
|
||||||
|
the form "$amount/$timeperiod" to specify how much data git-annex should
|
||||||
|
expect to see flowing, minimum, over a given period of time.
|
||||||
|
|
||||||
For example, to detect outright stalls where no data has been transferred
|
For example, to detect outright stalls where no data has been transferred
|
||||||
after 30 seconds: `git config annex.stalldetection "0/30s"`
|
after 30 seconds: `git config annex.stalldetection "1KB/30s"`
|
||||||
|
|
||||||
Or, if you have a remote on a USB drive that is normally capable of
|
Or, if you have a remote on a USB drive that is normally capable of
|
||||||
several megabytes per second, but has bad sectors where it gets
|
several megabytes per second, but has bad sectors where it gets
|
||||||
|
|
|
@ -3,14 +3,15 @@ to detect and retry stalls. But most users are not going to configure this.
|
||||||
Could something be done to dynamically detect a stall, without configuration?
|
Could something be done to dynamically detect a stall, without configuration?
|
||||||
|
|
||||||
Eg, wait until data starts to flow, and then check if there's at least some
|
Eg, wait until data starts to flow, and then check if there's at least some
|
||||||
data being sent each minute. If so, the progress display is being updated
|
data being sent each half minute. If so, the progress display is being updated
|
||||||
at least every minute. So then if 2 minutes go by without more data
|
at least every minute. So then if 1 minute goes by without more data
|
||||||
flowing, it's almost certainly stalled. And if the progress display is
|
flowing, it's almost certainly stalled. And if the progress display is
|
||||||
updated less frequently, see if it's updated every 2 minutes, etc. Although
|
updated less frequently, see if it's updated every 2 minutes, etc. Although
|
||||||
realistically, progress displays are updated every chunk, and there's
|
realistically, progress displays are updated every chunk, and there's
|
||||||
typically more than 1 chunk per minute. So longer durations than 1 minute
|
typically more than 1 chunk per minute. So longer durations than 1 minute
|
||||||
may be an unncessary complication. And a couple of minutes to detect a
|
may be an unncessary complication. And a minute to detect a stall is fine.
|
||||||
stall is fine.
|
|
||||||
|
> Implemented this, annex.stalldetection = true enables automatic.
|
||||||
|
|
||||||
It may still need a config to turn it on, because running
|
It may still need a config to turn it on, because running
|
||||||
transfers in separate processes can lead to more resource use, or even
|
transfers in separate processes can lead to more resource use, or even
|
||||||
|
|
|
@ -662,6 +662,7 @@ Executable git-annex
|
||||||
Annex.SpecialRemote
|
Annex.SpecialRemote
|
||||||
Annex.SpecialRemote.Config
|
Annex.SpecialRemote.Config
|
||||||
Annex.Ssh
|
Annex.Ssh
|
||||||
|
Annex.StallDetection
|
||||||
Annex.TaggedPush
|
Annex.TaggedPush
|
||||||
Annex.Tmp
|
Annex.Tmp
|
||||||
Annex.Transfer
|
Annex.Transfer
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue