From 18e00500ce7e0818da6b6f4ec5f67779468d1aa4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 21 Sep 2021 16:58:02 -0400 Subject: [PATCH 1/4] bwlimit Added annex.bwlimit and remote.name.annex-bwlimit config that works for git remotes and many but not all special remotes. This nearly works, at least for a git remote on the same disk. With it set to 100kb/1s, the meter displays an actual bandwidth of 128 kb/s, with occasional spikes to 160 kb/s. So it needs to delay just a bit longer... I'm unsure why. However, at the beginning a lot of data flows before it determines the right bandwidth limit. A granularity of less than 1s would probably improve that. And, I don't know yet if it makes sense to have it be 100ks/1s rather than 100kb/s. Is there a situation where the user would want a larger granularity? Does granulatity need to be configurable at all? I only used that format for the config really in order to reuse an existing parser. This can't support for external special remotes, or for ones that themselves shell out to an external command. (Well, it could, but it would involve pausing and resuming the child process tree, which seems very hard to implement and very strange besides.) There could also be some built-in special remotes that it still doesn't work for, due to them not having a progress meter whose displays blocks the bandwidth using thread. But I don't think there are actually any that run a separate thread for downloads than the thread that displays the progress meter. Sponsored-by: Graham Spencer on Patreon --- Annex/Import.hs | 6 ++++-- Annex/StallDetection.hs | 2 +- Annex/Transfer.hs | 7 +++++++ Annex/YoutubeDl.hs | 2 +- CHANGELOG | 2 ++ Command/Add.hs | 2 +- Messages/Progress.hs | 34 +++++++++++++++++++------------ Messages/Serialized.hs | 2 +- Remote/Git.hs | 9 ++++++--- Remote/Helper/P2P.hs | 15 ++++++++------ Remote/Helper/Special.hs | 6 ++++-- Remote/P2P.hs | 4 ++-- Types/GitConfig.hs | 12 +++++++++-- Types/StallDetection.hs | 32 +++++++++++++++++++----------- Utility/Metered.hs | 43 ++++++++++++++++++++++++++++++++++++++++ doc/git-annex.mdwn | 21 ++++++++++++++++++++ 16 files changed, 153 insertions(+), 46 deletions(-) diff --git a/Annex/Import.hs b/Annex/Import.hs index 453965ba79..010808ef3f 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -461,8 +461,9 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec , providedMimeEncoding = Nothing , providedLinkType = Nothing } + bwlimit <- bwLimit (Remote.gitconfig remote) islargefile <- checkMatcher' matcher mi mempty - metered Nothing sz $ const $ if islargefile + metered Nothing sz bwlimit $ const $ if islargefile then doimportlarge importkey cidmap db loc cid sz f else doimportsmall cidmap db loc cid sz @@ -557,11 +558,12 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec Left e -> do warning (show e) return Nothing + bwlimit <- bwLimit (Remote.gitconfig remote) checkDiskSpaceToGet tmpkey Nothing $ notifyTransfer Download af $ download' (Remote.uuid remote) tmpkey af Nothing stdRetry $ \p -> withTmp tmpkey $ \tmpfile -> - metered (Just p) tmpkey $ + metered (Just p) tmpkey bwlimit $ const (rundownload tmpfile) where tmpkey = importKey cid sz diff --git a/Annex/StallDetection.hs b/Annex/StallDetection.hs index 02540a4732..9c095b4c86 100644 --- a/Annex/StallDetection.hs +++ b/Annex/StallDetection.hs @@ -22,7 +22,7 @@ import Control.Monad.IO.Class (MonadIO) detectStalls :: (Monad m, MonadIO m) => Maybe StallDetection -> TVar (Maybe BytesProcessed) -> m () -> m () detectStalls Nothing _ _ = noop detectStalls (Just StallDetectionDisabled) _ _ = noop -detectStalls (Just (StallDetection minsz duration)) metervar onstall = +detectStalls (Just (StallDetection (BwRate minsz duration))) metervar onstall = detectStalls' minsz duration metervar onstall Nothing detectStalls (Just ProbeStallDetection) metervar onstall = do -- Only do stall detection once the progress is confirmed to be diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index c57dbaf3ec..c2c3b582ec 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -20,6 +20,7 @@ module Annex.Transfer ( stdRetry, pickRemote, stallDetection, + bwLimit, ) where import Annex.Common @@ -406,3 +407,9 @@ stallDetection r = maybe globalcfg (pure . Just) remotecfg where globalcfg = annexStallDetection <$> Annex.getGitConfig remotecfg = remoteAnnexStallDetection $ Remote.gitconfig r + +bwLimit :: RemoteGitConfig -> Annex (Maybe BwRate) +bwLimit gc = maybe globalcfg (pure . Just) remotecfg + where + globalcfg = annexBwLimit <$> Annex.getGitConfig + remotecfg = remoteAnnexBwLimit gc diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs index 5cbc9e7f3b..52219827ae 100644 --- a/Annex/YoutubeDl.hs +++ b/Annex/YoutubeDl.hs @@ -96,7 +96,7 @@ youtubeDl' url workdir p uo -- with the size, which is why it's important the -- meter is passed into commandMeter' let unknownsize = Nothing :: Maybe FileSize - ok <- metered (Just p) unknownsize $ \meter meterupdate -> + ok <- metered (Just p) unknownsize Nothing $ \meter meterupdate -> liftIO $ commandMeter' parseYoutubeDlProgress oh (Just meter) meterupdate cmd opts (\pr -> pr { cwd = Just workdir }) diff --git a/CHANGELOG b/CHANGELOG index 538ae62a49..fd9f1d06ef 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ git-annex (8.20210904) UNRELEASED; urgency=medium + * Added annex.bwlimit and remote.name.annex-bwlimit config that works + for git remotes and many but not all special remotes. * borg: Avoid trying to extract xattrs, ACLS, and bsdflags when retrieving from a borg repository. diff --git a/Command/Add.hs b/Command/Add.hs index 0fe4351ab0..4da6f7354f 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -192,7 +192,7 @@ perform o file addunlockedmatcher = withOtherTmp $ \tmpdir -> do } ld <- lockDown cfg (fromRawFilePath file) let sizer = keySource <$> ld - v <- metered Nothing sizer $ \_meter meterupdate -> + v <- metered Nothing sizer Nothing $ \_meter meterupdate -> ingestAdd (checkGitIgnoreOption o) meterupdate ld finish v where diff --git a/Messages/Progress.hs b/Messages/Progress.hs index 397aebbb37..832dd9e0f3 100644 --- a/Messages/Progress.hs +++ b/Messages/Progress.hs @@ -1,6 +1,6 @@ {- git-annex progress output - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -18,6 +18,7 @@ import Types import Types.Messages import Types.Key import Types.KeySource +import Types.StallDetection (BwRate(..)) import Utility.InodeCache import qualified Messages.JSON as JSON import Messages.Concurrent @@ -72,11 +73,12 @@ metered :: MeterSize sizer => Maybe MeterUpdate -> sizer + -> Maybe BwRate -> (Meter -> MeterUpdate -> Annex a) -> Annex a -metered othermeter sizer a = withMessageState $ \st -> do +metered othermeterupdate sizer bwlimit a = withMessageState $ \st -> do sz <- getMeterSize sizer - metered' st setclear othermeter sz showOutput a + metered' st setclear othermeterupdate sz bwlimit showOutput a where setclear c = Annex.changeState $ \st -> st { Annex.output = (Annex.output st) { clearProgressMeter = c } } @@ -90,11 +92,12 @@ metered' -- NormalOutput. -> Maybe MeterUpdate -> Maybe TotalSize + -> Maybe BwRate -> m () -- ^ this should run showOutput -> (Meter -> MeterUpdate -> m a) -> m a -metered' st setclear othermeter msize showoutput a = go st +metered' st setclear othermeterupdate msize bwlimit showoutput a = go st where go (MessageState { outputType = QuietOutput }) = nometer go (MessageState { outputType = NormalOutput, concurrentOutputEnabled = False }) = do @@ -105,7 +108,7 @@ metered' st setclear othermeter msize showoutput a = go st setclear clear m <- liftIO $ rateLimitMeterUpdate consoleratelimit meter $ updateMeter meter - r <- a meter (combinemeter m) + r <- a meter =<< mkmeterupdate m setclear noop liftIO clear return r @@ -116,7 +119,7 @@ metered' st setclear othermeter msize showoutput a = go st in Regions.setConsoleRegion r ('\n' : s) m <- liftIO $ rateLimitMeterUpdate consoleratelimit meter $ updateMeter meter - a meter (combinemeter m) + a meter =<< mkmeterupdate m go (MessageState { outputType = JSONOutput jsonoptions }) | jsonProgress jsonoptions = do let buf = jsonBuffer st @@ -124,7 +127,7 @@ metered' st setclear othermeter msize showoutput a = go st JSON.progress buf msize' (meterBytesProcessed new) m <- liftIO $ rateLimitMeterUpdate jsonratelimit meter $ updateMeter meter - a meter (combinemeter m) + a meter =<< mkmeterupdate m | otherwise = nometer go (MessageState { outputType = SerializedOutput h _ }) = do liftIO $ outputSerialized h BeginProgressMeter @@ -144,16 +147,21 @@ metered' st setclear othermeter msize showoutput a = go st meterBytesProcessed new m <- liftIO $ rateLimitMeterUpdate minratelimit meter $ updateMeter meter - a meter (combinemeter m) + (a meter =<< mkmeterupdate m) `finally` (liftIO $ outputSerialized h EndProgressMeter) nometer = do dummymeter <- liftIO $ mkMeter Nothing $ \_ _ _ _ -> return () - a dummymeter (combinemeter (const noop)) + a dummymeter =<< mkmeterupdate (const noop) - combinemeter m = case othermeter of - Nothing -> m - Just om -> combineMeterUpdate m om + mkmeterupdate m = + let mu = case othermeterupdate of + Nothing -> m + Just om -> combineMeterUpdate m om + in case bwlimit of + Nothing -> return mu + Just (BwRate sz duration) -> liftIO $ + bwLimitMeterUpdate sz duration mu consoleratelimit = 0.2 @@ -164,7 +172,7 @@ metered' st setclear othermeter msize showoutput a = go st {- Poll file size to display meter. -} meteredFile :: FilePath -> Maybe MeterUpdate -> Key -> Annex a -> Annex a meteredFile file combinemeterupdate key a = - metered combinemeterupdate key $ \_ p -> + metered combinemeterupdate key Nothing $ \_ p -> watchFileSize file p a {- Progress dots. -} diff --git a/Messages/Serialized.hs b/Messages/Serialized.hs index 0f5faddc9f..3f20d1a27d 100644 --- a/Messages/Serialized.hs +++ b/Messages/Serialized.hs @@ -68,7 +68,7 @@ relaySerializedOutput getso sendsor meterreport runannex = go Nothing let setclear = const noop -- Display a progress meter while running, until -- the meter ends or a final value is returned. - metered' ost setclear Nothing Nothing (runannex showOutput) + metered' ost setclear Nothing Nothing Nothing (runannex showOutput) (\meter meterupdate -> loop (Just (meter, meterupdate))) >>= \case Right r -> return (Right r) diff --git a/Remote/Git.hs b/Remote/Git.hs index 81a6bc5fdf..65d388bf92 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -550,6 +550,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met | not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do u <- getUUID hardlink <- wantHardLink + bwlimit <- bwLimit (gitconfig r) -- run copy from perspective of remote onLocalFast st $ Annex.Content.prepSendAnnex' key >>= \case Just (object, check) -> do @@ -559,7 +560,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met copier <- mkFileCopier hardlink st (ok, v) <- runTransfer (Transfer Download u (fromKey id key)) file Nothing stdRetry $ \p -> - metered (Just (combineMeterUpdate p meterupdate)) key $ \_ p' -> + metered (Just (combineMeterUpdate p meterupdate)) key bwlimit $ \_ p' -> copier object dest key p' checksuccess vc if ok then return v @@ -572,6 +573,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met then return v else giveup "failed to retrieve content from remote" else P2PHelper.retrieve + (gitconfig r) (\p -> Ssh.runProto r connpool (return (False, UnVerified)) (fallback p)) key file dest meterupdate vc | otherwise = giveup "copying from non-ssh, non-http remote not supported" @@ -680,7 +682,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate , giveup "remote does not have expected annex.uuid value" ) | Git.repoIsSsh repo = commitOnCleanup repo r st $ - P2PHelper.store + P2PHelper.store (gitconfig r) (Ssh.runProto r connpool (return False) . copyremotefallback) key file meterupdate @@ -694,6 +696,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate checkio <- Annex.withCurrentState check u <- getUUID hardlink <- wantHardLink + bwlimit <- bwLimit (gitconfig r) -- run copy from perspective of remote res <- onLocalFast st $ ifM (Annex.Content.inAnnex key) ( return True @@ -705,7 +708,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate Just err -> giveup err Nothing -> return True res <- logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest -> - metered (Just (combineMeterUpdate meterupdate p)) key $ \_ p' -> + metered (Just (combineMeterUpdate meterupdate p)) key bwlimit $ \_ p' -> copier object (fromRawFilePath dest) key p' checksuccess verify Annex.Content.saveState True return res diff --git a/Remote/Helper/P2P.hs b/Remote/Helper/P2P.hs index 647bc6b016..0a9d41b9de 100644 --- a/Remote/Helper/P2P.hs +++ b/Remote/Helper/P2P.hs @@ -18,6 +18,7 @@ import Messages.Progress import Utility.Metered import Types.NumCopies import Annex.Verify +import Annex.Transfer import Control.Concurrent @@ -31,19 +32,21 @@ type ProtoConnRunner c = forall a. P2P.Proto a -> ClosableConnection c -> Annex -- the pool when done. type WithConn a c = (ClosableConnection c -> Annex (ClosableConnection c, a)) -> Annex a -store :: (MeterUpdate -> ProtoRunner Bool) -> Key -> AssociatedFile -> MeterUpdate -> Annex () -store runner k af p = do +store :: RemoteGitConfig -> (MeterUpdate -> ProtoRunner Bool) -> Key -> AssociatedFile -> MeterUpdate -> Annex () +store gc runner k af p = do let sizer = KeySizer k (fmap (toRawFilePath . fst) <$> prepSendAnnex k) - metered (Just p) sizer $ \_ p' -> + bwlimit <- bwLimit gc + metered (Just p) sizer bwlimit $ \_ p' -> runner p' (P2P.put k af p') >>= \case Just True -> return () Just False -> giveup "Transfer failed" Nothing -> remoteUnavail -retrieve :: (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification -retrieve runner k af dest p verifyconfig = do +retrieve :: RemoteGitConfig -> (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification +retrieve gc runner k af dest p verifyconfig = do iv <- startVerifyKeyContentIncrementally verifyconfig k - metered (Just p) k $ \m p' -> + bwlimit <- bwLimit gc + metered (Just p) k bwlimit $ \m p' -> runner p' (P2P.get dest k iv af m p') >>= \case Just (True, v) -> return v Just (False, _) -> giveup "Transfer failed" diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 86ba114d8c..d221be040e 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -42,6 +42,7 @@ import Types.StoreRetrieve import Types.Remote import Annex.Verify import Annex.UUID +import Annex.Transfer import Config import Config.Cost import Utility.Metered @@ -261,8 +262,9 @@ specialRemote' cfg c storer retriever remover checkpresent baser = encr chunkconfig = chunkConfig cfg displayprogress p k srcfile a - | displayProgress cfg = - metered (Just p) (KeySizer k (pure (fmap toRawFilePath srcfile))) (const a) + | displayProgress cfg = do + bwlimit <- bwLimit (gitconfig baser) + metered (Just p) (KeySizer k (pure (fmap toRawFilePath srcfile))) bwlimit (const a) | otherwise = a p withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a diff --git a/Remote/P2P.hs b/Remote/P2P.hs index 21cf5b42e1..7f3817c1bf 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -55,8 +55,8 @@ chainGen addr r u rc gc rs = do { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = store (const protorunner) - , retrieveKeyFile = retrieve (const protorunner) + , storeKey = store gc (const protorunner) + , retrieveKeyFile = retrieve gc (const protorunner) , retrieveKeyFileCheap = Nothing , retrievalSecurityPolicy = RetrievalAllKeysSecure , removeKey = remove protorunner diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index ab1060e060..64585cdffc 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -124,6 +124,7 @@ data GitConfig = GitConfig , annexForwardRetry :: Maybe Integer , annexRetryDelay :: Maybe Seconds , annexStallDetection :: Maybe StallDetection + , annexBwLimit :: Maybe BwRate , annexAllowedUrlSchemes :: S.Set Scheme , annexAllowedIPAddresses :: String , annexAllowUnverifiedDownloads :: Bool @@ -218,8 +219,11 @@ extractGitConfig configsource r = GitConfig , annexRetryDelay = Seconds <$> getmayberead (annexConfig "retrydelay") , annexStallDetection = - either (const Nothing) id . parseStallDetection + either (const Nothing) Just . parseStallDetection =<< getmaybe (annexConfig "stalldetection") + , annexBwLimit = + either (const Nothing) Just . parseBwRate + =<< getmaybe (annexConfig "bwlimit") , annexAllowedUrlSchemes = S.fromList $ map mkScheme $ maybe ["http", "https", "ftp"] words $ getmaybe (annexConfig "security.allowed-url-schemes") @@ -343,6 +347,7 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexForwardRetry :: Maybe Integer , remoteAnnexRetryDelay :: Maybe Seconds , remoteAnnexStallDetection :: Maybe StallDetection + , remoteAnnexBwLimit :: Maybe BwRate , remoteAnnexAllowUnverifiedDownloads :: Bool , remoteAnnexConfigUUID :: Maybe UUID @@ -408,8 +413,11 @@ extractRemoteGitConfig r remotename = do , remoteAnnexRetryDelay = Seconds <$> getmayberead "retrydelay" , remoteAnnexStallDetection = - either (const Nothing) id . parseStallDetection + either (const Nothing) Just . parseStallDetection =<< getmaybe "stalldetection" + , remoteAnnexBwLimit = + either (const Nothing) Just . parseBwRate + =<< getmaybe "bwlimit" , remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ getmaybe ("security-allow-unverified-downloads") , remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid" diff --git a/Types/StallDetection.hs b/Types/StallDetection.hs index 1cfc098a5d..13d88699f2 100644 --- a/Types/StallDetection.hs +++ b/Types/StallDetection.hs @@ -1,4 +1,4 @@ -{- types for stall detection +{- types for stall detection and banwdith rates - - Copyright 2020-2021 Joey Hess - @@ -13,7 +13,7 @@ import Utility.Misc import Git.Config data StallDetection - = StallDetection ByteSize Duration + = StallDetection BwRate -- ^ Unless the given number of bytes have been sent over the given -- amount of time, there's a stall. | ProbeStallDetection @@ -22,21 +22,29 @@ data StallDetection | StallDetectionDisabled deriving (Show) +data BwRate = BwRate ByteSize Duration + deriving (Show) + -- Parse eg, "0KiB/60s" -- -- Also, it can be set to "true" (or other git config equivilants) -- to enable ProbeStallDetection. -- And "false" (and other git config equivilants) explicitly -- disable stall detection. -parseStallDetection :: String -> Either String (Maybe StallDetection) +parseStallDetection :: String -> Either String StallDetection parseStallDetection s = case isTrueFalse s of Nothing -> do - let (bs, ds) = separate (== '/') s - b <- maybe - (Left $ "Unable to parse stall detection amount " ++ bs) - Right - (readSize dataUnits bs) - d <- parseDuration ds - return (Just (StallDetection b d)) - Just True -> Right (Just ProbeStallDetection) - Just False -> Right (Just StallDetectionDisabled) + v <- parseBwRate s + Right (StallDetection v) + Just True -> Right ProbeStallDetection + Just False -> Right StallDetectionDisabled + +parseBwRate :: String -> Either String BwRate +parseBwRate s = do + let (bs, ds) = separate (== '/') s + b <- maybe + (Left $ "Unable to parse bandwidth amount " ++ bs) + Right + (readSize dataUnits bs) + d <- parseDuration ds + Right (BwRate b d) diff --git a/Utility/Metered.hs b/Utility/Metered.hs index a7c9c37d7f..ea391edb5c 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -37,6 +37,7 @@ module Utility.Metered ( demeterCommandEnv, avoidProgress, rateLimitMeterUpdate, + bwLimitMeterUpdate, Meter, mkMeter, setMeterTotalSize, @@ -51,6 +52,7 @@ import Utility.Percentage import Utility.DataUnits import Utility.HumanTime import Utility.SimpleProtocol as Proto +import Utility.ThreadScheduler import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -380,6 +382,47 @@ rateLimitMeterUpdate delta (Meter totalsizev _ _ _) meterupdate = do meterupdate n else putMVar lastupdate prev +-- | Bandwidth limiting by inserting a delay at the point that a meter is +-- updated. +-- +-- This will only work when the actions that use bandwidth are run in the +-- same process and thread as the call to the MeterUpdate. +-- +-- For example, if the desired bandwidth is 100kb/s, and over the past +-- second, 200kb was sent, then pausing for half a second, and then +-- running for half a second should result in the desired bandwidth. +-- But, if after that pause, only 75kb is sent over the next half a +-- second, then the next pause should be 2/3rds of a second. +bwLimitMeterUpdate :: ByteSize -> Duration -> MeterUpdate -> IO MeterUpdate +bwLimitMeterUpdate sz duration meterupdate = do + nowtime <- getPOSIXTime + lastpause <- newMVar (nowtime, toEnum 0 :: POSIXTime, 0) + return $ mu lastpause + where + mu lastpause n@(BytesProcessed i) = do + nowtime <- getPOSIXTime + meterupdate n + lastv@(prevtime, prevpauselength, previ) <- takeMVar lastpause + let timedelta = nowtime - prevtime + if timedelta >= durationsecs + then do + let sz' = i - previ + let runtime = timedelta - prevpauselength + let pauselength = calcpauselength sz' runtime + if pauselength > 0 + then do + unboundDelay (floor (pauselength * fromIntegral oneSecond)) + putMVar lastpause (nowtime, pauselength, i) + else putMVar lastpause lastv + else putMVar lastpause lastv + + calcpauselength sz' runtime + | sz' > sz && sz' > 0 && runtime > 0 = + durationsecs - (fromIntegral sz / fromIntegral sz') * runtime + | otherwise = 0 + + durationsecs = fromIntegral (durationSeconds duration) + data Meter = Meter (MVar (Maybe TotalSize)) (MVar MeterState) (MVar String) DisplayMeter data MeterState = MeterState diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 148d321bfd..f781c09b09 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1384,6 +1384,27 @@ Remotes are configured using these settings in `.git/config`. When making multiple retries of the same transfer, the delay doubles after each retry. (default 1) +* `remote..annex-bwlimit`, `annex.bwlimit` + + This can be used to limit how much bandwidth is used for a transfer + from or to a remote. + + For example, to limit transfers to 1 gigabyte per second: + `git config annex.bwlimit "1GB/1s"` + + This will work with many remotes, including git remotes, but not + for remotes where the transfer is run by a separate program than + git-annex. + + The bandwidth limiting is implemented by pausing when + the transfer is running too fast, so it may use more bandwidth + than configured before being slowed down, either at the beginning + or if the available bandwidth changes while it is running. + + It is different to use "1GB/1s" than "10GB/10s". git-annex will + track how much data was transferred over the time period, and then + pausing. So usually 1s is the best time period to use. + * `remote..annex-stalldetecton`, `annex.stalldetection` Configuring this lets stalled or too-slow transfers be detected, and From 798b33ba3d357bd0f3b02398a92169de0197e6eb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 22 Sep 2021 10:51:10 -0400 Subject: [PATCH 2/4] simplify annex.bwlimit handling RemoteGitConfig parsing looks for annex.bwlimit when a remote does not have a per-remote config for it, so no need for a separate gobal config. Sponsored-by: Svenne Krap on Patreon --- Annex/Import.hs | 4 ++-- Annex/Transfer.hs | 7 ------- Remote/Git.hs | 4 ++-- Remote/Helper/P2P.hs | 5 ++--- Remote/Helper/Special.hs | 3 +-- Types/GitConfig.hs | 4 ---- 6 files changed, 7 insertions(+), 20 deletions(-) diff --git a/Annex/Import.hs b/Annex/Import.hs index 010808ef3f..2d15c11b99 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -461,7 +461,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec , providedMimeEncoding = Nothing , providedLinkType = Nothing } - bwlimit <- bwLimit (Remote.gitconfig remote) + let bwlimit = remoteAnnexBwLimit (Remote.gitconfig remote) islargefile <- checkMatcher' matcher mi mempty metered Nothing sz bwlimit $ const $ if islargefile then doimportlarge importkey cidmap db loc cid sz f @@ -558,7 +558,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec Left e -> do warning (show e) return Nothing - bwlimit <- bwLimit (Remote.gitconfig remote) + let bwlimit = remoteAnnexBwLimit (Remote.gitconfig remote) checkDiskSpaceToGet tmpkey Nothing $ notifyTransfer Download af $ download' (Remote.uuid remote) tmpkey af Nothing stdRetry $ \p -> diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index b0d53a66a3..c6597baec2 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -19,7 +19,6 @@ module Annex.Transfer ( noRetry, stdRetry, pickRemote, - bwLimit, ) where import Annex.Common @@ -402,9 +401,3 @@ lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering lessActiveFirst active a b | Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b | otherwise = comparing Remote.cost a b - -bwLimit :: RemoteGitConfig -> Annex (Maybe BwRate) -bwLimit gc = maybe globalcfg (pure . Just) remotecfg - where - globalcfg = annexBwLimit <$> Annex.getGitConfig - remotecfg = remoteAnnexBwLimit gc diff --git a/Remote/Git.hs b/Remote/Git.hs index 65d388bf92..7f615d114e 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -550,7 +550,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met | not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do u <- getUUID hardlink <- wantHardLink - bwlimit <- bwLimit (gitconfig r) + let bwlimit = remoteAnnexBwLimit (gitconfig r) -- run copy from perspective of remote onLocalFast st $ Annex.Content.prepSendAnnex' key >>= \case Just (object, check) -> do @@ -696,7 +696,7 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate checkio <- Annex.withCurrentState check u <- getUUID hardlink <- wantHardLink - bwlimit <- bwLimit (gitconfig r) + let bwlimit = remoteAnnexBwLimit (gitconfig r) -- run copy from perspective of remote res <- onLocalFast st $ ifM (Annex.Content.inAnnex key) ( return True diff --git a/Remote/Helper/P2P.hs b/Remote/Helper/P2P.hs index 0a9d41b9de..2dd641a67e 100644 --- a/Remote/Helper/P2P.hs +++ b/Remote/Helper/P2P.hs @@ -18,7 +18,6 @@ import Messages.Progress import Utility.Metered import Types.NumCopies import Annex.Verify -import Annex.Transfer import Control.Concurrent @@ -35,7 +34,7 @@ type WithConn a c = (ClosableConnection c -> Annex (ClosableConnection c, a)) -> store :: RemoteGitConfig -> (MeterUpdate -> ProtoRunner Bool) -> Key -> AssociatedFile -> MeterUpdate -> Annex () store gc runner k af p = do let sizer = KeySizer k (fmap (toRawFilePath . fst) <$> prepSendAnnex k) - bwlimit <- bwLimit gc + let bwlimit = remoteAnnexBwLimit gc metered (Just p) sizer bwlimit $ \_ p' -> runner p' (P2P.put k af p') >>= \case Just True -> return () @@ -45,7 +44,7 @@ store gc runner k af p = do retrieve :: RemoteGitConfig -> (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification retrieve gc runner k af dest p verifyconfig = do iv <- startVerifyKeyContentIncrementally verifyconfig k - bwlimit <- bwLimit gc + let bwlimit = remoteAnnexBwLimit gc metered (Just p) k bwlimit $ \m p' -> runner p' (P2P.get dest k iv af m p') >>= \case Just (True, v) -> return v diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index d221be040e..8f595865db 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -42,7 +42,6 @@ import Types.StoreRetrieve import Types.Remote import Annex.Verify import Annex.UUID -import Annex.Transfer import Config import Config.Cost import Utility.Metered @@ -263,7 +262,7 @@ specialRemote' cfg c storer retriever remover checkpresent baser = encr displayprogress p k srcfile a | displayProgress cfg = do - bwlimit <- bwLimit (gitconfig baser) + let bwlimit = remoteAnnexBwLimit (gitconfig baser) metered (Just p) (KeySizer k (pure (fmap toRawFilePath srcfile))) bwlimit (const a) | otherwise = a p diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index fca052da4e..affc7f0146 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -123,7 +123,6 @@ data GitConfig = GitConfig , annexRetry :: Maybe Integer , annexForwardRetry :: Maybe Integer , annexRetryDelay :: Maybe Seconds - , annexBwLimit :: Maybe BwRate , annexAllowedUrlSchemes :: S.Set Scheme , annexAllowedIPAddresses :: String , annexAllowUnverifiedDownloads :: Bool @@ -217,9 +216,6 @@ extractGitConfig configsource r = GitConfig , annexForwardRetry = getmayberead (annexConfig "forward-retry") , annexRetryDelay = Seconds <$> getmayberead (annexConfig "retrydelay") - , annexBwLimit = - either (const Nothing) Just . parseBwRate - =<< getmaybe (annexConfig "bwlimit") , annexAllowedUrlSchemes = S.fromList $ map mkScheme $ maybe ["http", "https", "ftp"] words $ getmaybe (annexConfig "security.allowed-url-schemes") From 44d3d50785a7ed004aa144236b7385f99e22ab0d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 22 Sep 2021 11:10:55 -0400 Subject: [PATCH 3/4] note --- doc/todo/bwlimit.mdwn | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/todo/bwlimit.mdwn b/doc/todo/bwlimit.mdwn index da1deffe69..337ec8616a 100644 --- a/doc/todo/bwlimit.mdwn +++ b/doc/todo/bwlimit.mdwn @@ -12,3 +12,20 @@ works, it will probably work to put the delay in there. --[[Joey]] > Implmentation in progress in the `bwlimit` branch. Seems to work, but see > commit message for what still needs to be done. --[[Joey]] + +> The directory special remote, when resuming an interrupted +> transfer, has to hash the file (with default annex.verify settings), +> and that hashing updates the progress bar, and so the bwlimit can kick +> in and slow down that initial hashing, before any data copying begins. +> This seems perhaps ok; if you've bwlimited a directory special +> remote you're wanting to limit disk IO. Only reason it might not be ok +> is if the intent is to limit IO to the disk containing the directory +> special remote, but not the one containing the annex repo. +> +> Other remotes, including git over ssh, when resuming don't have that +> problem. Looks like chunked special remotes narrowly avoid it, just +> because their implementation choose to not do incremental verification +> when resuming. It might be worthwhile to differentiate between progress +> updates for incremental verification setup and for actual transfers, and +> only rate limit the latter, just to avoid fragility in the code. +> --[[Joey]] From e8496d62e4555d913917a06cecbe34e3d98b4a27 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 22 Sep 2021 15:14:28 -0400 Subject: [PATCH 4/4] improved bwrate limiting implementation New method is much better. Avoids unrestrained transfer at the beginning (except for the first block. Keeps right at or a few kb/s below the configured limit, with very little varation in the actual reported bandwidth. Removed the /s part of the config as it's not needed. Ready to merge. Sponsored-by: Luke Shumaker on Patreon --- CHANGELOG | 5 ++-- Types/GitConfig.hs | 6 ++--- Utility/Metered.hs | 55 ++++++++++++++++++++----------------------- doc/git-annex.mdwn | 17 ++++--------- doc/todo/bwlimit.mdwn | 18 ++++++++------ 5 files changed, 46 insertions(+), 55 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 15e94fdc64..7a1d4066e5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,8 @@ git-annex (8.20210904) UNRELEASED; urgency=medium - * Added annex.bwlimit and remote.name.annex-bwlimit config that works - for git remotes and many but not all special remotes. + * Added annex.bwlimit and remote.name.annex-bwlimit config to limit + the bandwidth of transfers. It works for git remotes and many + but not all special remotes. * Bug fix: Git configs such as annex.verify were incorrectly overriding per-remote git configs such as remote.name.annex-verify. (Reversion in version 4.20130323) diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index affc7f0146..46362de897 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -407,9 +407,9 @@ extractRemoteGitConfig r remotename = do , remoteAnnexStallDetection = either (const Nothing) Just . parseStallDetection =<< getmaybe "stalldetection" - , remoteAnnexBwLimit = - either (const Nothing) Just . parseBwRate - =<< getmaybe "bwlimit" + , remoteAnnexBwLimit = do + sz <- readSize dataUnits =<< getmaybe "bwlimit" + return (BwRate sz (Duration 1)) , remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ getmaybe ("security-allow-unverified-downloads") , remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid" diff --git a/Utility/Metered.hs b/Utility/Metered.hs index ea391edb5c..1e12444d60 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -389,39 +389,34 @@ rateLimitMeterUpdate delta (Meter totalsizev _ _ _) meterupdate = do -- same process and thread as the call to the MeterUpdate. -- -- For example, if the desired bandwidth is 100kb/s, and over the past --- second, 200kb was sent, then pausing for half a second, and then --- running for half a second should result in the desired bandwidth. --- But, if after that pause, only 75kb is sent over the next half a --- second, then the next pause should be 2/3rds of a second. +-- 1/10th of a second, 30kb was sent, then the current bandwidth is +-- 300kb/s, 3x as fast as desired. So, after getting the next chunk, +-- pause for twice as long as it took to get it. bwLimitMeterUpdate :: ByteSize -> Duration -> MeterUpdate -> IO MeterUpdate -bwLimitMeterUpdate sz duration meterupdate = do - nowtime <- getPOSIXTime - lastpause <- newMVar (nowtime, toEnum 0 :: POSIXTime, 0) - return $ mu lastpause - where - mu lastpause n@(BytesProcessed i) = do +bwLimitMeterUpdate bwlimit duration meterupdate + | bwlimit <= 0 = return meterupdate + | otherwise = do nowtime <- getPOSIXTime - meterupdate n - lastv@(prevtime, prevpauselength, previ) <- takeMVar lastpause - let timedelta = nowtime - prevtime - if timedelta >= durationsecs - then do - let sz' = i - previ - let runtime = timedelta - prevpauselength - let pauselength = calcpauselength sz' runtime - if pauselength > 0 - then do - unboundDelay (floor (pauselength * fromIntegral oneSecond)) - putMVar lastpause (nowtime, pauselength, i) - else putMVar lastpause lastv - else putMVar lastpause lastv + mv <- newMVar (nowtime, 0) + return (mu mv) + where + mu mv n@(BytesProcessed i) = do + endtime <- getPOSIXTime + (starttime, previ) <- takeMVar mv - calcpauselength sz' runtime - | sz' > sz && sz' > 0 && runtime > 0 = - durationsecs - (fromIntegral sz / fromIntegral sz') * runtime - | otherwise = 0 - - durationsecs = fromIntegral (durationSeconds duration) + let runtime = endtime - starttime + let currbw = fromIntegral (i - previ) / runtime + let pausescale = if currbw > bwlimit' + then (currbw / bwlimit') - 1 + else 0 + unboundDelay (floor (runtime * pausescale * msecs)) + meterupdate n + + nowtime <- getPOSIXTime + putMVar mv (nowtime, i) + + bwlimit' = fromIntegral (bwlimit * durationSeconds duration) + msecs = fromIntegral oneSecond data Meter = Meter (MVar (Maybe TotalSize)) (MVar MeterState) (MVar String) DisplayMeter diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index f781c09b09..def6ec1dc4 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1389,21 +1389,12 @@ Remotes are configured using these settings in `.git/config`. This can be used to limit how much bandwidth is used for a transfer from or to a remote. - For example, to limit transfers to 1 gigabyte per second: - `git config annex.bwlimit "1GB/1s"` - + For example, to limit transfers to 1 mebibyte per second: + `git config annex.bwlimit "1MiB"` + This will work with many remotes, including git remotes, but not for remotes where the transfer is run by a separate program than - git-annex. - - The bandwidth limiting is implemented by pausing when - the transfer is running too fast, so it may use more bandwidth - than configured before being slowed down, either at the beginning - or if the available bandwidth changes while it is running. - - It is different to use "1GB/1s" than "10GB/10s". git-annex will - track how much data was transferred over the time period, and then - pausing. So usually 1s is the best time period to use. + git-annex. * `remote..annex-stalldetecton`, `annex.stalldetection` diff --git a/doc/todo/bwlimit.mdwn b/doc/todo/bwlimit.mdwn index 337ec8616a..675896cfe0 100644 --- a/doc/todo/bwlimit.mdwn +++ b/doc/todo/bwlimit.mdwn @@ -10,17 +10,17 @@ works, it will probably work to put the delay in there. --[[Joey]] [[confirmed]] -> Implmentation in progress in the `bwlimit` branch. Seems to work, but see -> commit message for what still needs to be done. --[[Joey]] - -> The directory special remote, when resuming an interrupted +> Implemented and works well. +> +> A local git remote, when resuming an interrupted > transfer, has to hash the file (with default annex.verify settings), > and that hashing updates the progress bar, and so the bwlimit can kick > in and slow down that initial hashing, before any data copying begins. -> This seems perhaps ok; if you've bwlimited a directory special +> This seems perhaps ok; if you've bwlimited a local git remote, > remote you're wanting to limit disk IO. Only reason it might not be ok -> is if the intent is to limit IO to the disk containing the directory -> special remote, but not the one containing the annex repo. +> is if the intent is to limit IO to the disk containing the remote +> but not the one containing the annex repo. (This also probably +> holds for the directory special remote.) > > Other remotes, including git over ssh, when resuming don't have that > problem. Looks like chunked special remotes narrowly avoid it, just @@ -28,4 +28,8 @@ works, it will probably work to put the delay in there. --[[Joey]] > when resuming. It might be worthwhile to differentiate between progress > updates for incremental verification setup and for actual transfers, and > only rate limit the latter, just to avoid fragility in the code. +> I have not done so yet though, and am closing this.. > --[[Joey]] + +[[done]] +