From 4c32499e824c42a86b450854b6d130a1585365a7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Sep 2020 17:53:48 -0400 Subject: [PATCH] Parse youtube-dl progress output Which lets progress be displayed when doing concurrent downloads. Amoung other things, like --json-progress etc. The youtube-dl output is no longer displayed, except for any errors. This commit was sponsored by Denis Dzyubenko on Patreon. --- Annex/Transfer.hs | 4 +-- Annex/YoutubeDl.hs | 75 +++++++++++++++++++++++++++++++++++-------- CHANGELOG | 2 ++ Command/AddUrl.hs | 5 +-- Command/ImportFeed.hs | 2 +- Remote/BitTorrent.hs | 6 ++-- Remote/Web.hs | 4 +-- Utility/Metered.hs | 62 +++++++++++++++++++++++------------ Utility/Rsync.hs | 6 ++-- 9 files changed, 117 insertions(+), 49 deletions(-) diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index f2321a7f36..fccc11a65d 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -74,9 +74,7 @@ runTransfer = runTransfer' False {- Like runTransfer, but ignores any existing transfer lock file for the - transfer, allowing re-running a transfer that is already in progress. - - - - Note that this may result in confusing progress meter display in the - - webapp, if multiple processes are writing to the transfer info file. -} + -} alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v alwaysRunTransfer = runTransfer' True diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs index 6868f53174..f0190ba702 100644 --- a/Annex/YoutubeDl.hs +++ b/Annex/YoutubeDl.hs @@ -1,6 +1,6 @@ {- youtube-dl integration for git-annex - - - Copyright 2017-2018 Joey Hess + - Copyright 2017-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -21,10 +21,15 @@ import Annex.Url import Utility.DiskFree import Utility.HtmlDetect import Utility.Process.Transcript +import Utility.Metered +import Utility.DataUnits +import Messages.Progress import Logs.Transfer import Network.URI import Control.Concurrent.Async +import Data.Char +import Text.Read -- youtube-dl can follow redirects to anywhere, including potentially -- localhost or a private address. So, it's only allowed to download @@ -43,6 +48,8 @@ youtubeDlNotAllowedMessage = unwords -- Runs youtube-dl in a work directory, to download a single media file -- from the url. Reutrns the path to the media file in the work directory. -- +-- Displays a progress meter as youtube-dl downloads. +-- -- If youtube-dl fails without writing any files to the work directory, -- or is not installed, returns Right Nothing. -- @@ -53,14 +60,14 @@ youtubeDlNotAllowedMessage = unwords -- -- (Note that we can't use --output to specifiy the file to download to, -- due to ) -youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath)) -youtubeDl url workdir = ifM ipAddressesUnlimited - ( withUrlOptions $ youtubeDl' url workdir +youtubeDl :: URLString -> FilePath -> MeterUpdate -> Annex (Either String (Maybe FilePath)) +youtubeDl url workdir p = ifM ipAddressesUnlimited + ( withUrlOptions $ youtubeDl' url workdir p , return $ Left youtubeDlNotAllowedMessage ) -youtubeDl' :: URLString -> FilePath -> UrlOptions -> Annex (Either String (Maybe FilePath)) -youtubeDl' url workdir uo +youtubeDl' :: URLString -> FilePath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe FilePath)) +youtubeDl' url workdir p uo | supportedScheme uo url = ifM (liftIO $ inPath "youtube-dl") ( runcmd >>= \case Right True -> workdirfiles >>= \case @@ -81,11 +88,17 @@ youtubeDl' url workdir uo runcmd = youtubeDlMaxSize workdir >>= \case Left msg -> return (Left msg) Right maxsize -> do - quiet <- commandProgressDisabled - opts <- youtubeDlOpts $ dlopts ++ maxsize ++ - if quiet then [ Param "--quiet" ] else [] - ok <- liftIO $ boolSystem' "youtube-dl" opts $ - \p -> p { cwd = Just workdir } + opts <- youtubeDlOpts (dlopts ++ maxsize) + oh <- mkOutputHandlerQuiet + -- The size is unknown to start. Once youtube-dl + -- outputs some progress, the meter will be updated + -- with the size, which is why it's important the + -- meter is passed into commandMeter' + let unknownsize = Nothing :: Maybe FileSize + ok <- metered (Just p) unknownsize $ \meter meterupdate -> + liftIO $ commandMeter' + parseYoutubeDlProgress oh (Just meter) meterupdate "youtube-dl" opts + (\pr -> pr { cwd = Just workdir }) return (Right ok) dlopts = [ Param url @@ -125,10 +138,10 @@ youtubeDlMaxSize workdir = ifM (Annex.getState Annex.force) ) -- Download a media file to a destination, -youtubeDlTo :: Key -> URLString -> FilePath -> Annex Bool -youtubeDlTo key url dest = do +youtubeDlTo :: Key -> URLString -> FilePath -> MeterUpdate -> Annex Bool +youtubeDlTo key url dest p = do res <- withTmpWorkDir key $ \workdir -> - youtubeDl url workdir >>= \case + youtubeDl url workdir p >>= \case Right (Just mediafile) -> do liftIO $ renameFile mediafile dest return (Just True) @@ -239,3 +252,37 @@ supportedScheme uo url = case parseURIRelaxed url of -- involving youtube-dl in a ftp download "ftp:" -> False _ -> allowedScheme uo u + +{- Strategy: Look for chunks prefixed with \r, which look approximately + - like this: + - "ESC[K[download] 26.6% of 60.22MiB at 254.69MiB/s ETA 00:00" + - Look at the number before "% of " and the number and unit after, + - to determine the number of bytes. + -} +parseYoutubeDlProgress :: ProgressParser +parseYoutubeDlProgress = go [] . reverse . progresschunks + where + delim = '\r' + + progresschunks = drop 1 . splitc delim + + go remainder [] = (Nothing, Nothing, remainder) + go remainder (x:xs) = case split "% of " x of + (p:r:[]) -> case (parsepercent p, parsebytes r) of + (Just percent, Just total) -> + ( Just (toBytesProcessed (calc percent total)) + , Just (TotalSize total) + , remainder + ) + _ -> go (delim:x++remainder) xs + _ -> go (delim:x++remainder) xs + + calc :: Double -> Integer -> Integer + calc percent total = round (percent * fromIntegral total / 100) + + parsepercent :: String -> Maybe Double + parsepercent = readMaybe . reverse . takeWhile (not . isSpace) . reverse + + parsebytes = readSize units . takeWhile (not . isSpace) + + units = memoryUnits ++ storageUnits diff --git a/CHANGELOG b/CHANGELOG index 6dbe6c6d5a..c6c2405bcd 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -34,6 +34,8 @@ git-annex (8.20200909) UNRELEASED; urgency=medium * upgrade: Avoid an upgrade failure of a bare repo in unusual circumstances. * httpalso: Support being used with special remotes that do not have encryption= in their config. + * Parse youtube-dl progress output, which lets progress be displayed + when doing concurrent downloads. -- Joey Hess Mon, 14 Sep 2020 18:34:37 -0400 diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 3267369965..1c775a8cce 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -327,9 +327,10 @@ downloadWeb addunlockedmatcher o url urlinfo file = where dl dest = withTmpWorkDir mediakey $ \workdir -> do let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . nukeFile) + showNote "using youtube-dl" Transfer.notifyTransfer Transfer.Download url $ - Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> - youtubeDl url workdir >>= \case + Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \p -> + youtubeDl url workdir p >>= \case Right (Just mediafile) -> do cleanuptmp checkCanAdd o dest $ \canadd -> do diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index 9dd379019c..2387a07d5f 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -307,7 +307,7 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl | rawOption (downloadOptions opts) = downloadlink | otherwise = do r <- withTmpWorkDir mediakey $ \workdir -> do - dl <- youtubeDl linkurl workdir + dl <- youtubeDl linkurl workdir nullMeterUpdate case dl of Right (Just mediafile) -> do let ext = case takeExtension mediafile of diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 4adde73dda..7d787c13d0 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -307,16 +307,16 @@ ariaProgress :: Maybe Integer -> MeterUpdate -> [CommandParam] -> Annex Bool ariaProgress Nothing _ ps = runAria ps ariaProgress (Just sz) meter ps = do oh <- mkOutputHandler - liftIO . commandMeter (parseAriaProgress sz) oh meter "aria2c" + liftIO . commandMeter (parseAriaProgress sz) oh Nothing meter "aria2c" =<< ariaParams ps parseAriaProgress :: Integer -> ProgressParser parseAriaProgress totalsize = go [] . reverse . splitc '\r' where - go remainder [] = (Nothing, remainder) + go remainder [] = (Nothing, Nothing, remainder) go remainder (x:xs) = case readish (findpercent x) of Nothing -> go (x++remainder) xs - Just p -> (Just (frompercent p), remainder) + Just p -> (Just (frompercent p), Nothing, remainder) -- "(N%)" findpercent = takeWhile (/= '%') . drop 1 . dropWhile (/= '(') diff --git a/Remote/Web.hs b/Remote/Web.hs index 984ce58d19..bbe24b38fc 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -90,9 +90,7 @@ downloadKey key _af dest p = do r <- untilTrue urls $ \u -> do let (u', downloader) = getDownloader u case downloader of - YoutubeDownloader -> do - showOutput - youtubeDlTo key u' dest + YoutubeDownloader -> youtubeDlTo key u' dest p _ -> Url.withUrlOptions $ downloadUrl key p [u'] dest unless r $ giveup "download failed" diff --git a/Utility/Metered.hs b/Utility/Metered.hs index e62a5bba4a..4aef2efcc2 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -11,6 +11,7 @@ module Utility.Metered ( MeterUpdate, nullMeterUpdate, combineMeterUpdate, + TotalSize(..), BytesProcessed(..), toBytesProcessed, fromBytesProcessed, @@ -29,6 +30,8 @@ module Utility.Metered ( ProgressParser, commandMeter, commandMeter', + commandMeterExitCode, + commandMeterExitCode', demeterCommand, demeterCommandEnv, avoidProgress, @@ -228,31 +231,44 @@ data OutputHandler = OutputHandler } {- Parses the String looking for a command's progress output, and returns - - Maybe the number of bytes done so far, and any any remainder of the - - string that could be an incomplete progress output. That remainder - - should be prepended to future output, and fed back in. This interface - - allows the command's output to be read in any desired size chunk, or - - even one character at a time. + - Maybe the number of bytes done so far, optionally a total size, + - and any any remainder of the string that could be an incomplete + - progress output. That remainder should be prepended to future output, + - and fed back in. This interface allows the command's output to be read + - in any desired size chunk, or even one character at a time. -} -type ProgressParser = String -> (Maybe BytesProcessed, String) +type ProgressParser = String -> (Maybe BytesProcessed, Maybe TotalSize, String) + +newtype TotalSize = TotalSize Integer {- Runs a command and runs a ProgressParser on its output, in order - to update a meter. + - + - If the Meter is provided, the ProgressParser can report the total size, + - which allows creating a Meter before the size is known. -} -commandMeter :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool -commandMeter progressparser oh meterupdate cmd params = do - ret <- commandMeter' progressparser oh meterupdate cmd params +commandMeter :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool +commandMeter progressparser oh meter meterupdate cmd params = + commandMeter' progressparser oh meter meterupdate cmd params id + +commandMeter' :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO Bool +commandMeter' progressparser oh meter meterupdate cmd params mkprocess = do + ret <- commandMeterExitCode' progressparser oh meter meterupdate cmd params mkprocess return $ case ret of Just ExitSuccess -> True _ -> False -commandMeter' :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO (Maybe ExitCode) -commandMeter' progressparser oh meterupdate cmd params = - outputFilter cmd params Nothing - (feedprogress zeroBytesProcessed []) +commandMeterExitCode :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> IO (Maybe ExitCode) +commandMeterExitCode progressparser oh meter meterupdate cmd params = + commandMeterExitCode' progressparser oh meter meterupdate cmd params id + +commandMeterExitCode' :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO (Maybe ExitCode) +commandMeterExitCode' progressparser oh mmeter meterupdate cmd params mkprocess = + outputFilter cmd params mkprocess Nothing + (feedprogress mmeter zeroBytesProcessed []) handlestderr where - feedprogress prev buf h = do + feedprogress sendtotalsize prev buf h = do b <- S.hGetSome h 80 if S.null b then return () @@ -261,13 +277,18 @@ commandMeter' progressparser oh meterupdate cmd params = S.hPut stdout b hFlush stdout let s = decodeBS b - let (mbytes, buf') = progressparser (buf++s) + let (mbytes, mtotalsize, buf') = progressparser (buf++s) + sendtotalsize' <- case (sendtotalsize, mtotalsize) of + (Just meter, Just (TotalSize n)) -> do + setMeterTotalSize meter n + return Nothing + _ -> return sendtotalsize case mbytes of - Nothing -> feedprogress prev buf' h + Nothing -> feedprogress sendtotalsize' prev buf' h (Just bytes) -> do when (bytes /= prev) $ meterupdate bytes - feedprogress bytes buf' h + feedprogress sendtotalsize' bytes buf' h handlestderr h = unlessM (hIsEOF h) $ do stderrHandler oh =<< hGetLine h @@ -283,7 +304,7 @@ demeterCommand oh cmd params = demeterCommandEnv oh cmd params Nothing demeterCommandEnv :: OutputHandler -> FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO Bool demeterCommandEnv oh cmd params environ = do - ret <- outputFilter cmd params environ + ret <- outputFilter cmd params id environ (\outh -> avoidProgress True outh stdouthandler) (\errh -> avoidProgress True errh $ stderrHandler oh) return $ case ret of @@ -308,11 +329,12 @@ avoidProgress doavoid h emitter = unlessM (hIsEOF h) $ do outputFilter :: FilePath -> [CommandParam] + -> (CreateProcess -> CreateProcess) -> Maybe [(String, String)] -> (Handle -> IO ()) -> (Handle -> IO ()) -> IO (Maybe ExitCode) -outputFilter cmd params environ outfilter errfilter = +outputFilter cmd params mkprocess environ outfilter errfilter = catchMaybeIO $ withCreateProcess p go where go _ (Just outh) (Just errh) pid = do @@ -338,7 +360,7 @@ outputFilter cmd params environ outfilter errfilter = return ret go _ _ _ _ = error "internal" - p = (proc cmd (toCommand params)) + p = mkprocess (proc cmd (toCommand params)) { env = environ , std_out = CreatePipe , std_err = CreatePipe diff --git a/Utility/Rsync.hs b/Utility/Rsync.hs index c6881b7ab9..e377eb965d 100644 --- a/Utility/Rsync.hs +++ b/Utility/Rsync.hs @@ -114,7 +114,7 @@ rsyncUrlIsPath s -} rsyncProgress :: OutputHandler -> MeterUpdate -> [CommandParam] -> IO Bool rsyncProgress oh meter ps = - commandMeter' parseRsyncProgress oh meter "rsync" (rsyncParamsFixup ps) >>= \case + commandMeterExitCode parseRsyncProgress oh Nothing meter "rsync" (rsyncParamsFixup ps) >>= \case Just ExitSuccess -> return True Just (ExitFailure exitcode) -> do when (exitcode /= 1) $ @@ -136,10 +136,10 @@ rsyncProgress oh meter ps = parseRsyncProgress :: ProgressParser parseRsyncProgress = go [] . reverse . progresschunks where - go remainder [] = (Nothing, remainder) + go remainder [] = (Nothing, Nothing, remainder) go remainder (x:xs) = case parsebytes (findbytesstart x) of Nothing -> go (delim:x++remainder) xs - Just b -> (Just (toBytesProcessed b), remainder) + Just b -> (Just (toBytesProcessed b), Nothing, remainder) delim = '\r'