Parse youtube-dl progress output

Which lets progress be displayed when doing concurrent downloads.
Amoung other things, like --json-progress etc.

The youtube-dl output is no longer displayed, except for any errors.

This commit was sponsored by Denis Dzyubenko on Patreon.
This commit is contained in:
Joey Hess 2020-09-29 17:53:48 -04:00
parent 4c7335caf3
commit 4c32499e82
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 117 additions and 49 deletions

View file

@ -74,9 +74,7 @@ runTransfer = runTransfer' False
{- Like runTransfer, but ignores any existing transfer lock file for the {- Like runTransfer, but ignores any existing transfer lock file for the
- transfer, allowing re-running a transfer that is already in progress. - transfer, allowing re-running a transfer that is already in progress.
- -}
- Note that this may result in confusing progress meter display in the
- webapp, if multiple processes are writing to the transfer info file. -}
alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
alwaysRunTransfer = runTransfer' True alwaysRunTransfer = runTransfer' True

View file

@ -1,6 +1,6 @@
{- youtube-dl integration for git-annex {- youtube-dl integration for git-annex
- -
- Copyright 2017-2018 Joey Hess <id@joeyh.name> - Copyright 2017-2020 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -21,10 +21,15 @@ import Annex.Url
import Utility.DiskFree import Utility.DiskFree
import Utility.HtmlDetect import Utility.HtmlDetect
import Utility.Process.Transcript import Utility.Process.Transcript
import Utility.Metered
import Utility.DataUnits
import Messages.Progress
import Logs.Transfer import Logs.Transfer
import Network.URI import Network.URI
import Control.Concurrent.Async import Control.Concurrent.Async
import Data.Char
import Text.Read
-- youtube-dl can follow redirects to anywhere, including potentially -- youtube-dl can follow redirects to anywhere, including potentially
-- localhost or a private address. So, it's only allowed to download -- localhost or a private address. So, it's only allowed to download
@ -43,6 +48,8 @@ youtubeDlNotAllowedMessage = unwords
-- Runs youtube-dl in a work directory, to download a single media file -- Runs youtube-dl in a work directory, to download a single media file
-- from the url. Reutrns the path to the media file in the work directory. -- from the url. Reutrns the path to the media file in the work directory.
-- --
-- Displays a progress meter as youtube-dl downloads.
--
-- If youtube-dl fails without writing any files to the work directory, -- If youtube-dl fails without writing any files to the work directory,
-- or is not installed, returns Right Nothing. -- or is not installed, returns Right Nothing.
-- --
@ -53,14 +60,14 @@ youtubeDlNotAllowedMessage = unwords
-- --
-- (Note that we can't use --output to specifiy the file to download to, -- (Note that we can't use --output to specifiy the file to download to,
-- due to <https://github.com/rg3/youtube-dl/issues/14864>) -- due to <https://github.com/rg3/youtube-dl/issues/14864>)
youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath)) youtubeDl :: URLString -> FilePath -> MeterUpdate -> Annex (Either String (Maybe FilePath))
youtubeDl url workdir = ifM ipAddressesUnlimited youtubeDl url workdir p = ifM ipAddressesUnlimited
( withUrlOptions $ youtubeDl' url workdir ( withUrlOptions $ youtubeDl' url workdir p
, return $ Left youtubeDlNotAllowedMessage , return $ Left youtubeDlNotAllowedMessage
) )
youtubeDl' :: URLString -> FilePath -> UrlOptions -> Annex (Either String (Maybe FilePath)) youtubeDl' :: URLString -> FilePath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe FilePath))
youtubeDl' url workdir uo youtubeDl' url workdir p uo
| supportedScheme uo url = ifM (liftIO $ inPath "youtube-dl") | supportedScheme uo url = ifM (liftIO $ inPath "youtube-dl")
( runcmd >>= \case ( runcmd >>= \case
Right True -> workdirfiles >>= \case Right True -> workdirfiles >>= \case
@ -81,11 +88,17 @@ youtubeDl' url workdir uo
runcmd = youtubeDlMaxSize workdir >>= \case runcmd = youtubeDlMaxSize workdir >>= \case
Left msg -> return (Left msg) Left msg -> return (Left msg)
Right maxsize -> do Right maxsize -> do
quiet <- commandProgressDisabled opts <- youtubeDlOpts (dlopts ++ maxsize)
opts <- youtubeDlOpts $ dlopts ++ maxsize ++ oh <- mkOutputHandlerQuiet
if quiet then [ Param "--quiet" ] else [] -- The size is unknown to start. Once youtube-dl
ok <- liftIO $ boolSystem' "youtube-dl" opts $ -- outputs some progress, the meter will be updated
\p -> p { cwd = Just workdir } -- with the size, which is why it's important the
-- meter is passed into commandMeter'
let unknownsize = Nothing :: Maybe FileSize
ok <- metered (Just p) unknownsize $ \meter meterupdate ->
liftIO $ commandMeter'
parseYoutubeDlProgress oh (Just meter) meterupdate "youtube-dl" opts
(\pr -> pr { cwd = Just workdir })
return (Right ok) return (Right ok)
dlopts = dlopts =
[ Param url [ Param url
@ -125,10 +138,10 @@ youtubeDlMaxSize workdir = ifM (Annex.getState Annex.force)
) )
-- Download a media file to a destination, -- Download a media file to a destination,
youtubeDlTo :: Key -> URLString -> FilePath -> Annex Bool youtubeDlTo :: Key -> URLString -> FilePath -> MeterUpdate -> Annex Bool
youtubeDlTo key url dest = do youtubeDlTo key url dest p = do
res <- withTmpWorkDir key $ \workdir -> res <- withTmpWorkDir key $ \workdir ->
youtubeDl url workdir >>= \case youtubeDl url workdir p >>= \case
Right (Just mediafile) -> do Right (Just mediafile) -> do
liftIO $ renameFile mediafile dest liftIO $ renameFile mediafile dest
return (Just True) return (Just True)
@ -239,3 +252,37 @@ supportedScheme uo url = case parseURIRelaxed url of
-- involving youtube-dl in a ftp download -- involving youtube-dl in a ftp download
"ftp:" -> False "ftp:" -> False
_ -> allowedScheme uo u _ -> allowedScheme uo u
{- Strategy: Look for chunks prefixed with \r, which look approximately
- like this:
- "ESC[K[download] 26.6% of 60.22MiB at 254.69MiB/s ETA 00:00"
- Look at the number before "% of " and the number and unit after,
- to determine the number of bytes.
-}
parseYoutubeDlProgress :: ProgressParser
parseYoutubeDlProgress = go [] . reverse . progresschunks
where
delim = '\r'
progresschunks = drop 1 . splitc delim
go remainder [] = (Nothing, Nothing, remainder)
go remainder (x:xs) = case split "% of " x of
(p:r:[]) -> case (parsepercent p, parsebytes r) of
(Just percent, Just total) ->
( Just (toBytesProcessed (calc percent total))
, Just (TotalSize total)
, remainder
)
_ -> go (delim:x++remainder) xs
_ -> go (delim:x++remainder) xs
calc :: Double -> Integer -> Integer
calc percent total = round (percent * fromIntegral total / 100)
parsepercent :: String -> Maybe Double
parsepercent = readMaybe . reverse . takeWhile (not . isSpace) . reverse
parsebytes = readSize units . takeWhile (not . isSpace)
units = memoryUnits ++ storageUnits

View file

@ -34,6 +34,8 @@ git-annex (8.20200909) UNRELEASED; urgency=medium
* upgrade: Avoid an upgrade failure of a bare repo in unusual circumstances. * upgrade: Avoid an upgrade failure of a bare repo in unusual circumstances.
* httpalso: Support being used with special remotes that do not have * httpalso: Support being used with special remotes that do not have
encryption= in their config. encryption= in their config.
* Parse youtube-dl progress output, which lets progress be displayed
when doing concurrent downloads.
-- Joey Hess <id@joeyh.name> Mon, 14 Sep 2020 18:34:37 -0400 -- Joey Hess <id@joeyh.name> Mon, 14 Sep 2020 18:34:37 -0400

View file

@ -327,9 +327,10 @@ downloadWeb addunlockedmatcher o url urlinfo file =
where where
dl dest = withTmpWorkDir mediakey $ \workdir -> do dl dest = withTmpWorkDir mediakey $ \workdir -> do
let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . nukeFile) let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
showNote "using youtube-dl"
Transfer.notifyTransfer Transfer.Download url $ Transfer.notifyTransfer Transfer.Download url $
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \p ->
youtubeDl url workdir >>= \case youtubeDl url workdir p >>= \case
Right (Just mediafile) -> do Right (Just mediafile) -> do
cleanuptmp cleanuptmp
checkCanAdd o dest $ \canadd -> do checkCanAdd o dest $ \canadd -> do

View file

@ -307,7 +307,7 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl
| rawOption (downloadOptions opts) = downloadlink | rawOption (downloadOptions opts) = downloadlink
| otherwise = do | otherwise = do
r <- withTmpWorkDir mediakey $ \workdir -> do r <- withTmpWorkDir mediakey $ \workdir -> do
dl <- youtubeDl linkurl workdir dl <- youtubeDl linkurl workdir nullMeterUpdate
case dl of case dl of
Right (Just mediafile) -> do Right (Just mediafile) -> do
let ext = case takeExtension mediafile of let ext = case takeExtension mediafile of

View file

@ -307,16 +307,16 @@ ariaProgress :: Maybe Integer -> MeterUpdate -> [CommandParam] -> Annex Bool
ariaProgress Nothing _ ps = runAria ps ariaProgress Nothing _ ps = runAria ps
ariaProgress (Just sz) meter ps = do ariaProgress (Just sz) meter ps = do
oh <- mkOutputHandler oh <- mkOutputHandler
liftIO . commandMeter (parseAriaProgress sz) oh meter "aria2c" liftIO . commandMeter (parseAriaProgress sz) oh Nothing meter "aria2c"
=<< ariaParams ps =<< ariaParams ps
parseAriaProgress :: Integer -> ProgressParser parseAriaProgress :: Integer -> ProgressParser
parseAriaProgress totalsize = go [] . reverse . splitc '\r' parseAriaProgress totalsize = go [] . reverse . splitc '\r'
where where
go remainder [] = (Nothing, remainder) go remainder [] = (Nothing, Nothing, remainder)
go remainder (x:xs) = case readish (findpercent x) of go remainder (x:xs) = case readish (findpercent x) of
Nothing -> go (x++remainder) xs Nothing -> go (x++remainder) xs
Just p -> (Just (frompercent p), remainder) Just p -> (Just (frompercent p), Nothing, remainder)
-- "(N%)" -- "(N%)"
findpercent = takeWhile (/= '%') . drop 1 . dropWhile (/= '(') findpercent = takeWhile (/= '%') . drop 1 . dropWhile (/= '(')

View file

@ -90,9 +90,7 @@ downloadKey key _af dest p = do
r <- untilTrue urls $ \u -> do r <- untilTrue urls $ \u -> do
let (u', downloader) = getDownloader u let (u', downloader) = getDownloader u
case downloader of case downloader of
YoutubeDownloader -> do YoutubeDownloader -> youtubeDlTo key u' dest p
showOutput
youtubeDlTo key u' dest
_ -> Url.withUrlOptions $ downloadUrl key p [u'] dest _ -> Url.withUrlOptions $ downloadUrl key p [u'] dest
unless r $ unless r $
giveup "download failed" giveup "download failed"

View file

@ -11,6 +11,7 @@ module Utility.Metered (
MeterUpdate, MeterUpdate,
nullMeterUpdate, nullMeterUpdate,
combineMeterUpdate, combineMeterUpdate,
TotalSize(..),
BytesProcessed(..), BytesProcessed(..),
toBytesProcessed, toBytesProcessed,
fromBytesProcessed, fromBytesProcessed,
@ -29,6 +30,8 @@ module Utility.Metered (
ProgressParser, ProgressParser,
commandMeter, commandMeter,
commandMeter', commandMeter',
commandMeterExitCode,
commandMeterExitCode',
demeterCommand, demeterCommand,
demeterCommandEnv, demeterCommandEnv,
avoidProgress, avoidProgress,
@ -228,31 +231,44 @@ data OutputHandler = OutputHandler
} }
{- Parses the String looking for a command's progress output, and returns {- Parses the String looking for a command's progress output, and returns
- Maybe the number of bytes done so far, and any any remainder of the - Maybe the number of bytes done so far, optionally a total size,
- string that could be an incomplete progress output. That remainder - and any any remainder of the string that could be an incomplete
- should be prepended to future output, and fed back in. This interface - progress output. That remainder should be prepended to future output,
- allows the command's output to be read in any desired size chunk, or - and fed back in. This interface allows the command's output to be read
- even one character at a time. - in any desired size chunk, or even one character at a time.
-} -}
type ProgressParser = String -> (Maybe BytesProcessed, String) type ProgressParser = String -> (Maybe BytesProcessed, Maybe TotalSize, String)
newtype TotalSize = TotalSize Integer
{- Runs a command and runs a ProgressParser on its output, in order {- Runs a command and runs a ProgressParser on its output, in order
- to update a meter. - to update a meter.
-
- If the Meter is provided, the ProgressParser can report the total size,
- which allows creating a Meter before the size is known.
-} -}
commandMeter :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool commandMeter :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool
commandMeter progressparser oh meterupdate cmd params = do commandMeter progressparser oh meter meterupdate cmd params =
ret <- commandMeter' progressparser oh meterupdate cmd params commandMeter' progressparser oh meter meterupdate cmd params id
commandMeter' :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO Bool
commandMeter' progressparser oh meter meterupdate cmd params mkprocess = do
ret <- commandMeterExitCode' progressparser oh meter meterupdate cmd params mkprocess
return $ case ret of return $ case ret of
Just ExitSuccess -> True Just ExitSuccess -> True
_ -> False _ -> False
commandMeter' :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO (Maybe ExitCode) commandMeterExitCode :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> IO (Maybe ExitCode)
commandMeter' progressparser oh meterupdate cmd params = commandMeterExitCode progressparser oh meter meterupdate cmd params =
outputFilter cmd params Nothing commandMeterExitCode' progressparser oh meter meterupdate cmd params id
(feedprogress zeroBytesProcessed [])
commandMeterExitCode' :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO (Maybe ExitCode)
commandMeterExitCode' progressparser oh mmeter meterupdate cmd params mkprocess =
outputFilter cmd params mkprocess Nothing
(feedprogress mmeter zeroBytesProcessed [])
handlestderr handlestderr
where where
feedprogress prev buf h = do feedprogress sendtotalsize prev buf h = do
b <- S.hGetSome h 80 b <- S.hGetSome h 80
if S.null b if S.null b
then return () then return ()
@ -261,13 +277,18 @@ commandMeter' progressparser oh meterupdate cmd params =
S.hPut stdout b S.hPut stdout b
hFlush stdout hFlush stdout
let s = decodeBS b let s = decodeBS b
let (mbytes, buf') = progressparser (buf++s) let (mbytes, mtotalsize, buf') = progressparser (buf++s)
sendtotalsize' <- case (sendtotalsize, mtotalsize) of
(Just meter, Just (TotalSize n)) -> do
setMeterTotalSize meter n
return Nothing
_ -> return sendtotalsize
case mbytes of case mbytes of
Nothing -> feedprogress prev buf' h Nothing -> feedprogress sendtotalsize' prev buf' h
(Just bytes) -> do (Just bytes) -> do
when (bytes /= prev) $ when (bytes /= prev) $
meterupdate bytes meterupdate bytes
feedprogress bytes buf' h feedprogress sendtotalsize' bytes buf' h
handlestderr h = unlessM (hIsEOF h) $ do handlestderr h = unlessM (hIsEOF h) $ do
stderrHandler oh =<< hGetLine h stderrHandler oh =<< hGetLine h
@ -283,7 +304,7 @@ demeterCommand oh cmd params = demeterCommandEnv oh cmd params Nothing
demeterCommandEnv :: OutputHandler -> FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO Bool demeterCommandEnv :: OutputHandler -> FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO Bool
demeterCommandEnv oh cmd params environ = do demeterCommandEnv oh cmd params environ = do
ret <- outputFilter cmd params environ ret <- outputFilter cmd params id environ
(\outh -> avoidProgress True outh stdouthandler) (\outh -> avoidProgress True outh stdouthandler)
(\errh -> avoidProgress True errh $ stderrHandler oh) (\errh -> avoidProgress True errh $ stderrHandler oh)
return $ case ret of return $ case ret of
@ -308,11 +329,12 @@ avoidProgress doavoid h emitter = unlessM (hIsEOF h) $ do
outputFilter outputFilter
:: FilePath :: FilePath
-> [CommandParam] -> [CommandParam]
-> (CreateProcess -> CreateProcess)
-> Maybe [(String, String)] -> Maybe [(String, String)]
-> (Handle -> IO ()) -> (Handle -> IO ())
-> (Handle -> IO ()) -> (Handle -> IO ())
-> IO (Maybe ExitCode) -> IO (Maybe ExitCode)
outputFilter cmd params environ outfilter errfilter = outputFilter cmd params mkprocess environ outfilter errfilter =
catchMaybeIO $ withCreateProcess p go catchMaybeIO $ withCreateProcess p go
where where
go _ (Just outh) (Just errh) pid = do go _ (Just outh) (Just errh) pid = do
@ -338,7 +360,7 @@ outputFilter cmd params environ outfilter errfilter =
return ret return ret
go _ _ _ _ = error "internal" go _ _ _ _ = error "internal"
p = (proc cmd (toCommand params)) p = mkprocess (proc cmd (toCommand params))
{ env = environ { env = environ
, std_out = CreatePipe , std_out = CreatePipe
, std_err = CreatePipe , std_err = CreatePipe

View file

@ -114,7 +114,7 @@ rsyncUrlIsPath s
-} -}
rsyncProgress :: OutputHandler -> MeterUpdate -> [CommandParam] -> IO Bool rsyncProgress :: OutputHandler -> MeterUpdate -> [CommandParam] -> IO Bool
rsyncProgress oh meter ps = rsyncProgress oh meter ps =
commandMeter' parseRsyncProgress oh meter "rsync" (rsyncParamsFixup ps) >>= \case commandMeterExitCode parseRsyncProgress oh Nothing meter "rsync" (rsyncParamsFixup ps) >>= \case
Just ExitSuccess -> return True Just ExitSuccess -> return True
Just (ExitFailure exitcode) -> do Just (ExitFailure exitcode) -> do
when (exitcode /= 1) $ when (exitcode /= 1) $
@ -136,10 +136,10 @@ rsyncProgress oh meter ps =
parseRsyncProgress :: ProgressParser parseRsyncProgress :: ProgressParser
parseRsyncProgress = go [] . reverse . progresschunks parseRsyncProgress = go [] . reverse . progresschunks
where where
go remainder [] = (Nothing, remainder) go remainder [] = (Nothing, Nothing, remainder)
go remainder (x:xs) = case parsebytes (findbytesstart x) of go remainder (x:xs) = case parsebytes (findbytesstart x) of
Nothing -> go (delim:x++remainder) xs Nothing -> go (delim:x++remainder) xs
Just b -> (Just (toBytesProcessed b), remainder) Just b -> (Just (toBytesProcessed b), Nothing, remainder)
delim = '\r' delim = '\r'