Parse youtube-dl progress output

Which lets progress be displayed when doing concurrent downloads.
Amoung other things, like --json-progress etc.

The youtube-dl output is no longer displayed, except for any errors.

This commit was sponsored by Denis Dzyubenko on Patreon.
This commit is contained in:
Joey Hess 2020-09-29 17:53:48 -04:00
parent 4c7335caf3
commit 4c32499e82
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 117 additions and 49 deletions

View file

@ -74,9 +74,7 @@ runTransfer = runTransfer' False
{- Like runTransfer, but ignores any existing transfer lock file for the
- transfer, allowing re-running a transfer that is already in progress.
-
- Note that this may result in confusing progress meter display in the
- webapp, if multiple processes are writing to the transfer info file. -}
-}
alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
alwaysRunTransfer = runTransfer' True

View file

@ -1,6 +1,6 @@
{- youtube-dl integration for git-annex
-
- Copyright 2017-2018 Joey Hess <id@joeyh.name>
- Copyright 2017-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -21,10 +21,15 @@ import Annex.Url
import Utility.DiskFree
import Utility.HtmlDetect
import Utility.Process.Transcript
import Utility.Metered
import Utility.DataUnits
import Messages.Progress
import Logs.Transfer
import Network.URI
import Control.Concurrent.Async
import Data.Char
import Text.Read
-- youtube-dl can follow redirects to anywhere, including potentially
-- localhost or a private address. So, it's only allowed to download
@ -43,6 +48,8 @@ youtubeDlNotAllowedMessage = unwords
-- Runs youtube-dl in a work directory, to download a single media file
-- from the url. Reutrns the path to the media file in the work directory.
--
-- Displays a progress meter as youtube-dl downloads.
--
-- If youtube-dl fails without writing any files to the work directory,
-- or is not installed, returns Right Nothing.
--
@ -53,14 +60,14 @@ youtubeDlNotAllowedMessage = unwords
--
-- (Note that we can't use --output to specifiy the file to download to,
-- due to <https://github.com/rg3/youtube-dl/issues/14864>)
youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath))
youtubeDl url workdir = ifM ipAddressesUnlimited
( withUrlOptions $ youtubeDl' url workdir
youtubeDl :: URLString -> FilePath -> MeterUpdate -> Annex (Either String (Maybe FilePath))
youtubeDl url workdir p = ifM ipAddressesUnlimited
( withUrlOptions $ youtubeDl' url workdir p
, return $ Left youtubeDlNotAllowedMessage
)
youtubeDl' :: URLString -> FilePath -> UrlOptions -> Annex (Either String (Maybe FilePath))
youtubeDl' url workdir uo
youtubeDl' :: URLString -> FilePath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe FilePath))
youtubeDl' url workdir p uo
| supportedScheme uo url = ifM (liftIO $ inPath "youtube-dl")
( runcmd >>= \case
Right True -> workdirfiles >>= \case
@ -81,11 +88,17 @@ youtubeDl' url workdir uo
runcmd = youtubeDlMaxSize workdir >>= \case
Left msg -> return (Left msg)
Right maxsize -> do
quiet <- commandProgressDisabled
opts <- youtubeDlOpts $ dlopts ++ maxsize ++
if quiet then [ Param "--quiet" ] else []
ok <- liftIO $ boolSystem' "youtube-dl" opts $
\p -> p { cwd = Just workdir }
opts <- youtubeDlOpts (dlopts ++ maxsize)
oh <- mkOutputHandlerQuiet
-- The size is unknown to start. Once youtube-dl
-- outputs some progress, the meter will be updated
-- with the size, which is why it's important the
-- meter is passed into commandMeter'
let unknownsize = Nothing :: Maybe FileSize
ok <- metered (Just p) unknownsize $ \meter meterupdate ->
liftIO $ commandMeter'
parseYoutubeDlProgress oh (Just meter) meterupdate "youtube-dl" opts
(\pr -> pr { cwd = Just workdir })
return (Right ok)
dlopts =
[ Param url
@ -125,10 +138,10 @@ youtubeDlMaxSize workdir = ifM (Annex.getState Annex.force)
)
-- Download a media file to a destination,
youtubeDlTo :: Key -> URLString -> FilePath -> Annex Bool
youtubeDlTo key url dest = do
youtubeDlTo :: Key -> URLString -> FilePath -> MeterUpdate -> Annex Bool
youtubeDlTo key url dest p = do
res <- withTmpWorkDir key $ \workdir ->
youtubeDl url workdir >>= \case
youtubeDl url workdir p >>= \case
Right (Just mediafile) -> do
liftIO $ renameFile mediafile dest
return (Just True)
@ -239,3 +252,37 @@ supportedScheme uo url = case parseURIRelaxed url of
-- involving youtube-dl in a ftp download
"ftp:" -> False
_ -> allowedScheme uo u
{- Strategy: Look for chunks prefixed with \r, which look approximately
- like this:
- "ESC[K[download] 26.6% of 60.22MiB at 254.69MiB/s ETA 00:00"
- Look at the number before "% of " and the number and unit after,
- to determine the number of bytes.
-}
parseYoutubeDlProgress :: ProgressParser
parseYoutubeDlProgress = go [] . reverse . progresschunks
where
delim = '\r'
progresschunks = drop 1 . splitc delim
go remainder [] = (Nothing, Nothing, remainder)
go remainder (x:xs) = case split "% of " x of
(p:r:[]) -> case (parsepercent p, parsebytes r) of
(Just percent, Just total) ->
( Just (toBytesProcessed (calc percent total))
, Just (TotalSize total)
, remainder
)
_ -> go (delim:x++remainder) xs
_ -> go (delim:x++remainder) xs
calc :: Double -> Integer -> Integer
calc percent total = round (percent * fromIntegral total / 100)
parsepercent :: String -> Maybe Double
parsepercent = readMaybe . reverse . takeWhile (not . isSpace) . reverse
parsebytes = readSize units . takeWhile (not . isSpace)
units = memoryUnits ++ storageUnits

View file

@ -34,6 +34,8 @@ git-annex (8.20200909) UNRELEASED; urgency=medium
* upgrade: Avoid an upgrade failure of a bare repo in unusual circumstances.
* httpalso: Support being used with special remotes that do not have
encryption= in their config.
* Parse youtube-dl progress output, which lets progress be displayed
when doing concurrent downloads.
-- Joey Hess <id@joeyh.name> Mon, 14 Sep 2020 18:34:37 -0400

View file

@ -327,9 +327,10 @@ downloadWeb addunlockedmatcher o url urlinfo file =
where
dl dest = withTmpWorkDir mediakey $ \workdir -> do
let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
showNote "using youtube-dl"
Transfer.notifyTransfer Transfer.Download url $
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p ->
youtubeDl url workdir >>= \case
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \p ->
youtubeDl url workdir p >>= \case
Right (Just mediafile) -> do
cleanuptmp
checkCanAdd o dest $ \canadd -> do

View file

@ -307,7 +307,7 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl
| rawOption (downloadOptions opts) = downloadlink
| otherwise = do
r <- withTmpWorkDir mediakey $ \workdir -> do
dl <- youtubeDl linkurl workdir
dl <- youtubeDl linkurl workdir nullMeterUpdate
case dl of
Right (Just mediafile) -> do
let ext = case takeExtension mediafile of

View file

@ -307,16 +307,16 @@ ariaProgress :: Maybe Integer -> MeterUpdate -> [CommandParam] -> Annex Bool
ariaProgress Nothing _ ps = runAria ps
ariaProgress (Just sz) meter ps = do
oh <- mkOutputHandler
liftIO . commandMeter (parseAriaProgress sz) oh meter "aria2c"
liftIO . commandMeter (parseAriaProgress sz) oh Nothing meter "aria2c"
=<< ariaParams ps
parseAriaProgress :: Integer -> ProgressParser
parseAriaProgress totalsize = go [] . reverse . splitc '\r'
where
go remainder [] = (Nothing, remainder)
go remainder [] = (Nothing, Nothing, remainder)
go remainder (x:xs) = case readish (findpercent x) of
Nothing -> go (x++remainder) xs
Just p -> (Just (frompercent p), remainder)
Just p -> (Just (frompercent p), Nothing, remainder)
-- "(N%)"
findpercent = takeWhile (/= '%') . drop 1 . dropWhile (/= '(')

View file

@ -90,9 +90,7 @@ downloadKey key _af dest p = do
r <- untilTrue urls $ \u -> do
let (u', downloader) = getDownloader u
case downloader of
YoutubeDownloader -> do
showOutput
youtubeDlTo key u' dest
YoutubeDownloader -> youtubeDlTo key u' dest p
_ -> Url.withUrlOptions $ downloadUrl key p [u'] dest
unless r $
giveup "download failed"

View file

@ -11,6 +11,7 @@ module Utility.Metered (
MeterUpdate,
nullMeterUpdate,
combineMeterUpdate,
TotalSize(..),
BytesProcessed(..),
toBytesProcessed,
fromBytesProcessed,
@ -29,6 +30,8 @@ module Utility.Metered (
ProgressParser,
commandMeter,
commandMeter',
commandMeterExitCode,
commandMeterExitCode',
demeterCommand,
demeterCommandEnv,
avoidProgress,
@ -228,31 +231,44 @@ data OutputHandler = OutputHandler
}
{- Parses the String looking for a command's progress output, and returns
- Maybe the number of bytes done so far, and any any remainder of the
- string that could be an incomplete progress output. That remainder
- should be prepended to future output, and fed back in. This interface
- allows the command's output to be read in any desired size chunk, or
- even one character at a time.
- Maybe the number of bytes done so far, optionally a total size,
- and any any remainder of the string that could be an incomplete
- progress output. That remainder should be prepended to future output,
- and fed back in. This interface allows the command's output to be read
- in any desired size chunk, or even one character at a time.
-}
type ProgressParser = String -> (Maybe BytesProcessed, String)
type ProgressParser = String -> (Maybe BytesProcessed, Maybe TotalSize, String)
newtype TotalSize = TotalSize Integer
{- Runs a command and runs a ProgressParser on its output, in order
- to update a meter.
-
- If the Meter is provided, the ProgressParser can report the total size,
- which allows creating a Meter before the size is known.
-}
commandMeter :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool
commandMeter progressparser oh meterupdate cmd params = do
ret <- commandMeter' progressparser oh meterupdate cmd params
commandMeter :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool
commandMeter progressparser oh meter meterupdate cmd params =
commandMeter' progressparser oh meter meterupdate cmd params id
commandMeter' :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO Bool
commandMeter' progressparser oh meter meterupdate cmd params mkprocess = do
ret <- commandMeterExitCode' progressparser oh meter meterupdate cmd params mkprocess
return $ case ret of
Just ExitSuccess -> True
_ -> False
commandMeter' :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO (Maybe ExitCode)
commandMeter' progressparser oh meterupdate cmd params =
outputFilter cmd params Nothing
(feedprogress zeroBytesProcessed [])
commandMeterExitCode :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> IO (Maybe ExitCode)
commandMeterExitCode progressparser oh meter meterupdate cmd params =
commandMeterExitCode' progressparser oh meter meterupdate cmd params id
commandMeterExitCode' :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO (Maybe ExitCode)
commandMeterExitCode' progressparser oh mmeter meterupdate cmd params mkprocess =
outputFilter cmd params mkprocess Nothing
(feedprogress mmeter zeroBytesProcessed [])
handlestderr
where
feedprogress prev buf h = do
feedprogress sendtotalsize prev buf h = do
b <- S.hGetSome h 80
if S.null b
then return ()
@ -261,13 +277,18 @@ commandMeter' progressparser oh meterupdate cmd params =
S.hPut stdout b
hFlush stdout
let s = decodeBS b
let (mbytes, buf') = progressparser (buf++s)
let (mbytes, mtotalsize, buf') = progressparser (buf++s)
sendtotalsize' <- case (sendtotalsize, mtotalsize) of
(Just meter, Just (TotalSize n)) -> do
setMeterTotalSize meter n
return Nothing
_ -> return sendtotalsize
case mbytes of
Nothing -> feedprogress prev buf' h
Nothing -> feedprogress sendtotalsize' prev buf' h
(Just bytes) -> do
when (bytes /= prev) $
meterupdate bytes
feedprogress bytes buf' h
feedprogress sendtotalsize' bytes buf' h
handlestderr h = unlessM (hIsEOF h) $ do
stderrHandler oh =<< hGetLine h
@ -283,7 +304,7 @@ demeterCommand oh cmd params = demeterCommandEnv oh cmd params Nothing
demeterCommandEnv :: OutputHandler -> FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO Bool
demeterCommandEnv oh cmd params environ = do
ret <- outputFilter cmd params environ
ret <- outputFilter cmd params id environ
(\outh -> avoidProgress True outh stdouthandler)
(\errh -> avoidProgress True errh $ stderrHandler oh)
return $ case ret of
@ -308,11 +329,12 @@ avoidProgress doavoid h emitter = unlessM (hIsEOF h) $ do
outputFilter
:: FilePath
-> [CommandParam]
-> (CreateProcess -> CreateProcess)
-> Maybe [(String, String)]
-> (Handle -> IO ())
-> (Handle -> IO ())
-> IO (Maybe ExitCode)
outputFilter cmd params environ outfilter errfilter =
outputFilter cmd params mkprocess environ outfilter errfilter =
catchMaybeIO $ withCreateProcess p go
where
go _ (Just outh) (Just errh) pid = do
@ -338,7 +360,7 @@ outputFilter cmd params environ outfilter errfilter =
return ret
go _ _ _ _ = error "internal"
p = (proc cmd (toCommand params))
p = mkprocess (proc cmd (toCommand params))
{ env = environ
, std_out = CreatePipe
, std_err = CreatePipe

View file

@ -114,7 +114,7 @@ rsyncUrlIsPath s
-}
rsyncProgress :: OutputHandler -> MeterUpdate -> [CommandParam] -> IO Bool
rsyncProgress oh meter ps =
commandMeter' parseRsyncProgress oh meter "rsync" (rsyncParamsFixup ps) >>= \case
commandMeterExitCode parseRsyncProgress oh Nothing meter "rsync" (rsyncParamsFixup ps) >>= \case
Just ExitSuccess -> return True
Just (ExitFailure exitcode) -> do
when (exitcode /= 1) $
@ -136,10 +136,10 @@ rsyncProgress oh meter ps =
parseRsyncProgress :: ProgressParser
parseRsyncProgress = go [] . reverse . progresschunks
where
go remainder [] = (Nothing, remainder)
go remainder [] = (Nothing, Nothing, remainder)
go remainder (x:xs) = case parsebytes (findbytesstart x) of
Nothing -> go (delim:x++remainder) xs
Just b -> (Just (toBytesProcessed b), remainder)
Just b -> (Just (toBytesProcessed b), Nothing, remainder)
delim = '\r'