fix regression in addurl --fast caused by youtube-dl support

Similar to c6e4bc0a22 but another code
path. As well as using youtube-dl unecessarily, it used the filename it
comes up with, which while nice for youtube videos, is not right for
other files.

This means more work is done for urls that youtube-dl does support,
but is probably more efficient for other urls, since it only downloads
the first chunk of content, while youtube-dl probably downloads more.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-12-08 14:49:55 -04:00
parent 5b98da60b1
commit 8990afaef0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 46 additions and 25 deletions

View file

@ -106,42 +106,49 @@ youtubeDlTo key url dest = do
return Nothing
return (fromMaybe False res)
youtubeDlSupported :: URLString -> Annex Bool
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
-- Check if youtube-dl can find media in an url.
--
-- youtube-dl supports downloading urls that are not html pages,
-- but we don't want to use it for such urls, since they can be downloaded
-- without it. So, this first downloads part of the content and checks
-- if it's a html page; only then is youtube-dl used.
youtubeDlCheck :: URLString -> Annex (Either String Bool)
youtubeDlCheck url = catchMsgIO $ do
htmlOnly :: URLString -> a -> Annex a -> Annex a
htmlOnly url fallback a = do
uo <- getUrlOptions
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
Just bs | isHtmlBs bs -> do
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
_ -> return False
Just bs | isHtmlBs bs -> a
_ -> return fallback
youtubeDlSupported :: URLString -> Annex Bool
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
-- Check if youtube-dl can find media in an url.
youtubeDlCheck :: URLString -> Annex (Either String Bool)
youtubeDlCheck url = catchMsgIO $ htmlOnly url False $ do
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
-- Ask youtube-dl for the filename of media in an url.
--
-- (This is not always identical to the filename it uses when downloading.)
youtubeDlFileName :: URLString -> Annex (Either String FilePath)
youtubeDlFileName url = flip catchIO (pure . Left . show) $ do
-- Sometimes youtube-dl will fail with an ugly backtrace
-- (eg, http://bugs.debian.org/874321)
-- so catch stderr as well as stdout to avoid the user seeing it.
-- --no-warnings avoids warning messages that are output to stdout.
opts <- youtubeDlOpts
[ Param url
, Param "--get-filename"
, Param "--no-warnings"
]
(output, ok) <- liftIO $ processTranscript "youtube-dl" (toCommand opts) Nothing
return $ case (ok, lines output) of
(True, (f:_)) | not (null f) -> Right f
_ -> Left "no media in url"
youtubeDlFileName url = flip catchIO (pure . Left . show) $
htmlOnly url nomedia $ do
-- Sometimes youtube-dl will fail with an ugly backtrace
-- (eg, http://bugs.debian.org/874321)
-- so catch stderr as well as stdout to avoid the user
-- seeing it. --no-warnings avoids warning messages that
-- are output to stdout.
opts <- youtubeDlOpts
[ Param url
, Param "--get-filename"
, Param "--no-warnings"
]
(output, ok) <- liftIO $ processTranscript "youtube-dl"
(toCommand opts) Nothing
return $ case (ok, lines output) of
(True, (f:_)) | not (null f) -> Right f
_ -> nomedia
where
nomedia = Left "no media in url"
youtubeDlOpts :: [CommandParam] -> Annex [CommandParam]
youtubeDlOpts addopts = do

View file

@ -0,0 +1,6 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2017-12-06T16:02:55Z"
content="""
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="joey"
subject="""comment 4"""
date="2017-12-08T18:48:46Z"
content="""
That one happened with `git annex addurl --fast $url` so a different code
path. Had to add a html page check to youtubeDlFileName to fix it.
"""]]