fix regression in addurl --fast caused by youtube-dl support
Similar to c6e4bc0a22
but another code
path. As well as using youtube-dl unecessarily, it used the filename it
comes up with, which while nice for youtube videos, is not right for
other files.
This means more work is done for urls that youtube-dl does support,
but is probably more efficient for other urls, since it only downloads
the first chunk of content, while youtube-dl probably downloads more.
This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
5b98da60b1
commit
8990afaef0
3 changed files with 46 additions and 25 deletions
|
@ -106,42 +106,49 @@ youtubeDlTo key url dest = do
|
|||
return Nothing
|
||||
return (fromMaybe False res)
|
||||
|
||||
youtubeDlSupported :: URLString -> Annex Bool
|
||||
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
|
||||
|
||||
-- Check if youtube-dl can find media in an url.
|
||||
--
|
||||
-- youtube-dl supports downloading urls that are not html pages,
|
||||
-- but we don't want to use it for such urls, since they can be downloaded
|
||||
-- without it. So, this first downloads part of the content and checks
|
||||
-- if it's a html page; only then is youtube-dl used.
|
||||
youtubeDlCheck :: URLString -> Annex (Either String Bool)
|
||||
youtubeDlCheck url = catchMsgIO $ do
|
||||
htmlOnly :: URLString -> a -> Annex a -> Annex a
|
||||
htmlOnly url fallback a = do
|
||||
uo <- getUrlOptions
|
||||
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
|
||||
Just bs | isHtmlBs bs -> do
|
||||
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
|
||||
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
|
||||
_ -> return False
|
||||
Just bs | isHtmlBs bs -> a
|
||||
_ -> return fallback
|
||||
|
||||
youtubeDlSupported :: URLString -> Annex Bool
|
||||
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
|
||||
|
||||
-- Check if youtube-dl can find media in an url.
|
||||
youtubeDlCheck :: URLString -> Annex (Either String Bool)
|
||||
youtubeDlCheck url = catchMsgIO $ htmlOnly url False $ do
|
||||
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
|
||||
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
|
||||
|
||||
-- Ask youtube-dl for the filename of media in an url.
|
||||
--
|
||||
-- (This is not always identical to the filename it uses when downloading.)
|
||||
youtubeDlFileName :: URLString -> Annex (Either String FilePath)
|
||||
youtubeDlFileName url = flip catchIO (pure . Left . show) $ do
|
||||
-- Sometimes youtube-dl will fail with an ugly backtrace
|
||||
-- (eg, http://bugs.debian.org/874321)
|
||||
-- so catch stderr as well as stdout to avoid the user seeing it.
|
||||
-- --no-warnings avoids warning messages that are output to stdout.
|
||||
opts <- youtubeDlOpts
|
||||
[ Param url
|
||||
, Param "--get-filename"
|
||||
, Param "--no-warnings"
|
||||
]
|
||||
(output, ok) <- liftIO $ processTranscript "youtube-dl" (toCommand opts) Nothing
|
||||
return $ case (ok, lines output) of
|
||||
(True, (f:_)) | not (null f) -> Right f
|
||||
_ -> Left "no media in url"
|
||||
youtubeDlFileName url = flip catchIO (pure . Left . show) $
|
||||
htmlOnly url nomedia $ do
|
||||
-- Sometimes youtube-dl will fail with an ugly backtrace
|
||||
-- (eg, http://bugs.debian.org/874321)
|
||||
-- so catch stderr as well as stdout to avoid the user
|
||||
-- seeing it. --no-warnings avoids warning messages that
|
||||
-- are output to stdout.
|
||||
opts <- youtubeDlOpts
|
||||
[ Param url
|
||||
, Param "--get-filename"
|
||||
, Param "--no-warnings"
|
||||
]
|
||||
(output, ok) <- liftIO $ processTranscript "youtube-dl"
|
||||
(toCommand opts) Nothing
|
||||
return $ case (ok, lines output) of
|
||||
(True, (f:_)) | not (null f) -> Right f
|
||||
_ -> nomedia
|
||||
where
|
||||
nomedia = Left "no media in url"
|
||||
|
||||
youtubeDlOpts :: [CommandParam] -> Annex [CommandParam]
|
||||
youtubeDlOpts addopts = do
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 2"""
|
||||
date="2017-12-06T16:02:55Z"
|
||||
content="""
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 4"""
|
||||
date="2017-12-08T18:48:46Z"
|
||||
content="""
|
||||
That one happened with `git annex addurl --fast $url` so a different code
|
||||
path. Had to add a html page check to youtubeDlFileName to fix it.
|
||||
"""]]
|
Loading…
Add table
Reference in a new issue