fix regression in addurl --fast caused by youtube-dl support
Similar to c6e4bc0a22
but another code
path. As well as using youtube-dl unecessarily, it used the filename it
comes up with, which while nice for youtube videos, is not right for
other files.
This means more work is done for urls that youtube-dl does support,
but is probably more efficient for other urls, since it only downloads
the first chunk of content, while youtube-dl probably downloads more.
This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
5b98da60b1
commit
8990afaef0
3 changed files with 46 additions and 25 deletions
|
@ -106,42 +106,49 @@ youtubeDlTo key url dest = do
|
||||||
return Nothing
|
return Nothing
|
||||||
return (fromMaybe False res)
|
return (fromMaybe False res)
|
||||||
|
|
||||||
youtubeDlSupported :: URLString -> Annex Bool
|
|
||||||
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
|
|
||||||
|
|
||||||
-- Check if youtube-dl can find media in an url.
|
|
||||||
--
|
|
||||||
-- youtube-dl supports downloading urls that are not html pages,
|
-- youtube-dl supports downloading urls that are not html pages,
|
||||||
-- but we don't want to use it for such urls, since they can be downloaded
|
-- but we don't want to use it for such urls, since they can be downloaded
|
||||||
-- without it. So, this first downloads part of the content and checks
|
-- without it. So, this first downloads part of the content and checks
|
||||||
-- if it's a html page; only then is youtube-dl used.
|
-- if it's a html page; only then is youtube-dl used.
|
||||||
youtubeDlCheck :: URLString -> Annex (Either String Bool)
|
htmlOnly :: URLString -> a -> Annex a -> Annex a
|
||||||
youtubeDlCheck url = catchMsgIO $ do
|
htmlOnly url fallback a = do
|
||||||
uo <- getUrlOptions
|
uo <- getUrlOptions
|
||||||
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
|
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
|
||||||
Just bs | isHtmlBs bs -> do
|
Just bs | isHtmlBs bs -> a
|
||||||
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
|
_ -> return fallback
|
||||||
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
|
|
||||||
_ -> return False
|
youtubeDlSupported :: URLString -> Annex Bool
|
||||||
|
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
|
||||||
|
|
||||||
|
-- Check if youtube-dl can find media in an url.
|
||||||
|
youtubeDlCheck :: URLString -> Annex (Either String Bool)
|
||||||
|
youtubeDlCheck url = catchMsgIO $ htmlOnly url False $ do
|
||||||
|
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
|
||||||
|
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
|
||||||
|
|
||||||
-- Ask youtube-dl for the filename of media in an url.
|
-- Ask youtube-dl for the filename of media in an url.
|
||||||
--
|
--
|
||||||
-- (This is not always identical to the filename it uses when downloading.)
|
-- (This is not always identical to the filename it uses when downloading.)
|
||||||
youtubeDlFileName :: URLString -> Annex (Either String FilePath)
|
youtubeDlFileName :: URLString -> Annex (Either String FilePath)
|
||||||
youtubeDlFileName url = flip catchIO (pure . Left . show) $ do
|
youtubeDlFileName url = flip catchIO (pure . Left . show) $
|
||||||
-- Sometimes youtube-dl will fail with an ugly backtrace
|
htmlOnly url nomedia $ do
|
||||||
-- (eg, http://bugs.debian.org/874321)
|
-- Sometimes youtube-dl will fail with an ugly backtrace
|
||||||
-- so catch stderr as well as stdout to avoid the user seeing it.
|
-- (eg, http://bugs.debian.org/874321)
|
||||||
-- --no-warnings avoids warning messages that are output to stdout.
|
-- so catch stderr as well as stdout to avoid the user
|
||||||
opts <- youtubeDlOpts
|
-- seeing it. --no-warnings avoids warning messages that
|
||||||
[ Param url
|
-- are output to stdout.
|
||||||
, Param "--get-filename"
|
opts <- youtubeDlOpts
|
||||||
, Param "--no-warnings"
|
[ Param url
|
||||||
]
|
, Param "--get-filename"
|
||||||
(output, ok) <- liftIO $ processTranscript "youtube-dl" (toCommand opts) Nothing
|
, Param "--no-warnings"
|
||||||
return $ case (ok, lines output) of
|
]
|
||||||
(True, (f:_)) | not (null f) -> Right f
|
(output, ok) <- liftIO $ processTranscript "youtube-dl"
|
||||||
_ -> Left "no media in url"
|
(toCommand opts) Nothing
|
||||||
|
return $ case (ok, lines output) of
|
||||||
|
(True, (f:_)) | not (null f) -> Right f
|
||||||
|
_ -> nomedia
|
||||||
|
where
|
||||||
|
nomedia = Left "no media in url"
|
||||||
|
|
||||||
youtubeDlOpts :: [CommandParam] -> Annex [CommandParam]
|
youtubeDlOpts :: [CommandParam] -> Annex [CommandParam]
|
||||||
youtubeDlOpts addopts = do
|
youtubeDlOpts addopts = do
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 2"""
|
||||||
|
date="2017-12-06T16:02:55Z"
|
||||||
|
content="""
|
||||||
|
"""]]
|
|
@ -0,0 +1,8 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 4"""
|
||||||
|
date="2017-12-08T18:48:46Z"
|
||||||
|
content="""
|
||||||
|
That one happened with `git annex addurl --fast $url` so a different code
|
||||||
|
path. Had to add a html page check to youtubeDlFileName to fix it.
|
||||||
|
"""]]
|
Loading…
Add table
Add a link
Reference in a new issue