fix regression in addurl --file caused by youtube-dl support
Now youtubeDlCheck downloads the beginning of the url's content and checks if it's html, only when it is does it pass it off the youtube-dl to check if it supports it. This means more work is done for urls that youtube-dl does support, but is probably more efficient for other urls, since it only downloads the first chunk of content, while youtube-dl probably downloads more. As well as the reported bug, this also fixes behavior when an url was added with youtube-dl, but the url content has now changed from a html page to something else. Remote.Web.checkKey used to wrongly succeed in that situation, since youtube-dl said sure it can download that something else. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
6b5e55a154
commit
c6e4bc0a22
4 changed files with 71 additions and 18 deletions
|
@ -10,8 +10,10 @@ module Annex.YoutubeDl where
|
|||
import Annex.Common
|
||||
import qualified Annex
|
||||
import Annex.Content
|
||||
import Annex.Url
|
||||
import Utility.Url (URLString)
|
||||
import Utility.DiskFree
|
||||
import Utility.HtmlDetect
|
||||
import Logs.Transfer
|
||||
|
||||
-- Runs youtube-dl in a work directory, to download a single media file
|
||||
|
@ -108,10 +110,19 @@ youtubeDlSupported :: URLString -> Annex Bool
|
|||
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url
|
||||
|
||||
-- Check if youtube-dl can find media in an url.
|
||||
--
|
||||
-- youtube-dl supports downloading urls that are not html pages,
|
||||
-- but we don't want to use it for such urls, since they can be downloaded
|
||||
-- without it. So, this first downloads part of the content and checks
|
||||
-- if it's a html page; only then is youtube-dl used.
|
||||
youtubeDlCheck :: URLString -> Annex (Either String Bool)
|
||||
youtubeDlCheck url = catchMsgIO $ do
|
||||
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
|
||||
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
|
||||
uo <- getUrlOptions
|
||||
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
|
||||
Just bs | isHtmlBs bs -> do
|
||||
opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
|
||||
liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing
|
||||
_ -> return False
|
||||
|
||||
-- Ask youtube-dl for the filename of media in an url.
|
||||
--
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue