addurl, importfeed: Added --raw-except option

--raw-except=web allows using yt-dlp but not any other special remotes.

Currently this option can only be used once, trying to use it repeatedly
will make option parsing fail. Perhaps it ought to support being used more
than once, but it seemed like an unlikely use case to need that.

Note that getParsed is called repeatedly when the option is used with
several urls. While implementing DeferredParseClass would avoid that
innefficiency, it didn't seem worth the added boilerplate since
getParsed only calls byNameWithUUID which does minimal work.

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
Joey Hess 2024-02-05 15:16:25 -04:00
parent c0c85a7de4
commit 6b38d0c427
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 54 additions and 15 deletions

View file

@ -64,6 +64,7 @@ data DownloadOptions = DownloadOptions
{ relaxedOption :: Bool
, rawOption :: Bool
, noRawOption :: Bool
, rawExceptOption :: Maybe (DeferredParse Remote)
, fileOption :: Maybe FilePath
, preserveFilenameOption :: Bool
, checkGitIgnoreOption :: CheckGitIgnore
@ -105,6 +106,13 @@ parseDownloadOptions withfileoptions = DownloadOptions
( long "no-raw"
<> help "prevent downloading raw url content, must use special handling"
)
<*> optional
(mkParseRemoteOption <$> strOption
( long "raw-except" <> metavar paramRemote
<> help "disable special handling except by this remote"
<> completeRemotes
)
)
<*> (if withfileoptions
then optional (strOption
( long "file" <> metavar paramFile
@ -123,7 +131,7 @@ seek :: AddUrlOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
addunlockedmatcher <- addUnlockedMatcher
let go (si, (o', u)) = do
r <- Remote.claimingUrl u
r <- checkClaimingUrl (downloadOptions o) u
if Remote.uuid r == webUUID || rawOption (downloadOptions o')
then void $ commandAction $
startWeb addunlockedmatcher o' si u
@ -133,6 +141,13 @@ seek o = startConcurrency commandStages $ do
batchInput fmt (pure . parseBatchInput o) go
NoBatch -> forM_ (addUrls o) (\u -> go (SeekInput [u], (o, u)))
checkClaimingUrl :: DownloadOptions -> URLString -> Annex Remote
checkClaimingUrl o u = do
allowedremote <- case rawExceptOption o of
Nothing -> pure (const True)
Just f -> (==) <$> getParsed f
Remote.claimingUrl' allowedremote u
parseBatchInput :: AddUrlOptions -> String -> Either String (AddUrlOptions, URLString)
parseBatchInput o s
| batchFilesOption o =
@ -284,7 +299,7 @@ performWeb addunlockedmatcher o url file urlinfo = lookupKey file >>= \case
where
geturl = next $ isJust <$> addUrlFile addunlockedmatcher (downloadOptions o) url urlinfo file
addurl = addUrlChecked o url file webUUID $ \k ->
ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
ifM (useYoutubeDl (downloadOptions o) <&&> youtubeDlSupported url)
( return (Just (True, True, setDownloader url YoutubeDownloader))
, checkRaw Nothing (downloadOptions o) (pure Nothing) $
return (Just (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url))
@ -332,7 +347,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
downloader f p = Url.withUrlOptions $ downloadUrl False urlkey p Nothing [url] f
go Nothing = return Nothing
go (Just (tmp, backend)) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtmlFile (fromRawFilePath tmp)))
go (Just (tmp, backend)) = ifM (useYoutubeDl o <&&> liftIO (isHtmlFile (fromRawFilePath tmp)))
( tryyoutubedl tmp backend
, normalfinish tmp backend
)
@ -394,6 +409,15 @@ checkRaw failreason o f a
f
| otherwise = a
useYoutubeDl :: DownloadOptions -> Annex Bool
useYoutubeDl o
| rawOption o = pure False
| otherwise = case rawExceptOption o of
Nothing -> pure True
Just f -> do
remote <- getParsed f
pure (Remote.uuid remote == webUUID)
{- The destination file is not known at start time unless the user provided
- a filename. It's not displayed then for output consistency,
- but is added to the json when available. -}

View file

@ -38,7 +38,7 @@ import Logs.File
import qualified Utility.Format
import Utility.Tmp
import Utility.Metered
import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..), checkCanAdd, addWorkTree, checkRaw)
import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..), checkClaimingUrl, checkCanAdd, addWorkTree, checkRaw, useYoutubeDl)
import Annex.UUID
import Backend.URL (fromUrl)
import Annex.Content
@ -317,9 +317,8 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todown
startdownloadenclosure url = checkknown url $ startUrlDownload cv todownload url $
downloadEnclosure addunlockedmatcher opts cache cv todownload url
downloadmedia linkurl mediaurl mediakey
| rawOption (downloadOptions opts) = startdownloadlink
| otherwise = ifM (youtubeDlSupported linkurl)
downloadmedia linkurl mediaurl mediakey =
ifM (useYoutubeDl (downloadOptions opts) <&&> youtubeDlSupported linkurl)
( startUrlDownload cv todownload linkurl $
withTmpWorkDir mediakey $ \workdir -> do
dl <- youtubeDl linkurl (fromRawFilePath workdir) nullMeterUpdate
@ -348,7 +347,7 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todown
contdownloadlink = downloadEnclosure addunlockedmatcher opts cache cv todownload linkurl
addmediafast linkurl mediaurl mediakey =
ifM (pure (not (rawOption (downloadOptions opts)))
ifM (useYoutubeDl (downloadOptions opts)
<&&> (pure (youtubedlscraped todownload) <||> youtubeDlSupported linkurl))
( startUrlDownload cv todownload linkurl $ do
runDownload todownload linkurl ".m" cache cv $ \f ->
@ -362,7 +361,7 @@ downloadEnclosure :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> TMVar B
downloadEnclosure addunlockedmatcher opts cache cv todownload url =
runDownload todownload url (takeWhile (/= '?') $ takeExtension url) cache cv $ \f -> do
let f' = fromRawFilePath f
r <- Remote.claimingUrl url
r <- checkClaimingUrl (downloadOptions opts) url
if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
then checkRaw (Just url) (downloadOptions opts) (pure Nothing) $ do
let dlopts = (downloadOptions opts)