addurl, importfeed: Added --raw-except option

--raw-except=web allows using yt-dlp but not any other special remotes.

Currently this option can only be used once, trying to use it repeatedly
will make option parsing fail. Perhaps it ought to support being used more
than once, but it seemed like an unlikely use case to need that.

Note that getParsed is called repeatedly when the option is used with
several urls. While implementing DeferredParseClass would avoid that
innefficiency, it didn't seem worth the added boilerplate since
getParsed only calls byNameWithUUID which does minimal work.

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
Joey Hess 2024-02-05 15:16:25 -04:00
parent c0c85a7de4
commit 6b38d0c427
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 54 additions and 15 deletions

View file

@ -64,6 +64,7 @@ data DownloadOptions = DownloadOptions
{ relaxedOption :: Bool
, rawOption :: Bool
, noRawOption :: Bool
, rawExceptOption :: Maybe (DeferredParse Remote)
, fileOption :: Maybe FilePath
, preserveFilenameOption :: Bool
, checkGitIgnoreOption :: CheckGitIgnore
@ -105,6 +106,13 @@ parseDownloadOptions withfileoptions = DownloadOptions
( long "no-raw"
<> help "prevent downloading raw url content, must use special handling"
)
<*> optional
(mkParseRemoteOption <$> strOption
( long "raw-except" <> metavar paramRemote
<> help "disable special handling except by this remote"
<> completeRemotes
)
)
<*> (if withfileoptions
then optional (strOption
( long "file" <> metavar paramFile
@ -123,7 +131,7 @@ seek :: AddUrlOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
addunlockedmatcher <- addUnlockedMatcher
let go (si, (o', u)) = do
r <- Remote.claimingUrl u
r <- checkClaimingUrl (downloadOptions o) u
if Remote.uuid r == webUUID || rawOption (downloadOptions o')
then void $ commandAction $
startWeb addunlockedmatcher o' si u
@ -133,6 +141,13 @@ seek o = startConcurrency commandStages $ do
batchInput fmt (pure . parseBatchInput o) go
NoBatch -> forM_ (addUrls o) (\u -> go (SeekInput [u], (o, u)))
checkClaimingUrl :: DownloadOptions -> URLString -> Annex Remote
checkClaimingUrl o u = do
allowedremote <- case rawExceptOption o of
Nothing -> pure (const True)
Just f -> (==) <$> getParsed f
Remote.claimingUrl' allowedremote u
parseBatchInput :: AddUrlOptions -> String -> Either String (AddUrlOptions, URLString)
parseBatchInput o s
| batchFilesOption o =
@ -284,7 +299,7 @@ performWeb addunlockedmatcher o url file urlinfo = lookupKey file >>= \case
where
geturl = next $ isJust <$> addUrlFile addunlockedmatcher (downloadOptions o) url urlinfo file
addurl = addUrlChecked o url file webUUID $ \k ->
ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
ifM (useYoutubeDl (downloadOptions o) <&&> youtubeDlSupported url)
( return (Just (True, True, setDownloader url YoutubeDownloader))
, checkRaw Nothing (downloadOptions o) (pure Nothing) $
return (Just (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url))
@ -332,7 +347,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
downloader f p = Url.withUrlOptions $ downloadUrl False urlkey p Nothing [url] f
go Nothing = return Nothing
go (Just (tmp, backend)) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtmlFile (fromRawFilePath tmp)))
go (Just (tmp, backend)) = ifM (useYoutubeDl o <&&> liftIO (isHtmlFile (fromRawFilePath tmp)))
( tryyoutubedl tmp backend
, normalfinish tmp backend
)
@ -394,6 +409,15 @@ checkRaw failreason o f a
f
| otherwise = a
useYoutubeDl :: DownloadOptions -> Annex Bool
useYoutubeDl o
| rawOption o = pure False
| otherwise = case rawExceptOption o of
Nothing -> pure True
Just f -> do
remote <- getParsed f
pure (Remote.uuid remote == webUUID)
{- The destination file is not known at start time unless the user provided
- a filename. It's not displayed then for output consistency,
- but is added to the json when available. -}