addurl, importfeed: Added --raw-except option
--raw-except=web allows using yt-dlp but not any other special remote.

Currently this option can only be used once; trying to use it repeatedly will make option parsing fail. Perhaps it ought to support being used more than once, but that seemed like an unlikely use case.

Note that getParsed is called repeatedly when the option is used with several urls. Implementing DeferredParseClass would avoid that inefficiency, but it didn't seem worth the added boilerplate, since getParsed only calls byNameWithUUID, which does minimal work.

Sponsored-by: Dartmouth College's DANDI project
parent c0c85a7de4
commit 6b38d0c427

7 changed files with 54 additions and 15 deletions
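Before the diff itself, a minimal, self-contained Haskell sketch (not part of the patch) of the policy the new useYoutubeDl helper encodes. RawPolicy and the name comparison are illustrative stand-ins; the real code compares remote UUIDs resolved via getParsed.

-- yt-dlp handling stays enabled only when --raw is not set and --raw-except,
-- if given, names the web remote.
data RawPolicy = RawPolicy
    { raw :: Bool                -- --raw
    , rawExcept :: Maybe String  -- --raw-except=REMOTE
    }

allowYtDlp :: RawPolicy -> Bool
allowYtDlp p
    | raw p = False
    | otherwise = maybe True (== "web") (rawExcept p)

main :: IO ()
main = mapM_ (print . allowYtDlp)
    [ RawPolicy False Nothing       -- True: default behaviour
    , RawPolicy True Nothing        -- False: --raw disables yt-dlp
    , RawPolicy False (Just "web")  -- True: --raw-except=web keeps yt-dlp
    , RawPolicy False (Just "s3")   -- False: only the named remote gets special handling
    ]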
CHANGELOG
@@ -7,6 +7,7 @@ git-annex (10.20240130) UNRELEASED; urgency=medium
     10.20230626.)
   * importfeed --force: Avoid creating duplicates of existing
     already downloaded files when yt-dlp or a special remote was used.
+  * addurl, importfeed: Added --raw-except option.
 
  -- Joey Hess <id@joeyh.name>  Mon, 29 Jan 2024 15:59:33 -0400
 
Command/AddUrl.hs
@@ -64,6 +64,7 @@ data DownloadOptions = DownloadOptions
     { relaxedOption :: Bool
     , rawOption :: Bool
     , noRawOption :: Bool
+    , rawExceptOption :: Maybe (DeferredParse Remote)
     , fileOption :: Maybe FilePath
     , preserveFilenameOption :: Bool
     , checkGitIgnoreOption :: CheckGitIgnore
@@ -105,6 +106,13 @@ parseDownloadOptions withfileoptions = DownloadOptions
         ( long "no-raw"
         <> help "prevent downloading raw url content, must use special handling"
         )
+    <*> optional
+        (mkParseRemoteOption <$> strOption
+            ( long "raw-except" <> metavar paramRemote
+            <> help "disable special handling except by this remote"
+            <> completeRemotes
+            )
+        )
     <*> (if withfileoptions
         then optional (strOption
             ( long "file" <> metavar paramFile
@@ -123,7 +131,7 @@ seek :: AddUrlOptions -> CommandSeek
 seek o = startConcurrency commandStages $ do
     addunlockedmatcher <- addUnlockedMatcher
     let go (si, (o', u)) = do
-        r <- Remote.claimingUrl u
+        r <- checkClaimingUrl (downloadOptions o) u
         if Remote.uuid r == webUUID || rawOption (downloadOptions o')
             then void $ commandAction $
                 startWeb addunlockedmatcher o' si u
@@ -133,6 +141,13 @@ seek o = startConcurrency commandStages $ do
             batchInput fmt (pure . parseBatchInput o) go
         NoBatch -> forM_ (addUrls o) (\u -> go (SeekInput [u], (o, u)))
 
+checkClaimingUrl :: DownloadOptions -> URLString -> Annex Remote
+checkClaimingUrl o u = do
+    allowedremote <- case rawExceptOption o of
+        Nothing -> pure (const True)
+        Just f -> (==) <$> getParsed f
+    Remote.claimingUrl' allowedremote u
+
 parseBatchInput :: AddUrlOptions -> String -> Either String (AddUrlOptions, URLString)
 parseBatchInput o s
     | batchFilesOption o =
@@ -284,7 +299,7 @@ performWeb addunlockedmatcher o url file urlinfo = lookupKey file >>= \case
   where
     geturl = next $ isJust <$> addUrlFile addunlockedmatcher (downloadOptions o) url urlinfo file
     addurl = addUrlChecked o url file webUUID $ \k ->
-        ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
+        ifM (useYoutubeDl (downloadOptions o) <&&> youtubeDlSupported url)
             ( return (Just (True, True, setDownloader url YoutubeDownloader))
             , checkRaw Nothing (downloadOptions o) (pure Nothing) $
                 return (Just (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url))
@@ -332,7 +347,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
     urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
     downloader f p = Url.withUrlOptions $ downloadUrl False urlkey p Nothing [url] f
     go Nothing = return Nothing
-    go (Just (tmp, backend)) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtmlFile (fromRawFilePath tmp)))
+    go (Just (tmp, backend)) = ifM (useYoutubeDl o <&&> liftIO (isHtmlFile (fromRawFilePath tmp)))
         ( tryyoutubedl tmp backend
         , normalfinish tmp backend
         )
@@ -394,6 +409,15 @@ checkRaw failreason o f a
         f
     | otherwise = a
 
+useYoutubeDl :: DownloadOptions -> Annex Bool
+useYoutubeDl o
+    | rawOption o = pure False
+    | otherwise = case rawExceptOption o of
+        Nothing -> pure True
+        Just f -> do
+            remote <- getParsed f
+            pure (Remote.uuid remote == webUUID)
+
 {- The destination file is not known at start time unless the user provided
  - a filename. It's not displayed then for output consistency,
  - but is added to the json when available. -}
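The parseDownloadOptions change above follows the standard optparse-applicative pattern. A standalone sketch of that pattern, with the DeferredParse Remote wrapper and the paramRemote/completeRemotes helpers replaced by plain strings, so it is illustrative only and requires the optparse-applicative package:

import Options.Applicative

data Opts = Opts
    { rawExcept :: Maybe String  -- Nothing unless --raw-except=REMOTE is given
    , url :: String
    }

optsParser :: Parser Opts
optsParser = Opts
    <$> optional (strOption
        ( long "raw-except"
        <> metavar "REMOTE"
        <> help "disable special handling except by this remote"
        ))
    <*> argument str (metavar "URL")

main :: IO ()
main = do
    o <- execParser (info (optsParser <**> helper) fullDesc)
    putStrLn $ case rawExcept o of
        Nothing -> "no --raw-except; any remote may claim " ++ url o
        Just r -> "only " ++ r ++ " may specially handle " ++ url o

Because the option is wrapped in a single optional strOption, supplying it more than once makes parsing fail, which matches the limitation noted in the commit message.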
Command/ImportFeed.hs
@@ -38,7 +38,7 @@ import Logs.File
 import qualified Utility.Format
 import Utility.Tmp
 import Utility.Metered
-import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..), checkCanAdd, addWorkTree, checkRaw)
+import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..), checkClaimingUrl, checkCanAdd, addWorkTree, checkRaw, useYoutubeDl)
 import Annex.UUID
 import Backend.URL (fromUrl)
 import Annex.Content
@@ -317,9 +317,8 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todownload of
     startdownloadenclosure url = checkknown url $ startUrlDownload cv todownload url $
         downloadEnclosure addunlockedmatcher opts cache cv todownload url
 
-    downloadmedia linkurl mediaurl mediakey
-        | rawOption (downloadOptions opts) = startdownloadlink
-        | otherwise = ifM (youtubeDlSupported linkurl)
+    downloadmedia linkurl mediaurl mediakey =
+        ifM (useYoutubeDl (downloadOptions opts) <&&> youtubeDlSupported linkurl)
             ( startUrlDownload cv todownload linkurl $
                 withTmpWorkDir mediakey $ \workdir -> do
                     dl <- youtubeDl linkurl (fromRawFilePath workdir) nullMeterUpdate
@@ -348,7 +347,7 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todownload of
     contdownloadlink = downloadEnclosure addunlockedmatcher opts cache cv todownload linkurl
 
     addmediafast linkurl mediaurl mediakey =
-        ifM (pure (not (rawOption (downloadOptions opts)))
+        ifM (useYoutubeDl (downloadOptions opts)
             <&&> (pure (youtubedlscraped todownload) <||> youtubeDlSupported linkurl))
             ( startUrlDownload cv todownload linkurl $ do
                 runDownload todownload linkurl ".m" cache cv $ \f ->
@@ -362,7 +361,7 @@ downloadEnclosure :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> TMVar B
 downloadEnclosure addunlockedmatcher opts cache cv todownload url =
     runDownload todownload url (takeWhile (/= '?') $ takeExtension url) cache cv $ \f -> do
         let f' = fromRawFilePath f
-        r <- Remote.claimingUrl url
+        r <- checkClaimingUrl (downloadOptions opts) url
         if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
             then checkRaw (Just url) (downloadOptions opts) (pure Nothing) $ do
                 let dlopts = (downloadOptions opts)
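The rewritten call sites above rely on <&&>, the short-circuiting monadic (&&) used throughout git-annex, so the cheap option check runs before the more expensive url probe. A tiny self-contained sketch of that behaviour, with the operator redefined locally for illustration:

import Control.Monad (unless)

(<&&>) :: Monad m => m Bool -> m Bool -> m Bool
a <&&> b = a >>= \x -> if x then b else pure False
infixr 3 <&&>

main :: IO ()
main = do
    -- The right-hand action (standing in for youtubeDlSupported, which probes
    -- the url) never runs because the left-hand check already returned False.
    ok <- pure False <&&> (putStrLn "probing url..." >> pure True)
    unless ok $ putStrLn "probe skipped; a raw download would be used instead"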
Remote.hs
@@ -1,6 +1,6 @@
 {- git-annex remotes
  -
- - Copyright 2011-2020 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2024 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -59,6 +59,7 @@ module Remote (
     logStatus,
     checkAvailable,
     claimingUrl,
+    claimingUrl',
     isExportSupported,
 ) where
 
@@ -432,9 +433,15 @@ hasKeyCheap = checkPresentCheap
 
 {- The web special remote claims urls by default. -}
 claimingUrl :: URLString -> Annex Remote
-claimingUrl url = do
+claimingUrl = claimingUrl' (const True)
+
+{- The web special remote still claims urls if there is no
+ - other remote that does, even when the remotefilter does
+ - not include it. -}
+claimingUrl' :: (Remote -> Bool) -> URLString -> Annex Remote
+claimingUrl' remotefilter url = do
     rs <- remoteList
     let web = Prelude.head $ filter (\r -> uuid r == webUUID) rs
-    fromMaybe web <$> firstM checkclaim rs
+    fromMaybe web <$> firstM checkclaim (filter remotefilter rs)
   where
     checkclaim = maybe (pure False) (`id` url) . claimUrl
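The comment on claimingUrl' above is the subtle part: even when remotefilter excludes the web remote, the web remote is still returned as the fallback when no filtered remote claims the url. A pure, self-contained sketch of that fallback, with remote names standing in for the real Remote type and the monadic checkclaim simplified to a pure predicate:

import Data.List (find)
import Data.Maybe (fromMaybe)

data R = R { rName :: String, claims :: String -> Bool }

-- First remote that passes the filter and claims the url wins; otherwise
-- fall back to the web remote, even if the filter excluded it.
claimingUrl' :: (R -> Bool) -> String -> [R] -> R
claimingUrl' remotefilter url rs =
    fromMaybe web (find (\r -> claims r url) (filter remotefilter rs))
  where
    web = head (filter ((== "web") . rName) rs)

main :: IO ()
main = do
    let web' = R "web" (const False)  -- web only acts as the fallback here
        s3 = R "s3" (const True)      -- a special remote that claims every url
        rs = [web', s3]
    putStrLn (rName (claimingUrl' (const True) "http://example.com/x" rs))          -- "s3"
    putStrLn (rName (claimingUrl' ((== "web") . rName) "http://example.com/x" rs))  -- "web"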
doc/git-annex-addurl.mdwn
@@ -45,8 +45,8 @@ be used to get better filenames.
 
 * `--raw`
 
-  Prevent special handling of urls by yt-dlp, bittorrent, and other
-  special remotes. This will for example, make addurl
+  Prevent special handling of urls by yt-dlp, and by bittorrent
+  and other special remotes. This will for example, make addurl
   download the .torrent file and not the contents it points to.
 
 * `--no-raw`
@@ -55,6 +55,12 @@ be used to get better filenames.
   or a special remote, rather than the raw content of the url. if that
   cannot be done, the add will fail.
 
+* `--raw-except=remote`
+
+  Prevent special handling of urls by all special remotes except
+  for the specified one. To allow special handling only
+  by yt-dlp, use `--raw-except=web`.
+
 * `--file=name`
 
   Use with a filename that does not yet exist to add a new file
doc/git-annex-importfeed.mdwn
@@ -37,7 +37,7 @@ resulting in the new url being downloaded to such a filename.
 
   Force downloading items it's seen before.
 
-* `--relaxed`, `--fast`, `--raw`
+* `--relaxed`, `--fast`, `--raw`, `--raw-except`
 
   These options behave the same as when using [[git-annex-addurl]](1).
 
@@ -3,3 +3,5 @@ I think generally we would still prefer to use `--raw` as to avoid possible side
 
 [[!meta author=yoh]]
 [[!tag projects/dandi]]
+
+> [[done]] --[[Joey]]