addurl, importfeed: Added --no-raw option
Forces, e.g., downloading with youtube-dl without falling back to a raw download. Since youtube-dl failing due to a url not being supported is difficult to distinguish from it failing due to being blocked in some way, this can be useful to avoid the fallback of git-annex downloading the raw web page and adding that. Since --raw also prevents using special remotes, --no-raw also allows special remote downloads. It is always possible, though, that some special remote may claim a url and fall back to a raw download of the content, which --no-raw cannot prevent. Sponsored-by: Boyd Stephen Smith Jr. on Patreon
This commit is contained in:
parent
3a14648142
commit
b8e32e200e
5 changed files with 39 additions and 10 deletions
|
@ -9,6 +9,10 @@ git-annex (8.20210622) UNRELEASED; urgency=medium
|
||||||
* Dropping an unused object with drop --unused or dropunused will
|
* Dropping an unused object with drop --unused or dropunused will
|
||||||
mark it as dead, preventing fsck --all from complaining about it
|
mark it as dead, preventing fsck --all from complaining about it
|
||||||
after it's been dropped from all repositories.
|
after it's been dropped from all repositories.
|
||||||
|
* addurl, importfeed: Added --no-raw option that forces download
|
||||||
|
with youtube-dl or a special remote. In particular this can avoid
|
||||||
|
falling back to raw download when youtube-dl is blocked for some
|
||||||
|
reason.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 21 Jun 2021 12:25:25 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 21 Jun 2021 12:25:25 -0400
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex command
|
{- git-annex command
|
||||||
-
|
-
|
||||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -54,6 +54,7 @@ data AddUrlOptions = AddUrlOptions
|
||||||
data DownloadOptions = DownloadOptions
|
data DownloadOptions = DownloadOptions
|
||||||
{ relaxedOption :: Bool
|
{ relaxedOption :: Bool
|
||||||
, rawOption :: Bool
|
, rawOption :: Bool
|
||||||
|
, noRawOption :: Bool
|
||||||
, fileOption :: Maybe FilePath
|
, fileOption :: Maybe FilePath
|
||||||
, preserveFilenameOption :: Bool
|
, preserveFilenameOption :: Bool
|
||||||
, checkGitIgnoreOption :: CheckGitIgnore
|
, checkGitIgnoreOption :: CheckGitIgnore
|
||||||
|
@ -91,6 +92,10 @@ parseDownloadOptions withfileoptions = DownloadOptions
|
||||||
( long "raw"
|
( long "raw"
|
||||||
<> help "disable special handling for torrents, youtube-dl, etc"
|
<> help "disable special handling for torrents, youtube-dl, etc"
|
||||||
)
|
)
|
||||||
|
<*> switch
|
||||||
|
( long "no-raw"
|
||||||
|
<> help "prevent downloading raw url content, must use special handling"
|
||||||
|
)
|
||||||
<*> (if withfileoptions
|
<*> (if withfileoptions
|
||||||
then optional (strOption
|
then optional (strOption
|
||||||
( long "file" <> metavar paramFile
|
( long "file" <> metavar paramFile
|
||||||
|
@ -265,7 +270,7 @@ performWeb addunlockedmatcher o url file urlinfo = ifAnnexed file addurl geturl
|
||||||
addurl = addUrlChecked o url file webUUID $ \k ->
|
addurl = addUrlChecked o url file webUUID $ \k ->
|
||||||
ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
|
ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
|
||||||
( return (True, True, setDownloader url YoutubeDownloader)
|
( return (True, True, setDownloader url YoutubeDownloader)
|
||||||
, return (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url)
|
, checkRaw (downloadOptions o) $ return (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url)
|
||||||
)
|
)
|
||||||
|
|
||||||
{- Check that the url exists, and has the same size as the key,
|
{- Check that the url exists, and has the same size as the key,
|
||||||
|
@ -326,7 +331,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
|
||||||
in ifAnnexed f
|
in ifAnnexed f
|
||||||
(alreadyannexed (fromRawFilePath f))
|
(alreadyannexed (fromRawFilePath f))
|
||||||
(dl f)
|
(dl f)
|
||||||
Left _ -> normalfinish tmp
|
Left _ -> checkRaw o (normalfinish tmp)
|
||||||
where
|
where
|
||||||
dl dest = withTmpWorkDir mediakey $ \workdir -> do
|
dl dest = withTmpWorkDir mediakey $ \workdir -> do
|
||||||
let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)
|
let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)
|
||||||
|
@ -340,7 +345,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
|
||||||
showDestinationFile (fromRawFilePath dest)
|
showDestinationFile (fromRawFilePath dest)
|
||||||
addWorkTree canadd addunlockedmatcher webUUID mediaurl dest mediakey (Just (toRawFilePath mediafile))
|
addWorkTree canadd addunlockedmatcher webUUID mediaurl dest mediakey (Just (toRawFilePath mediafile))
|
||||||
return $ Just mediakey
|
return $ Just mediakey
|
||||||
Right Nothing -> normalfinish tmp
|
Right Nothing -> checkRaw o (normalfinish tmp)
|
||||||
Left msg -> do
|
Left msg -> do
|
||||||
cleanuptmp
|
cleanuptmp
|
||||||
warning msg
|
warning msg
|
||||||
|
@ -356,6 +361,11 @@ downloadWeb addunlockedmatcher o url urlinfo file =
|
||||||
else do
|
else do
|
||||||
warning $ dest ++ " already exists; not overwriting"
|
warning $ dest ++ " already exists; not overwriting"
|
||||||
return Nothing
|
return Nothing
|
||||||
|
|
||||||
|
{- Guards an action that would fall back to a raw download of an url.
 -
 - When the noRawOption field of the DownloadOptions is set (--no-raw),
 - the action is not run; giveup is called with a message explaining that
 - neither youtube-dl nor a special remote could be used. Otherwise the
 - action is returned unchanged.
 -}
checkRaw :: DownloadOptions -> Annex a -> Annex a
|
||||||
|
checkRaw o a
|
||||||
|
| noRawOption o = giveup "Unable to use youtube-dl or a special remote and --no-raw was specified."
|
||||||
|
| otherwise = a
|
||||||
|
|
||||||
{- The destination file is not known at start time unless the user provided
|
{- The destination file is not known at start time unless the user provided
|
||||||
- a filename. It's not displayed then for output consistency,
|
- a filename. It's not displayed then for output consistency,
|
||||||
|
@ -464,8 +474,9 @@ nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlIn
|
||||||
nodownloadWeb addunlockedmatcher o url urlinfo file
|
nodownloadWeb addunlockedmatcher o url urlinfo file
|
||||||
| Url.urlExists urlinfo = if rawOption o
|
| Url.urlExists urlinfo = if rawOption o
|
||||||
then nomedia
|
then nomedia
|
||||||
else either (const nomedia) (usemedia . toRawFilePath)
|
else youtubeDlFileName url >>= \case
|
||||||
=<< youtubeDlFileName url
|
Right mediafile -> usemedia (toRawFilePath mediafile)
|
||||||
|
Left _ -> checkRaw o nomedia
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
warning $ "unable to access url: " ++ url
|
warning $ "unable to access url: " ++ url
|
||||||
return Nothing
|
return Nothing
|
||||||
|
|
|
@ -42,7 +42,7 @@ import Types.MetaData
|
||||||
import Logs.MetaData
|
import Logs.MetaData
|
||||||
import Annex.MetaData
|
import Annex.MetaData
|
||||||
import Annex.FileMatcher
|
import Annex.FileMatcher
|
||||||
import Command.AddUrl (addWorkTree)
|
import Command.AddUrl (addWorkTree, checkRaw)
|
||||||
import Annex.UntrustedFilePath
|
import Annex.UntrustedFilePath
|
||||||
import qualified Annex.Branch
|
import qualified Annex.Branch
|
||||||
import Logs
|
import Logs
|
||||||
|
@ -185,7 +185,7 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl
|
||||||
let f' = fromRawFilePath f
|
let f' = fromRawFilePath f
|
||||||
r <- Remote.claimingUrl url
|
r <- Remote.claimingUrl url
|
||||||
if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
|
if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
|
||||||
then do
|
then checkRaw (downloadOptions opts) $ do
|
||||||
let dlopts = (downloadOptions opts)
|
let dlopts = (downloadOptions opts)
|
||||||
-- force using the filename
|
-- force using the filename
|
||||||
-- chosen here
|
-- chosen here
|
||||||
|
@ -326,8 +326,9 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl
|
||||||
, downloadlink
|
, downloadlink
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
downloadlink = performDownload addunlockedmatcher opts cache todownload
|
downloadlink = checkRaw (downloadOptions opts) $
|
||||||
{ location = Enclosure linkurl }
|
performDownload addunlockedmatcher opts cache todownload
|
||||||
|
{ location = Enclosure linkurl }
|
||||||
|
|
||||||
addmediafast linkurl mediaurl mediakey =
|
addmediafast linkurl mediaurl mediakey =
|
||||||
ifM (pure (not (rawOption (downloadOptions opts)))
|
ifM (pure (not (rawOption (downloadOptions opts)))
|
||||||
|
|
|
@ -49,6 +49,12 @@ be used to get better filenames.
|
||||||
special remotes. This will for example, make addurl
|
special remotes. This will for example, make addurl
|
||||||
download the .torrent file and not the contents it points to.
|
download the .torrent file and not the contents it points to.
|
||||||
|
|
||||||
|
* `--no-raw`
|
||||||
|
|
||||||
|
Require content pointed to by the url to be downloaded using youtube-dl
|
||||||
|
or a special remote, rather than the raw content of the url. If that
|
||||||
|
cannot be done, the add will fail.
|
||||||
|
|
||||||
* `--file=name`
|
* `--file=name`
|
||||||
|
|
||||||
Use with a filename that does not yet exist to add a new file
|
Use with a filename that does not yet exist to add a new file
|
||||||
|
|
|
@ -58,6 +58,13 @@ resulting in the new url being downloaded to such a filename.
|
||||||
special remotes. This will for example, make importfeed
|
special remotes. This will for example, make importfeed
|
||||||
download a .torrent file and not the contents it points to.
|
download a .torrent file and not the contents it points to.
|
||||||
|
|
||||||
|
* `--no-raw`
|
||||||
|
|
||||||
|
Require content pointed to by the url to be downloaded using youtube-dl
|
||||||
|
or a special remote, rather than the raw content of the url. If that
|
||||||
|
cannot be done, the import will fail, and the next import of the feed
|
||||||
|
will retry.
|
||||||
|
|
||||||
* `--template`
|
* `--template`
|
||||||
|
|
||||||
Controls where the files are stored.
|
Controls where the files are stored.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue