addurl, importfeed: Added --no-raw option

Forces, e.g., downloading with youtube-dl, without falling back to a raw download.

Since youtube-dl failing due to an url not being supported is difficult to
distinguish from it failing due to being blocked in some way, this can be
useful to avoid the fallback of git-annex downloading the raw web page and
adding that.

Since --raw also prevents using special remotes, --no-raw also
allows special remote downloads, although it's always possible that some
special remote may claim an url and fall back to raw download of the
content, which --no-raw cannot prevent.

Sponsored-by: Boyd Stephen Smith Jr. on Patreon
This commit is contained in:
Joey Hess 2021-06-27 11:13:38 -04:00
parent 3a14648142
commit b8e32e200e
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 39 additions and 10 deletions

View file

@ -9,6 +9,10 @@ git-annex (8.20210622) UNRELEASED; urgency=medium
* Dropping an unused object with drop --unused or dropunused will
mark it as dead, preventing fsck --all from complaining about it
after it's been dropped from all repositories.
* addurl, importfeed: Added --no-raw option that forces download
with youtube-dl or a special remote. In particular this can avoid
falling back to raw download when youtube-dl is blocked for some
reason.
-- Joey Hess <id@joeyh.name> Mon, 21 Jun 2021 12:25:25 -0400

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -54,6 +54,7 @@ data AddUrlOptions = AddUrlOptions
data DownloadOptions = DownloadOptions
{ relaxedOption :: Bool
, rawOption :: Bool
, noRawOption :: Bool
, fileOption :: Maybe FilePath
, preserveFilenameOption :: Bool
, checkGitIgnoreOption :: CheckGitIgnore
@ -91,6 +92,10 @@ parseDownloadOptions withfileoptions = DownloadOptions
( long "raw"
<> help "disable special handling for torrents, youtube-dl, etc"
)
<*> switch
( long "no-raw"
<> help "prevent downloading raw url content, must use special handling"
)
<*> (if withfileoptions
then optional (strOption
( long "file" <> metavar paramFile
@ -265,7 +270,7 @@ performWeb addunlockedmatcher o url file urlinfo = ifAnnexed file addurl geturl
addurl = addUrlChecked o url file webUUID $ \k ->
ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
( return (True, True, setDownloader url YoutubeDownloader)
, return (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url)
, checkRaw (downloadOptions o) $ return (Url.urlExists urlinfo, Url.urlSize urlinfo == fromKey keySize k, url)
)
{- Check that the url exists, and has the same size as the key,
@ -326,7 +331,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
in ifAnnexed f
(alreadyannexed (fromRawFilePath f))
(dl f)
Left _ -> normalfinish tmp
Left _ -> checkRaw o (normalfinish tmp)
where
dl dest = withTmpWorkDir mediakey $ \workdir -> do
let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)
@ -340,7 +345,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
showDestinationFile (fromRawFilePath dest)
addWorkTree canadd addunlockedmatcher webUUID mediaurl dest mediakey (Just (toRawFilePath mediafile))
return $ Just mediakey
Right Nothing -> normalfinish tmp
Right Nothing -> checkRaw o (normalfinish tmp)
Left msg -> do
cleanuptmp
warning msg
@ -356,6 +361,11 @@ downloadWeb addunlockedmatcher o url urlinfo file =
else do
warning $ dest ++ " already exists; not overwriting"
return Nothing
{- Guards an action that would fall back to downloading the raw url
 - content. When noRawOption is set (--no-raw), the action is not run
 - and giveup is called with an explanatory message instead; otherwise
 - the action is run unchanged. -}
checkRaw :: DownloadOptions -> Annex a -> Annex a
checkRaw o a
| noRawOption o = giveup "Unable to use youtube-dl or a special remote and --no-raw was specified."
| otherwise = a
{- The destination file is not known at start time unless the user provided
- a filename. It's not displayed then for output consistency,
@ -464,8 +474,9 @@ nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlIn
nodownloadWeb addunlockedmatcher o url urlinfo file
| Url.urlExists urlinfo = if rawOption o
then nomedia
else either (const nomedia) (usemedia . toRawFilePath)
=<< youtubeDlFileName url
else youtubeDlFileName url >>= \case
Right mediafile -> usemedia (toRawFilePath mediafile)
Left _ -> checkRaw o nomedia
| otherwise = do
warning $ "unable to access url: " ++ url
return Nothing

View file

@ -42,7 +42,7 @@ import Types.MetaData
import Logs.MetaData
import Annex.MetaData
import Annex.FileMatcher
import Command.AddUrl (addWorkTree)
import Command.AddUrl (addWorkTree, checkRaw)
import Annex.UntrustedFilePath
import qualified Annex.Branch
import Logs
@ -185,7 +185,7 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl
let f' = fromRawFilePath f
r <- Remote.claimingUrl url
if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
then do
then checkRaw (downloadOptions opts) $ do
let dlopts = (downloadOptions opts)
-- force using the filename
-- chosen here
@ -326,8 +326,9 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl
, downloadlink
)
where
downloadlink = performDownload addunlockedmatcher opts cache todownload
{ location = Enclosure linkurl }
downloadlink = checkRaw (downloadOptions opts) $
performDownload addunlockedmatcher opts cache todownload
{ location = Enclosure linkurl }
addmediafast linkurl mediaurl mediakey =
ifM (pure (not (rawOption (downloadOptions opts)))

View file

@ -49,6 +49,12 @@ be used to get better filenames.
special remotes. This will, for example, make addurl
download the .torrent file and not the contents it points to.
* `--no-raw`
Require content pointed to by the url to be downloaded using youtube-dl
or a special remote, rather than the raw content of the url. If that
cannot be done, the add will fail.
* `--file=name`
Use with a filename that does not yet exist to add a new file

View file

@ -58,6 +58,13 @@ resulting in the new url being downloaded to such a filename.
special remotes. This will, for example, make importfeed
download a .torrent file and not the contents it points to.
* `--no-raw`
Require content pointed to by the url to be downloaded using youtube-dl
or a special remote, rather than the raw content of the url. If that
cannot be done, the import will fail, and the next import of the feed
will retry.
* `--template`
Controls where the files are stored.