make --raw avoid ever running youtube-dl

added DownloadOptions type to avoid needing two different Bool params
for some functions.

This commit was sponsored by Thom May on Patreon.
This commit is contained in:
Joey Hess 2017-11-30 16:48:35 -04:00
parent 640cb36a5c
commit 5e95d54604
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 118 additions and 102 deletions

View file

@ -39,23 +39,23 @@ cmd = notBareRepo $ withGlobalOptions [jobsOption, jsonOption, jsonProgressOptio
data AddUrlOptions = AddUrlOptions data AddUrlOptions = AddUrlOptions
{ addUrls :: CmdParams { addUrls :: CmdParams
, fileOption :: Maybe FilePath
, pathdepthOption :: Maybe Int , pathdepthOption :: Maybe Int
, prefixOption :: Maybe String , prefixOption :: Maybe String
, suffixOption :: Maybe String , suffixOption :: Maybe String
, relaxedOption :: Bool , downloadOptions :: DownloadOptions
, rawOption :: Bool
, batchOption :: BatchMode , batchOption :: BatchMode
, batchFilesOption :: Bool , batchFilesOption :: Bool
} }
data DownloadOptions = DownloadOptions
{ relaxedOption :: Bool
, rawOption :: Bool
, fileOption :: Maybe FilePath
}
optParser :: CmdParamsDesc -> Parser AddUrlOptions optParser :: CmdParamsDesc -> Parser AddUrlOptions
optParser desc = AddUrlOptions optParser desc = AddUrlOptions
<$> cmdParams desc <$> cmdParams desc
<*> optional (strOption
( long "file" <> metavar paramFile
<> help "specify what file the url is added to"
))
<*> optional (option auto <*> optional (option auto
( long "pathdepth" <> metavar paramNumber ( long "pathdepth" <> metavar paramNumber
<> help "number of url path components to use in filename" <> help "number of url path components to use in filename"
@ -68,25 +68,29 @@ optParser desc = AddUrlOptions
( long "suffix" <> metavar paramValue ( long "suffix" <> metavar paramValue
<> help "add a suffix to the filename" <> help "add a suffix to the filename"
)) ))
<*> parseRelaxedOption <*> parseDownloadOptions True
<*> parseRawOption
<*> parseBatchOption <*> parseBatchOption
<*> switch <*> switch
( long "with-files" ( long "with-files"
<> help "parse batch mode lines of the form \"$url $file\"" <> help "parse batch mode lines of the form \"$url $file\""
) )
parseRelaxedOption :: Parser Bool parseDownloadOptions :: Bool -> Parser DownloadOptions
parseRelaxedOption = switch parseDownloadOptions withfileoption = DownloadOptions
( long "relaxed" <$> switch
<> help "skip size check" ( long "relaxed"
) <> help "skip size check"
)
parseRawOption :: Parser Bool <*> switch
parseRawOption = switch ( long "raw"
( long "raw" <> help "disable special handling for torrents, youtube-dl, etc"
<> help "disable special handling for torrents, youtube-dl, etc" )
) <*> if withfileoption
then optional (strOption
( long "file" <> metavar paramFile
<> help "specify what file the url is added to"
))
else pure Nothing
seek :: AddUrlOptions -> CommandSeek seek :: AddUrlOptions -> CommandSeek
seek o = allowConcurrentOutput $ do seek o = allowConcurrentOutput $ do
@ -97,7 +101,7 @@ seek o = allowConcurrentOutput $ do
where where
go (o', u) = do go (o', u) = do
r <- Remote.claimingUrl u r <- Remote.claimingUrl u
if Remote.uuid r == webUUID || rawOption o' if Remote.uuid r == webUUID || rawOption (downloadOptions o')
then void $ commandAction $ startWeb o' u then void $ commandAction $ startWeb o' u
else checkUrl r o' u else checkUrl r o' u
@ -107,13 +111,13 @@ parseBatchInput o s
let (u, f) = separate (== ' ') s let (u, f) = separate (== ' ') s
in if null u || null f in if null u || null f
then Left ("parsed empty url or filename in input: " ++ s) then Left ("parsed empty url or filename in input: " ++ s)
else Right (o { fileOption = Just f }, u) else Right (o { downloadOptions = (downloadOptions o) { fileOption = Just f } }, u)
| otherwise = Right (o, s) | otherwise = Right (o, s)
checkUrl :: Remote -> AddUrlOptions -> URLString -> Annex () checkUrl :: Remote -> AddUrlOptions -> URLString -> Annex ()
checkUrl r o u = do checkUrl r o u = do
pathmax <- liftIO $ fileNameLengthLimit "." pathmax <- liftIO $ fileNameLengthLimit "."
let deffile = fromMaybe (urlString2file u (pathdepthOption o) pathmax) (fileOption o) let deffile = fromMaybe (urlString2file u (pathdepthOption o) pathmax) (fileOption (downloadOptions o))
go deffile =<< maybe go deffile =<< maybe
(error $ "unable to checkUrl of " ++ Remote.name r) (error $ "unable to checkUrl of " ++ Remote.name r)
(tryNonAsync . flip id u) (tryNonAsync . flip id u)
@ -125,45 +129,44 @@ checkUrl r o u = do
warning (show e) warning (show e)
next $ next $ return False next $ next $ return False
go deffile (Right (UrlContents sz mf)) = do go deffile (Right (UrlContents sz mf)) = do
let f = adjustFile o (fromMaybe (maybe deffile fromSafeFilePath mf) (fileOption o)) let f = adjustFile o (fromMaybe (maybe deffile fromSafeFilePath mf) (fileOption (downloadOptions o)))
void $ commandAction $ void $ commandAction $ startRemote r o f u sz
startRemote r (relaxedOption o) f u sz
go deffile (Right (UrlMulti l)) go deffile (Right (UrlMulti l))
| isNothing (fileOption o) = | isNothing (fileOption (downloadOptions o)) =
forM_ l $ \(u', sz, f) -> do forM_ l $ \(u', sz, f) -> do
let f' = adjustFile o (deffile </> fromSafeFilePath f) let f' = adjustFile o (deffile </> fromSafeFilePath f)
void $ commandAction $ void $ commandAction $
startRemote r (relaxedOption o) f' u' sz startRemote r o f' u' sz
| otherwise = giveup $ unwords | otherwise = giveup $ unwords
[ "That url contains multiple files according to the" [ "That url contains multiple files according to the"
, Remote.name r , Remote.name r
, " remote; cannot add it to a single file." , " remote; cannot add it to a single file."
] ]
startRemote :: Remote -> Bool -> FilePath -> URLString -> Maybe Integer -> CommandStart startRemote :: Remote -> AddUrlOptions -> FilePath -> URLString -> Maybe Integer -> CommandStart
startRemote r relaxed file uri sz = do startRemote r o file uri sz = do
pathmax <- liftIO $ fileNameLengthLimit "." pathmax <- liftIO $ fileNameLengthLimit "."
let file' = joinPath $ map (truncateFilePath pathmax) $ splitDirectories file let file' = joinPath $ map (truncateFilePath pathmax) $ splitDirectories file
showStart' "addurl" (Just uri) showStart' "addurl" (Just uri)
showNote $ "from " ++ Remote.name r showNote $ "from " ++ Remote.name r
showDestinationFile file' showDestinationFile file'
next $ performRemote r relaxed uri file' sz next $ performRemote r o uri file' sz
performRemote :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> CommandPerform performRemote :: Remote -> AddUrlOptions -> URLString -> FilePath -> Maybe Integer -> CommandPerform
performRemote r relaxed uri file sz = ifAnnexed file adduri geturi performRemote r o uri file sz = ifAnnexed file adduri geturi
where where
loguri = setDownloader uri OtherDownloader loguri = setDownloader uri OtherDownloader
adduri = addUrlChecked relaxed loguri file (Remote.uuid r) checkexistssize adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize
checkexistssize key = return $ case sz of checkexistssize key = return $ case sz of
Nothing -> (True, True, uri) Nothing -> (True, True, uri)
Just n -> (True, n == fromMaybe n (keySize key), uri) Just n -> (True, n == fromMaybe n (keySize key), uri)
geturi = next $ isJust <$> downloadRemoteFile r relaxed uri file sz geturi = next $ isJust <$> downloadRemoteFile r (downloadOptions o) uri file sz
downloadRemoteFile :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key) downloadRemoteFile :: Remote -> DownloadOptions -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
downloadRemoteFile r relaxed uri file sz = checkCanAdd file $ do downloadRemoteFile r o uri file sz = checkCanAdd file $ do
let urlkey = Backend.URL.fromUrl uri sz let urlkey = Backend.URL.fromUrl uri sz
liftIO $ createDirectoryIfMissing True (parentDir file) liftIO $ createDirectoryIfMissing True (parentDir file)
ifM (Annex.getState Annex.fast <||> pure relaxed) ifM (Annex.getState Annex.fast <||> pure (relaxedOption o))
( do ( do
addWorkTree (Remote.uuid r) loguri file urlkey Nothing addWorkTree (Remote.uuid r) loguri file urlkey Nothing
return (Just urlkey) return (Just urlkey)
@ -190,10 +193,10 @@ startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring
go url = do go url = do
showStart' "addurl" (Just urlstring) showStart' "addurl" (Just urlstring)
pathmax <- liftIO $ fileNameLengthLimit "." pathmax <- liftIO $ fileNameLengthLimit "."
urlinfo <- if relaxedOption o urlinfo <- if relaxedOption (downloadOptions o)
then pure Url.assumeUrlExists then pure Url.assumeUrlExists
else Url.withUrlOptions (Url.getUrlInfo urlstring) else Url.withUrlOptions (Url.getUrlInfo urlstring)
file <- adjustFile o <$> case fileOption o of file <- adjustFile o <$> case fileOption (downloadOptions o) of
Just f -> pure f Just f -> pure f
Nothing -> case Url.urlSuggestedFile urlinfo of Nothing -> case Url.urlSuggestedFile urlinfo of
Nothing -> pure $ url2file url (pathdepthOption o) pathmax Nothing -> pure $ url2file url (pathdepthOption o) pathmax
@ -209,24 +212,24 @@ startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring
performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform
performWeb o url file urlinfo = ifAnnexed file addurl geturl performWeb o url file urlinfo = ifAnnexed file addurl geturl
where where
geturl = next $ isJust <$> addUrlFile (Just o) (relaxedOption o) url urlinfo file geturl = next $ isJust <$> addUrlFile (downloadOptions o) url urlinfo file
addurl = addUrlChecked (relaxedOption o) url file webUUID $ \k -> addurl = addUrlChecked o url file webUUID $ \k ->
ifM (youtubeDlSupported url) ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
( return (True, True, setDownloader url YoutubeDownloader) ( return (True, True, setDownloader url YoutubeDownloader)
, return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url) , return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url)
) )
{- Check that the url exists, and has the same size as the key, {- Check that the url exists, and has the same size as the key,
- and add it as an url to the key. -} - and add it as an url to the key. -}
addUrlChecked :: Bool -> URLString -> FilePath -> UUID -> (Key -> Annex (Bool, Bool, URLString)) -> Key -> CommandPerform addUrlChecked :: AddUrlOptions -> URLString -> FilePath -> UUID -> (Key -> Annex (Bool, Bool, URLString)) -> Key -> CommandPerform
addUrlChecked relaxed url file u checkexistssize key = addUrlChecked o url file u checkexistssize key =
ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key)) ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
( do ( do
showDestinationFile file showDestinationFile file
next $ return True next $ return True
, do , do
(exists, samesize, url') <- checkexistssize key (exists, samesize, url') <- checkexistssize key
if exists && (samesize || relaxed) if exists && (samesize || relaxedOption (downloadOptions o))
then do then do
setUrlPresent u key url' setUrlPresent u key url'
next $ return True next $ return True
@ -243,15 +246,15 @@ addUrlChecked relaxed url file u checkexistssize key =
- different file, based on the title of the media. Unless the user - different file, based on the title of the media. Unless the user
- specified fileOption, which then forces using the FilePath. - specified fileOption, which then forces using the FilePath.
-} -}
addUrlFile :: Maybe AddUrlOptions -> Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) addUrlFile :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
addUrlFile mo relaxed url urlinfo file = addUrlFile o url urlinfo file =
ifM (Annex.getState Annex.fast <||> pure relaxed) ifM (Annex.getState Annex.fast <||> pure (relaxedOption o))
( nodownloadWeb mo url urlinfo file ( nodownloadWeb o url urlinfo file
, downloadWeb mo url urlinfo file , downloadWeb o url urlinfo file
) )
downloadWeb :: Maybe AddUrlOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) downloadWeb :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
downloadWeb mo url urlinfo file = downloadWeb o url urlinfo file =
go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file)) go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file))
where where
urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
@ -261,7 +264,7 @@ downloadWeb mo url urlinfo file =
go Nothing = return Nothing go Nothing = return Nothing
-- If we downloaded a html file, try to use youtube-dl to -- If we downloaded a html file, try to use youtube-dl to
-- extract embedded media. -- extract embedded media.
go (Just tmp) = ifM (liftIO $ isHtml <$> readFile tmp) go (Just tmp) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtml <$> readFile tmp))
( tryyoutubedl tmp ( tryyoutubedl tmp
, normalfinish tmp , normalfinish tmp
) )
@ -276,7 +279,7 @@ downloadWeb mo url urlinfo file =
case dl of case dl of
Right (Just mediafile) -> do Right (Just mediafile) -> do
pruneTmpWorkDirBefore tmp (liftIO . nukeFile) pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
let dest = if isJust (fileOption =<< mo) let dest = if isJust (fileOption o)
then file then file
else takeFileName mediafile else takeFileName mediafile
checkCanAdd dest $ do checkCanAdd dest $ do
@ -374,18 +377,21 @@ addWorkTree u url file key mtmp = case mtmp of
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp , maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp
) )
nodownloadWeb :: Maybe AddUrlOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) nodownloadWeb :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
nodownloadWeb mo url urlinfo file nodownloadWeb o url urlinfo file
| Url.urlExists urlinfo = go =<< youtubeDlFileName url | Url.urlExists urlinfo = if rawOption o
then nomedia
else either (const nomedia) usemedia
=<< youtubeDlFileName url
| otherwise = do | otherwise = do
warning $ "unable to access url: " ++ url warning $ "unable to access url: " ++ url
return Nothing return Nothing
where where
go (Left _) = do nomedia = do
let key = Backend.URL.fromUrl url (Url.urlSize urlinfo) let key = Backend.URL.fromUrl url (Url.urlSize urlinfo)
nodownloadWeb' url key file nodownloadWeb' url key file
go (Right mediafile) = do usemedia mediafile = do
let dest = if isJust (fileOption =<< mo) let dest = if isJust (fileOption o)
then file then file
else takeFileName mediafile else takeFileName mediafile
let mediaurl = setDownloader url YoutubeDownloader let mediaurl = setDownloader url YoutubeDownloader

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2013 Joey Hess <id@joeyh.name> - Copyright 2013-2017 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -32,7 +32,7 @@ import Types.UrlContents
import Logs.Web import Logs.Web
import qualified Utility.Format import qualified Utility.Format
import Utility.Tmp import Utility.Tmp
import Command.AddUrl (addUrlFile, downloadRemoteFile, parseRelaxedOption, parseRawOption) import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, DownloadOptions(..))
import Annex.Perms import Annex.Perms
import Annex.UUID import Annex.UUID
import Backend.URL (fromUrl) import Backend.URL (fromUrl)
@ -51,8 +51,7 @@ cmd = notBareRepo $
data ImportFeedOptions = ImportFeedOptions data ImportFeedOptions = ImportFeedOptions
{ feedUrls :: CmdParams { feedUrls :: CmdParams
, templateOption :: Maybe String , templateOption :: Maybe String
, relaxedOption :: Bool , downloadOptions :: DownloadOptions
, rawOption :: Bool
} }
optParser :: CmdParamsDesc -> Parser ImportFeedOptions optParser :: CmdParamsDesc -> Parser ImportFeedOptions
@ -62,8 +61,7 @@ optParser desc = ImportFeedOptions
( long "template" <> metavar paramFormat ( long "template" <> metavar paramFormat
<> help "template for filenames" <> help "template for filenames"
)) ))
<*> parseRelaxedOption <*> parseDownloadOptions False
<*> parseRawOption
seek :: ImportFeedOptions -> CommandSeek seek :: ImportFeedOptions -> CommandSeek
seek o = do seek o = do
@ -165,12 +163,19 @@ performDownload opts cache todownload = case location todownload of
Enclosure url -> checkknown url $ Enclosure url -> checkknown url $
rundownload url (takeWhile (/= '?') $ takeExtension url) $ \f -> do rundownload url (takeWhile (/= '?') $ takeExtension url) $ \f -> do
r <- Remote.claimingUrl url r <- Remote.claimingUrl url
if Remote.uuid r == webUUID || rawOption opts if Remote.uuid r == webUUID || rawOption (downloadOptions opts)
then do then do
urlinfo <- if relaxedOption opts urlinfo <- if relaxedOption (downloadOptions opts)
then pure Url.assumeUrlExists then pure Url.assumeUrlExists
else Url.withUrlOptions (Url.getUrlInfo url) else Url.withUrlOptions (Url.getUrlInfo url)
maybeToList <$> addUrlFile Nothing (relaxedOption opts) url urlinfo f let dlopts = (downloadOptions opts)
-- force using the filename
-- chosen here
{ fileOption = Just f
-- don't use youtube-dl
, rawOption = True
}
maybeToList <$> addUrlFile dlopts url urlinfo f
else do else do
res <- tryNonAsync $ maybe res <- tryNonAsync $ maybe
(error $ "unable to checkUrl of " ++ Remote.name r) (error $ "unable to checkUrl of " ++ Remote.name r)
@ -180,10 +185,10 @@ performDownload opts cache todownload = case location todownload of
Left _ -> return [] Left _ -> return []
Right (UrlContents sz _) -> Right (UrlContents sz _) ->
maybeToList <$> maybeToList <$>
downloadRemoteFile r (relaxedOption opts) url f sz downloadRemoteFile r (downloadOptions opts) url f sz
Right (UrlMulti l) -> do Right (UrlMulti l) -> do
kl <- forM l $ \(url', sz, subf) -> kl <- forM l $ \(url', sz, subf) ->
downloadRemoteFile r (relaxedOption opts) url' (f </> fromSafeFilePath subf) sz downloadRemoteFile r (downloadOptions opts) url' (f </> fromSafeFilePath subf) sz
return $ if all isJust kl return $ if all isJust kl
then catMaybes kl then catMaybes kl
else [] else []
@ -196,7 +201,7 @@ performDownload opts cache todownload = case location todownload of
-- to avoid adding it a second time. -- to avoid adding it a second time.
let quviurl = setDownloader linkurl QuviDownloader let quviurl = setDownloader linkurl QuviDownloader
checkknown mediaurl $ checkknown quviurl $ checkknown mediaurl $ checkknown quviurl $
ifM (Annex.getState Annex.fast <||> pure (relaxedOption opts)) ifM (Annex.getState Annex.fast <||> pure (relaxedOption (downloadOptions opts)))
( addmediafast linkurl mediaurl mediakey ( addmediafast linkurl mediaurl mediakey
, downloadmedia linkurl mediaurl mediakey , downloadmedia linkurl mediaurl mediakey
) )
@ -261,36 +266,41 @@ performDownload opts cache todownload = case location todownload of
, tryanother , tryanother
) )
downloadmedia linkurl mediaurl mediakey = do downloadmedia linkurl mediaurl mediakey
r <- withTmpWorkDir mediakey $ \workdir -> do | rawOption (downloadOptions opts) = downloadlink
dl <- youtubeDl linkurl workdir | otherwise = do
case dl of r <- withTmpWorkDir mediakey $ \workdir -> do
Right (Just mediafile) -> do dl <- youtubeDl linkurl workdir
let ext = case takeExtension mediafile of case dl of
[] -> ".m" Right (Just mediafile) -> do
s -> s let ext = case takeExtension mediafile of
ok <- rundownload linkurl ext $ \f -> do [] -> ".m"
addWorkTree webUUID mediaurl f mediakey (Just mediafile) s -> s
return [mediakey] ok <- rundownload linkurl ext $ \f -> do
return (Just ok) addWorkTree webUUID mediaurl f mediakey (Just mediafile)
-- youtude-dl didn't support it, so return [mediakey]
-- download it as if the link were return (Just ok)
-- an enclosure. -- youtude-dl didn't support it, so
Right Nothing -> Just <$> -- download it as if the link were
performDownload opts cache todownload -- an enclosure.
{ location = Enclosure linkurl } Right Nothing -> Just <$> downloadlink
Left msg -> do Left msg -> do
warning msg warning msg
return Nothing return Nothing
return (fromMaybe False r) return (fromMaybe False r)
where
addmediafast linkurl mediaurl mediakey = ifM (youtubeDlSupported linkurl) downloadlink = performDownload opts cache todownload
( rundownload linkurl ".m" $ \f -> do
addWorkTree webUUID mediaurl f mediakey Nothing
return [mediakey]
, performDownload opts cache todownload
{ location = Enclosure linkurl } { location = Enclosure linkurl }
)
addmediafast linkurl mediaurl mediakey =
ifM (pure (not (rawOption (downloadOptions opts)))
<&&> youtubeDlSupported linkurl)
( rundownload linkurl ".m" $ \f -> do
addWorkTree webUUID mediaurl f mediakey Nothing
return [mediakey]
, performDownload opts cache todownload
{ location = Enclosure linkurl }
)
defaultTemplate :: String defaultTemplate :: String
defaultTemplate = "${feedtitle}/${itemtitle}${extension}" defaultTemplate = "${feedtitle}/${itemtitle}${extension}"