diff --git a/Annex.hs b/Annex.hs index 32a303239a..427c479d80 100644 --- a/Annex.hs +++ b/Annex.hs @@ -64,7 +64,6 @@ import Types.LockCache import Types.DesktopNotify import Types.CleanupActions import qualified Database.Keys.Handle as Keys -import Utility.Quvi (QuviVersion) import Utility.InodeCache import Utility.Url @@ -134,7 +133,6 @@ data AnnexState = AnnexState , errcounter :: Integer , unusedkeys :: Maybe (S.Set Key) , tempurls :: M.Map Key URLString - , quviversion :: Maybe QuviVersion , existinghooks :: M.Map Git.Hook.Hook Bool , desktopnotify :: DesktopNotify , workers :: [Either AnnexState (Async AnnexState)] @@ -190,7 +188,6 @@ newState c r = do , errcounter = 0 , unusedkeys = Nothing , tempurls = M.empty - , quviversion = Nothing , existinghooks = M.empty , desktopnotify = mempty , workers = [] diff --git a/Annex/Quvi.hs b/Annex/Quvi.hs deleted file mode 100644 index 79b9ee6faa..0000000000 --- a/Annex/Quvi.hs +++ /dev/null @@ -1,33 +0,0 @@ -{- quvi options for git-annex - - - - Copyright 2013 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -{-# LANGUAGE Rank2Types #-} - -module Annex.Quvi where - -import Annex.Common -import qualified Annex -import Utility.Quvi -import Utility.Url - -withQuviOptions :: forall a. Query a -> [QuviParams] -> URLString -> Annex a -withQuviOptions a ps url = do - v <- quviVersion - opts <- return [] - liftIO $ a v (concatMap (\mkp -> mkp v) ps ++ opts) url - -quviSupported :: URLString -> Annex Bool -quviSupported u = liftIO . flip supported u =<< quviVersion - -quviVersion :: Annex QuviVersion -quviVersion = go =<< Annex.getState Annex.quviversion - where - go (Just v) = return v - go Nothing = do - v <- liftIO probeVersion - Annex.changeState $ \s -> s { Annex.quviversion = Just v } - return v diff --git a/BuildInfo.hs b/BuildInfo.hs index 47dacc78bd..79253ee4f7 100644 --- a/BuildInfo.hs +++ b/BuildInfo.hs @@ -81,7 +81,6 @@ buildFlags = filter (not . null) -- Always enabled now, but users may be used to seeing these flags -- listed. , "Feeds" - , "Quvi" ] -- Not a complete list, let alone a listing transitive deps, but only diff --git a/CHANGELOG b/CHANGELOG index 521547f7a6..f7e396605a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,8 @@ git-annex (6.20171125) UNRELEASED; urgency=medium does not allow doing so without hitting the network, which would make this no faster than addurl --fast. Use addurl --fast instead if you want embedded media to be downloaded. + * importfeed now downloads things linked to by feeds, even when they are + not media files. -- Joey Hess Tue, 28 Nov 2017 13:48:44 -0400 diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 977bd8001a..06a960b0e4 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -165,7 +165,7 @@ downloadRemoteFile r relaxed uri file sz = checkCanAdd file $ do liftIO $ createDirectoryIfMissing True (parentDir file) ifM (Annex.getState Annex.fast <||> pure relaxed) ( do - cleanup (Remote.uuid r) loguri file urlkey Nothing + addWorkTree (Remote.uuid r) loguri file urlkey Nothing return (Just urlkey) , do -- Set temporary url for the urlkey @@ -214,46 +214,6 @@ performWeb o url file urlinfo = ifAnnexed file addurl geturl addurl = addUrlChecked (relaxedOption o) url webUUID $ \k -> return $ (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k) -performQuvi :: Bool -> URLString -> URLString -> FilePath -> CommandPerform -performQuvi relaxed pageurl videourl file = ifAnnexed file addurl geturl - where - quviurl = setDownloader pageurl QuviDownloader - addurl key = next $ do - cleanup webUUID quviurl file key Nothing - return True - geturl = next $ isJust <$> addUrlFileQuvi relaxed quviurl videourl file - -addUrlFileQuvi :: Bool -> URLString -> URLString -> FilePath -> Annex (Maybe Key) -addUrlFileQuvi relaxed quviurl videourl file = checkCanAdd file $ do - let key = Backend.URL.fromUrl quviurl Nothing - ifM (pure relaxed <||> Annex.getState Annex.fast) - ( do - cleanup webUUID quviurl file key Nothing - return (Just key) - , do - {- Get the size, and use that to check - - disk space. However, the size info is not - - retained, because the size of a video stream - - might change and we want to be able to download - - it later. -} - urlinfo <- Url.withUrlOptions (Url.getUrlInfo videourl) - let sizedkey = addSizeUrlKey urlinfo key - checkDiskSpaceToGet sizedkey Nothing $ do - tmp <- fromRepo $ gitAnnexTmpObjectLocation key - showOutput - ok <- Transfer.notifyTransfer Transfer.Download afile $ - Transfer.download webUUID key afile Transfer.forwardRetry $ \p -> do - liftIO $ createDirectoryIfMissing True (parentDir tmp) - downloadUrl key p [videourl] tmp - if ok - then do - cleanup webUUID quviurl file key (Just tmp) - return (Just key) - else return Nothing - ) - where - afile = AssociatedFile (Just file) - addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform addUrlChecked relaxed url u checkexistssize key | relaxed = do @@ -321,7 +281,7 @@ downloadWeb url urlinfo file = pruneTmpWorkDirBefore tmp (liftIO . nukeFile) let dest = takeFileName mediafile showDestinationFile dest - cleanup webUUID mediaurl dest mediakey (Just mediafile) + addWorkTree webUUID mediaurl dest mediakey (Just mediafile) return $ Right $ Just mediakey Right Nothing -> Right <$> normalfinish tmp Left msg -> return $ Left msg @@ -379,15 +339,16 @@ finishDownloadWith tmp u url file = do case k of Nothing -> return Nothing Just (key, _) -> do - cleanup u url file key (Just tmp) + addWorkTree u url file key (Just tmp) return (Just key) {- Adds the url size to the Key. -} addSizeUrlKey :: Url.UrlInfo -> Key -> Key addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo } -cleanup :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () -cleanup u url file key mtmp = case mtmp of +{- Adds worktree file to the repository. -} +addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () +addWorkTree u url file key mtmp = case mtmp of Nothing -> go Just tmp -> do -- Move to final location for large file check. @@ -418,7 +379,7 @@ nodownload :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) nodownload url urlinfo file | Url.urlExists urlinfo = do let key = Backend.URL.fromUrl url (Url.urlSize urlinfo) - cleanup webUUID url file key Nothing + addWorkTree webUUID url file key Nothing return (Just key) | otherwise = do warning $ "unable to access url: " ++ url diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index 0a8122b250..57daa22f83 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -36,12 +36,12 @@ import Command.AddUrl (addUrlFile, downloadRemoteFile, parseRelaxedOption, parse import Annex.Perms import Annex.UUID import Backend.URL (fromUrl) -import Annex.Quvi -import qualified Utility.Quvi as Quvi -import Command.AddUrl (addUrlFileQuvi) +import Annex.Content +import Annex.YoutubeDl import Types.MetaData import Logs.MetaData import Annex.MetaData +import Command.AddUrl (addWorkTree) cmd :: Command cmd = notBareRepo $ @@ -101,7 +101,7 @@ data ToDownload = ToDownload , location :: DownloadLocation } -data DownloadLocation = Enclosure URLString | QuviLink URLString +data DownloadLocation = Enclosure URLString | MediaLink URLString type ItemId = String @@ -141,14 +141,10 @@ findDownloads u = go =<< downloadFeed u Just (enclosureurl, _, _) -> return $ Just $ ToDownload f u i $ Enclosure $ fromFeed enclosureurl - Nothing -> mkquvi f i - mkquvi f i = case getItemLink i of - Just link -> ifM (quviSupported $ fromFeed link) - ( return $ Just $ ToDownload f u i $ QuviLink $ - fromFeed link - , return Nothing - ) - Nothing -> return Nothing + Nothing -> case getItemLink i of + Just link -> return $ Just $ ToDownload f u i $ + MediaLink $ fromFeed link + Nothing -> return Nothing {- Feeds change, so a feed download cannot be resumed. -} downloadFeed :: URLString -> Annex (Maybe Feed) @@ -192,19 +188,18 @@ performDownload opts cache todownload = case location todownload of then catMaybes kl else [] - QuviLink pageurl -> do - let quviurl = setDownloader pageurl QuviDownloader - checkknown quviurl $ do - mp <- withQuviOptions Quvi.query [Quvi.quiet, Quvi.httponly] pageurl - case mp of - Nothing -> return False - Just page -> case headMaybe $ Quvi.pageLinks page of - Nothing -> return False - Just link -> do - let videourl = Quvi.linkUrl link - checkknown videourl $ - rundownload videourl ("." ++ fromMaybe "m" (Quvi.linkSuffix link)) $ \f -> - maybeToList <$> addUrlFileQuvi (relaxedOption opts) quviurl videourl f + MediaLink linkurl -> do + let mediaurl = setDownloader linkurl YoutubeDownloader + let mediakey = Backend.URL.fromUrl mediaurl Nothing + -- Old versions of git-annex that used quvi might have + -- used the quviurl for this, so check i/f it's known + -- to avoid adding it a second time. + let quviurl = setDownloader linkurl QuviDownloader + checkknown mediaurl $ checkknown quviurl $ + ifM (Annex.getState Annex.fast <||> pure (relaxedOption opts)) + ( addmediafast linkurl mediaurl mediakey + , downloadmedia linkurl mediaurl mediakey + ) where forced = Annex.getState Annex.force @@ -265,6 +260,44 @@ performDownload opts cache todownload = case location todownload of ( return Nothing , tryanother ) + + downloadmedia linkurl mediaurl mediakey = do + r <- withTmpWorkDir mediakey $ \workdir -> do + dl <- youtubeDl linkurl workdir + case dl of + Right (Just mediafile) -> do + let ext = case takeExtension mediafile of + [] -> ".m" + s -> s + ok <- rundownload linkurl ext $ \f -> do + addWorkTree webUUID mediaurl f mediakey (Just mediafile) + return [mediakey] + return (Right ok) + -- youtude-dl didn't support it, so + -- download it as if the link were + -- an enclosure. + Right Nothing -> Right <$> + performDownload opts cache todownload + { location = Enclosure linkurl } + Left msg -> return (Left msg) + case r of + Left msg -> do + warning msg + return False + Right b -> return b + + addmediafast linkurl mediaurl mediakey = + youtubeDlSupported linkurl >>= \case + Right True -> + rundownload linkurl ".m" $ \f -> do + addWorkTree webUUID mediaurl f mediakey Nothing + return [mediakey] + Right False -> + performDownload opts cache todownload + { location = Enclosure linkurl } + Left msg -> do + warning msg + return False defaultTemplate :: String defaultTemplate = "${feedtitle}/${itemtitle}${extension}" diff --git a/Utility/Quvi.hs b/Utility/Quvi.hs deleted file mode 100644 index ff1ad854c5..0000000000 --- a/Utility/Quvi.hs +++ /dev/null @@ -1,162 +0,0 @@ -{- querying quvi (import qualified) - - - - Copyright 2013 Joey Hess - - - - License: BSD-2-clause - -} - -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE FlexibleContexts #-} - -module Utility.Quvi where - -import Common -import Utility.Url - -import Data.Aeson -import qualified Data.ByteString as B -import qualified Data.ByteString.Lazy as BL -import qualified Data.Map as M -import Network.URI (uriAuthority, uriRegName) -import Data.Char - -data QuviVersion - = Quvi04 - | Quvi09 - | NoQuvi - deriving (Show) - -data Page = Page - { pageTitle :: String - , pageLinks :: [Link] - } deriving (Show) - -data Link = Link - { linkSuffix :: Maybe String - , linkUrl :: URLString - } deriving (Show) - -{- JSON instances for quvi 0.4. -} -instance FromJSON Page where - parseJSON (Object v) = Page - <$> v .: "page_title" - <*> v .: "link" - parseJSON _ = mzero - -instance FromJSON Link where - parseJSON (Object v) = Link - <$> v .:? "file_suffix" - <*> v .: "url" - parseJSON _ = mzero - -{- "enum" format used by quvi 0.9 -} -parseEnum :: String -> Maybe Page -parseEnum s = Page - <$> get "QUVI_MEDIA_PROPERTY_TITLE" - <*> ((:[]) <$> - ( Link - <$> Just <$> (get "QUVI_MEDIA_STREAM_PROPERTY_CONTAINER") - <*> get "QUVI_MEDIA_STREAM_PROPERTY_URL" - ) - ) - where - get = flip M.lookup m - m = M.fromList $ map (separate (== '=')) $ lines s - -probeVersion :: IO QuviVersion -probeVersion = catchDefaultIO NoQuvi $ - examine <$> processTranscript "quvi" ["--version"] Nothing - where - examine (s, True) - | "quvi v0.4" `isInfixOf` s = Quvi04 - | otherwise = Quvi09 - examine _ = NoQuvi - -type Query a = QuviVersion -> [CommandParam] -> URLString -> IO a - -{- Throws an error when quvi is not installed. -} -forceQuery :: Query (Maybe Page) -forceQuery v ps url = query' v ps url `catchNonAsync` onerr - where - onerr e = ifM (inPath "quvi") - ( giveup ("quvi failed: " ++ show e) - , giveup "quvi is not installed" - ) - -{- Returns Nothing if the page is not a video page, or quvi is not - - installed. -} -query :: Query (Maybe Page) -query v ps url = flip catchNonAsync (const $ return Nothing) (query' v ps url) - -query' :: Query (Maybe Page) -query' Quvi09 ps url = parseEnum - <$> readQuvi (toCommand $ [Param "dump", Param "-p", Param "enum"] ++ ps ++ [Param url]) -query' Quvi04 ps url = do - let p = proc "quvi" (toCommand $ ps ++ [Param url]) - decode . BL.fromStrict - <$> withHandle StdoutHandle createProcessSuccess p B.hGetContents -query' NoQuvi _ _ = return Nothing - -queryLinks :: Query [URLString] -queryLinks v ps url = maybe [] (map linkUrl . pageLinks) <$> query v ps url - -{- Checks if quvi can still find a download link for an url. - - If quvi is not installed, returns False. -} -check :: Query Bool -check v ps url = maybe False (not . null . pageLinks) <$> query v ps url - -{- Checks if an url is supported by quvi, as quickly as possible - - (without hitting it if possible), and without outputting - - anything. Also returns False if quvi is not installed. -} -supported :: QuviVersion -> URLString -> IO Bool -supported NoQuvi _ = return False -supported Quvi04 url = boolSystem "quvi" - [ Param "--verbosity", Param "mute" - , Param "--support" - , Param url - ] -{- Use quvi-info to see if the url's domain is supported. - - If so, have to do a online verification of the url. -} -supported Quvi09 url = (firstlevel <&&> secondlevel) - `catchNonAsync` (\_ -> return False) - where - firstlevel = case uriAuthority =<< parseURIRelaxed url of - Nothing -> return False - Just auth -> do - let domain = map toLower $ uriRegName auth - let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ splitc '.' domain - any (\h -> domain `isSuffixOf` h || basedomain `isSuffixOf` h) - . map (map toLower) <$> listdomains Quvi09 - secondlevel = snd <$> processTranscript "quvi" - (toCommand [Param "dump", Param "-o", Param url]) Nothing - -listdomains :: QuviVersion -> IO [String] -listdomains Quvi09 = concatMap (splitc ',') - . concatMap (drop 1 . words) - . filter ("domains: " `isPrefixOf`) . lines - <$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"]) -listdomains _ = return [] - -type QuviParams = QuviVersion -> [CommandParam] - -{- Disables progress, but not information output. -} -quiet :: QuviParams --- Cannot use quiet as it now disables informational output. --- No way to disable progress. -quiet Quvi09 = [Param "--verbosity", Param "verbose"] -quiet Quvi04 = [Param "--verbosity", Param "quiet"] -quiet NoQuvi = [] - -{- Only return http results, not streaming protocols. -} -httponly :: QuviParams --- No way to do it with 0.9? -httponly Quvi04 = [Param "-c", Param "http"] -httponly _ = [] -- No way to do it with 0.9? - -readQuvi :: [String] -> IO String -readQuvi ps = withHandle StdoutHandle createProcessSuccess p $ \h -> do - r <- hGetContentsStrict h - hClose h - return r - where - p = proc "quvi" ps diff --git a/doc/git-annex-importfeed.mdwn b/doc/git-annex-importfeed.mdwn index 99eec85a4f..2f146fbfe0 100644 --- a/doc/git-annex-importfeed.mdwn +++ b/doc/git-annex-importfeed.mdwn @@ -8,7 +8,7 @@ git annex importfeed `[url ...]` # DESCRIPTION -Imports the contents of podcast feeds. Only downloads files whose +Imports the contents of podcasts and other feeds. Only downloads files whose content has not already been added to the repository before, so you can delete, rename, etc the resulting files and repeated runs won't duplicate them. @@ -37,6 +37,23 @@ To make the import process add metadata to the imported files from the feed, These options behave the same as when using [[git-annex-addurl]](1). +* `--fast` + + Avoid immediately downloading urls. The url is still checked + (via HEAD) to verify that it exists, and to get its size if possible. + +* `--relaxed` + + Don't immediately download urls, and avoid storing the size of the + url's content. This makes git-annex accept whatever content is there + at a future point. + +* `--raw` + + Prevent special handling of urls by youtube-dl, bittorrent, and other + special remotes. This will for example, make importfeed + download a .torrent file and not the contents it points to. + # SEE ALSO [[git-annex]](1) diff --git a/git-annex.cabal b/git-annex.cabal index cccedaee95..4e2269a51d 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -531,7 +531,6 @@ Executable git-annex Annex.Path Annex.Perms Annex.Queue - Annex.Quvi Annex.ReplaceFile Annex.SpecialRemote Annex.Ssh @@ -1035,7 +1034,6 @@ Executable git-annex Utility.Process Utility.Process.Shim Utility.QuickCheck - Utility.Quvi Utility.Rsync Utility.SRV Utility.SafeCommand