From 2528e3ddb048c4850bb541d75cc7125cae121d2e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 30 Nov 2017 13:45:43 -0400 Subject: [PATCH] rethought --relaxed change Better to make it not be surprising and slow, than surprising and fast. --raw can be used when it needs to be really fast. Implemented adding a youtube-dl supported url to an existing file. This commit was sponsored by andrea rota. --- Annex/Content.hs | 8 ++-- Annex/YoutubeDl.hs | 14 +++--- CHANGELOG | 8 ++-- Command/AddUrl.hs | 96 +++++++++++++++++++-------------------- Command/ImportFeed.hs | 14 +++--- Types/Transfer.hs | 8 ++-- doc/git-annex-addurl.mdwn | 11 ++--- 7 files changed, 75 insertions(+), 84 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 0899a12ab7..4db5fda383 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -1017,10 +1017,10 @@ pruneTmpWorkDirBefore f action = do {- Runs an action, passing it a temporary work directory where - it can write files while receiving the content of a key. - - - On exception, or when the action returns a Left value, + - On exception, or when the action returns Nothing, - the temporary work directory is left, so resumes can use it. -} -withTmpWorkDir :: Key -> (FilePath -> Annex (Either a b)) -> Annex (Either a b) +withTmpWorkDir :: Key -> (FilePath -> Annex (Maybe a)) -> Annex (Maybe a) withTmpWorkDir key action = do -- Create the object file if it does not exist. This way, -- staleKeysPrune only has to look for object files, and can @@ -1034,8 +1034,8 @@ withTmpWorkDir key action = do setAnnexDirPerm tmpdir res <- action tmpdir case res of - Right _ -> liftIO $ removeDirectoryRecursive tmpdir - Left _ -> noop + Just _ -> liftIO $ removeDirectoryRecursive tmpdir + Nothing -> noop return res {- Finds items in the first, smaller list, that are not diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs index 1eafa41737..4e33c2ff36 100644 --- a/Annex/YoutubeDl.hs +++ b/Annex/YoutubeDl.hs @@ -71,14 +71,12 @@ youtubeDlTo key url dest = do case dl of Right (Just mediafile) -> do liftIO $ renameFile mediafile dest - return (Right True) - Right Nothing -> return (Right False) - Left msg -> return (Left msg) - case res of - Left msg -> do - warning msg - return False - Right r -> return r + return (Just True) + Right Nothing -> return (Just False) + Left msg -> do + warning msg + return Nothing + return (fromMaybe False res) youtubeDlSupported :: URLString -> Annex Bool youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url diff --git a/CHANGELOG b/CHANGELOG index f7e396605a..090a41797f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,10 +3,10 @@ git-annex (6.20171125) UNRELEASED; urgency=medium * Use youtube-dl rather than quvi to download media from web pages, since quvi is not being actively developed and youtube-dl supports many more sites. - * addurl --relaxed no longer checks for embedded media, since youtube-dl - does not allow doing so without hitting the network, which would make - this no faster than addurl --fast. Use addurl --fast instead if you - want embedded media to be downloaded. + * addurl --relaxed got slower, since youtube-dl has to hit the network + to check for embedded media. If you relied on --relaxed not hitting the + network for speed reasons, using --relaxed --raw will get the old level + of speed, but can't be used for urls with embedded videos. * importfeed now downloads things linked to by feeds, even when they are not media files. diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 6db619e70a..12a6a4ff8c 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2011-2014 Joey Hess + - Copyright 2011-2017 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -155,8 +155,8 @@ performRemote r relaxed uri file sz = ifAnnexed file adduri geturi loguri = setDownloader uri OtherDownloader adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize checkexistssize key = return $ case sz of - Nothing -> (True, True) - Just n -> (True, n == fromMaybe n (keySize key)) + Nothing -> (True, True, uri) + Just n -> (True, n == fromMaybe n (keySize key), uri) geturi = next $ isJust <$> downloadRemoteFile r relaxed uri file sz downloadRemoteFile :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key) @@ -210,22 +210,23 @@ performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPe performWeb o url file urlinfo = ifAnnexed file addurl geturl where geturl = next $ isJust <$> addUrlFile (Just o) (relaxedOption o) url urlinfo file - -- TODO youtube-dl - addurl = addUrlChecked (relaxedOption o) url webUUID $ \k -> return $ - (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k) + addurl = addUrlChecked (relaxedOption o) url webUUID $ \k -> + ifM (youtubeDlSupported url) + ( return (True, True, setDownloader url YoutubeDownloader) + , return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url) + ) -addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform -addUrlChecked relaxed url u checkexistssize key - | relaxed = do - setUrlPresent u key url - next $ return True - | otherwise = ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key)) +{- Check that the url exists, and has the same size as the key, + - and add it as an url to the key. -} +addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool, URLString)) -> Key -> CommandPerform +addUrlChecked relaxed url u checkexistssize key = + ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key)) ( next $ return True -- nothing to do , do - (exists, samesize) <- checkexistssize key - if exists && samesize + (exists, samesize, url') <- checkexistssize key + if exists && (samesize || relaxed) then do - setUrlPresent u key url + setUrlPresent u key url' next $ return True else do warning $ "while adding a new url to an already annexed file, " ++ if exists @@ -234,20 +235,16 @@ addUrlChecked relaxed url u checkexistssize key stop ) -{- Downloads an url and adds it to the repository, normally at the specified - - FilePath. But, if youtube-dl supports the url, it will be written to a +{- Downloads an url (except in fast or relaxed mode) and adds it to the + - repository, normally at the specified FilePath. + - But, if youtube-dl supports the url, it will be written to a - different file, based on the title of the media. Unless the user - specified fileOption, which then forces using the FilePath. -} addUrlFile :: Maybe AddUrlOptions -> Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) -addUrlFile mo relaxed url urlinfo file - | relaxed = checkCanAdd file $ do - liftIO $ createDirectoryIfMissing True (parentDir file) - nodownload url urlinfo file - | otherwise = ifM (Annex.getState Annex.fast) - ( checkCanAdd file $ do - liftIO $ createDirectoryIfMissing True (parentDir file) - nodownload url urlinfo file +addUrlFile mo relaxed url urlinfo file = + ifM (Annex.getState Annex.fast <||> pure relaxed) + ( nodownloadWeb url urlinfo file , downloadWeb mo url urlinfo file ) @@ -266,33 +263,31 @@ downloadWeb mo url urlinfo file = ( tryyoutubedl tmp , normalfinish tmp ) - normalfinish tmp = do + normalfinish tmp = checkCanAdd file $ do showDestinationFile file liftIO $ createDirectoryIfMissing True (parentDir file) finishDownloadWith tmp webUUID url file - tryyoutubedl tmp = do - let mediaurl = setDownloader url YoutubeDownloader - let mediakey = Backend.URL.fromUrl mediaurl Nothing - res <- withTmpWorkDir mediakey $ \workdir -> - Transfer.notifyTransfer Transfer.Download url $ - Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do - dl <- youtubeDl url workdir - case dl of - Right (Just mediafile) -> do - pruneTmpWorkDirBefore tmp (liftIO . nukeFile) - let dest = if isJust (fileOption <$> mo) - then file - else takeFileName mediafile + tryyoutubedl tmp = withTmpWorkDir mediakey $ \workdir -> + Transfer.notifyTransfer Transfer.Download url $ + Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do + dl <- youtubeDl url workdir + case dl of + Right (Just mediafile) -> do + pruneTmpWorkDirBefore tmp (liftIO . nukeFile) + let dest = if isJust (fileOption <$> mo) + then file + else takeFileName mediafile + checkCanAdd dest $ do showDestinationFile dest addWorkTree webUUID mediaurl dest mediakey (Just mediafile) - return $ Right $ Just mediakey - Right Nothing -> Right <$> normalfinish tmp - Left msg -> return $ Left msg - case res of - Left msg -> do - warning msg - return Nothing - Right r -> return r + return $ Just mediakey + Right Nothing -> normalfinish tmp + Left msg -> do + warning msg + return Nothing + where + mediaurl = setDownloader url YoutubeDownloader + mediakey = Backend.URL.fromUrl mediaurl Nothing showDestinationFile :: FilePath -> Annex () showDestinationFile file = do @@ -378,9 +373,10 @@ addWorkTree u url file key mtmp = case mtmp of ) -- TODO youtube-dl -nodownload :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) -nodownload url urlinfo file - | Url.urlExists urlinfo = do +nodownloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) +nodownloadWeb url urlinfo file + | Url.urlExists urlinfo = checkCanAdd file $ do + liftIO $ createDirectoryIfMissing True (parentDir file) let key = Backend.URL.fromUrl url (Url.urlSize urlinfo) addWorkTree webUUID url file key Nothing return (Just key) diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index 1720060bde..c003302b6d 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -272,19 +272,17 @@ performDownload opts cache todownload = case location todownload of ok <- rundownload linkurl ext $ \f -> do addWorkTree webUUID mediaurl f mediakey (Just mediafile) return [mediakey] - return (Right ok) + return (Just ok) -- youtude-dl didn't support it, so -- download it as if the link were -- an enclosure. - Right Nothing -> Right <$> + Right Nothing -> Just <$> performDownload opts cache todownload { location = Enclosure linkurl } - Left msg -> return (Left msg) - case r of - Left msg -> do - warning msg - return False - Right b -> return b + Left msg -> do + warning msg + return Nothing + return (fromMaybe False r) addmediafast linkurl mediaurl mediakey = ifM (youtubeDlSupported linkurl) ( rundownload linkurl ".m" $ \f -> do diff --git a/Types/Transfer.hs b/Types/Transfer.hs index 73952c56ec..1b54f85110 100644 --- a/Types/Transfer.hs +++ b/Types/Transfer.hs @@ -87,7 +87,7 @@ instance Observable (Either e Bool) where observeBool (Right b) = b observeFailure = Right False -instance Observable (Either e (Maybe a)) where - observeBool (Right (Just _)) = True - observeBool _ = False - observeFailure = Right Nothing +instance Observable (Maybe a) where + observeBool (Just _) = True + observeBool Nothing = False + observeFailure = Nothing diff --git a/doc/git-annex-addurl.mdwn b/doc/git-annex-addurl.mdwn index 5d68f40123..a43976b569 100644 --- a/doc/git-annex-addurl.mdwn +++ b/doc/git-annex-addurl.mdwn @@ -32,13 +32,12 @@ be used to get better filenames. Don't immediately download the url, and avoid storing the size of the url's content. This makes git-annex accept whatever content is there - at a future point. This is also the fastest option, since it does not - hit the network at all. - - Note that this does *not* check for embedded videos using `youtube-dl`, - although it used to in previous versions of git-annex. - Use --fast instead if you want to do that. + at a future point. + This is the fastest option, but it still has to access the network + to check if the url contains embedded media. When adding large numbers + of urls, using `--relaxed --raw` is much faster. + * `--raw` Prevent special handling of urls by youtube-dl, bittorrent, and other