rethought --relaxed change

Better to make it not be surprising and slow, than surprising and fast.
--raw can be used when it needs to be really fast.

Implemented adding a youtube-dl supported url to an existing file.

This commit was sponsored by andrea rota.
This commit is contained in:
Joey Hess 2017-11-30 13:45:43 -04:00
parent 8a0038ec23
commit 2528e3ddb0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 75 additions and 84 deletions

View file

@ -1017,10 +1017,10 @@ pruneTmpWorkDirBefore f action = do
{- Runs an action, passing it a temporary work directory where {- Runs an action, passing it a temporary work directory where
- it can write files while receiving the content of a key. - it can write files while receiving the content of a key.
- -
- On exception, or when the action returns a Left value, - On exception, or when the action returns Nothing,
- the temporary work directory is left, so resumes can use it. - the temporary work directory is left, so resumes can use it.
-} -}
withTmpWorkDir :: Key -> (FilePath -> Annex (Either a b)) -> Annex (Either a b) withTmpWorkDir :: Key -> (FilePath -> Annex (Maybe a)) -> Annex (Maybe a)
withTmpWorkDir key action = do withTmpWorkDir key action = do
-- Create the object file if it does not exist. This way, -- Create the object file if it does not exist. This way,
-- staleKeysPrune only has to look for object files, and can -- staleKeysPrune only has to look for object files, and can
@ -1034,8 +1034,8 @@ withTmpWorkDir key action = do
setAnnexDirPerm tmpdir setAnnexDirPerm tmpdir
res <- action tmpdir res <- action tmpdir
case res of case res of
Right _ -> liftIO $ removeDirectoryRecursive tmpdir Just _ -> liftIO $ removeDirectoryRecursive tmpdir
Left _ -> noop Nothing -> noop
return res return res
{- Finds items in the first, smaller list, that are not {- Finds items in the first, smaller list, that are not

View file

@ -71,14 +71,12 @@ youtubeDlTo key url dest = do
case dl of case dl of
Right (Just mediafile) -> do Right (Just mediafile) -> do
liftIO $ renameFile mediafile dest liftIO $ renameFile mediafile dest
return (Right True) return (Just True)
Right Nothing -> return (Right False) Right Nothing -> return (Just False)
Left msg -> return (Left msg) Left msg -> do
case res of warning msg
Left msg -> do return Nothing
warning msg return (fromMaybe False res)
return False
Right r -> return r
youtubeDlSupported :: URLString -> Annex Bool youtubeDlSupported :: URLString -> Annex Bool
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url

View file

@ -3,10 +3,10 @@ git-annex (6.20171125) UNRELEASED; urgency=medium
* Use youtube-dl rather than quvi to download media from web pages, * Use youtube-dl rather than quvi to download media from web pages,
since quvi is not being actively developed and youtube-dl supports since quvi is not being actively developed and youtube-dl supports
many more sites. many more sites.
* addurl --relaxed no longer checks for embedded media, since youtube-dl * addurl --relaxed got slower, since youtube-dl has to hit the network
does not allow doing so without hitting the network, which would make to check for embedded media. If you relied on --relaxed not hitting the
this no faster than addurl --fast. Use addurl --fast instead if you network for speed reasons, using --relaxed --raw will get the old level
want embedded media to be downloaded. of speed, but can't be used for urls with embedded videos.
* importfeed now downloads things linked to by feeds, even when they are * importfeed now downloads things linked to by feeds, even when they are
not media files. not media files.

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2011-2014 Joey Hess <id@joeyh.name> - Copyright 2011-2017 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -155,8 +155,8 @@ performRemote r relaxed uri file sz = ifAnnexed file adduri geturi
loguri = setDownloader uri OtherDownloader loguri = setDownloader uri OtherDownloader
adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize
checkexistssize key = return $ case sz of checkexistssize key = return $ case sz of
Nothing -> (True, True) Nothing -> (True, True, uri)
Just n -> (True, n == fromMaybe n (keySize key)) Just n -> (True, n == fromMaybe n (keySize key), uri)
geturi = next $ isJust <$> downloadRemoteFile r relaxed uri file sz geturi = next $ isJust <$> downloadRemoteFile r relaxed uri file sz
downloadRemoteFile :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key) downloadRemoteFile :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
@ -210,22 +210,23 @@ performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPe
performWeb o url file urlinfo = ifAnnexed file addurl geturl performWeb o url file urlinfo = ifAnnexed file addurl geturl
where where
geturl = next $ isJust <$> addUrlFile (Just o) (relaxedOption o) url urlinfo file geturl = next $ isJust <$> addUrlFile (Just o) (relaxedOption o) url urlinfo file
-- TODO youtube-dl addurl = addUrlChecked (relaxedOption o) url webUUID $ \k ->
addurl = addUrlChecked (relaxedOption o) url webUUID $ \k -> return $ ifM (youtubeDlSupported url)
(Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k) ( return (True, True, setDownloader url YoutubeDownloader)
, return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url)
)
addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform {- Check that the url exists, and has the same size as the key,
addUrlChecked relaxed url u checkexistssize key - and add it as an url to the key. -}
| relaxed = do addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool, URLString)) -> Key -> CommandPerform
setUrlPresent u key url addUrlChecked relaxed url u checkexistssize key =
next $ return True ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
| otherwise = ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
( next $ return True -- nothing to do ( next $ return True -- nothing to do
, do , do
(exists, samesize) <- checkexistssize key (exists, samesize, url') <- checkexistssize key
if exists && samesize if exists && (samesize || relaxed)
then do then do
setUrlPresent u key url setUrlPresent u key url'
next $ return True next $ return True
else do else do
warning $ "while adding a new url to an already annexed file, " ++ if exists warning $ "while adding a new url to an already annexed file, " ++ if exists
@ -234,20 +235,16 @@ addUrlChecked relaxed url u checkexistssize key
stop stop
) )
{- Downloads an url and adds it to the repository, normally at the specified {- Downloads an url (except in fast or relaxed mode) and adds it to the
- FilePath. But, if youtube-dl supports the url, it will be written to a - repository, normally at the specified FilePath.
- But, if youtube-dl supports the url, it will be written to a
- different file, based on the title of the media. Unless the user - different file, based on the title of the media. Unless the user
- specified fileOption, which then forces using the FilePath. - specified fileOption, which then forces using the FilePath.
-} -}
addUrlFile :: Maybe AddUrlOptions -> Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) addUrlFile :: Maybe AddUrlOptions -> Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
addUrlFile mo relaxed url urlinfo file addUrlFile mo relaxed url urlinfo file =
| relaxed = checkCanAdd file $ do ifM (Annex.getState Annex.fast <||> pure relaxed)
liftIO $ createDirectoryIfMissing True (parentDir file) ( nodownloadWeb url urlinfo file
nodownload url urlinfo file
| otherwise = ifM (Annex.getState Annex.fast)
( checkCanAdd file $ do
liftIO $ createDirectoryIfMissing True (parentDir file)
nodownload url urlinfo file
, downloadWeb mo url urlinfo file , downloadWeb mo url urlinfo file
) )
@ -266,33 +263,31 @@ downloadWeb mo url urlinfo file =
( tryyoutubedl tmp ( tryyoutubedl tmp
, normalfinish tmp , normalfinish tmp
) )
normalfinish tmp = do normalfinish tmp = checkCanAdd file $ do
showDestinationFile file showDestinationFile file
liftIO $ createDirectoryIfMissing True (parentDir file) liftIO $ createDirectoryIfMissing True (parentDir file)
finishDownloadWith tmp webUUID url file finishDownloadWith tmp webUUID url file
tryyoutubedl tmp = do tryyoutubedl tmp = withTmpWorkDir mediakey $ \workdir ->
let mediaurl = setDownloader url YoutubeDownloader Transfer.notifyTransfer Transfer.Download url $
let mediakey = Backend.URL.fromUrl mediaurl Nothing Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
res <- withTmpWorkDir mediakey $ \workdir -> dl <- youtubeDl url workdir
Transfer.notifyTransfer Transfer.Download url $ case dl of
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do Right (Just mediafile) -> do
dl <- youtubeDl url workdir pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
case dl of let dest = if isJust (fileOption <$> mo)
Right (Just mediafile) -> do then file
pruneTmpWorkDirBefore tmp (liftIO . nukeFile) else takeFileName mediafile
let dest = if isJust (fileOption <$> mo) checkCanAdd dest $ do
then file
else takeFileName mediafile
showDestinationFile dest showDestinationFile dest
addWorkTree webUUID mediaurl dest mediakey (Just mediafile) addWorkTree webUUID mediaurl dest mediakey (Just mediafile)
return $ Right $ Just mediakey return $ Just mediakey
Right Nothing -> Right <$> normalfinish tmp Right Nothing -> normalfinish tmp
Left msg -> return $ Left msg Left msg -> do
case res of warning msg
Left msg -> do return Nothing
warning msg where
return Nothing mediaurl = setDownloader url YoutubeDownloader
Right r -> return r mediakey = Backend.URL.fromUrl mediaurl Nothing
showDestinationFile :: FilePath -> Annex () showDestinationFile :: FilePath -> Annex ()
showDestinationFile file = do showDestinationFile file = do
@ -378,9 +373,10 @@ addWorkTree u url file key mtmp = case mtmp of
) )
-- TODO youtube-dl -- TODO youtube-dl
nodownload :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key) nodownloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
nodownload url urlinfo file nodownloadWeb url urlinfo file
| Url.urlExists urlinfo = do | Url.urlExists urlinfo = checkCanAdd file $ do
liftIO $ createDirectoryIfMissing True (parentDir file)
let key = Backend.URL.fromUrl url (Url.urlSize urlinfo) let key = Backend.URL.fromUrl url (Url.urlSize urlinfo)
addWorkTree webUUID url file key Nothing addWorkTree webUUID url file key Nothing
return (Just key) return (Just key)

View file

@ -272,19 +272,17 @@ performDownload opts cache todownload = case location todownload of
ok <- rundownload linkurl ext $ \f -> do ok <- rundownload linkurl ext $ \f -> do
addWorkTree webUUID mediaurl f mediakey (Just mediafile) addWorkTree webUUID mediaurl f mediakey (Just mediafile)
return [mediakey] return [mediakey]
return (Right ok) return (Just ok)
-- youtude-dl didn't support it, so -- youtude-dl didn't support it, so
-- download it as if the link were -- download it as if the link were
-- an enclosure. -- an enclosure.
Right Nothing -> Right <$> Right Nothing -> Just <$>
performDownload opts cache todownload performDownload opts cache todownload
{ location = Enclosure linkurl } { location = Enclosure linkurl }
Left msg -> return (Left msg) Left msg -> do
case r of warning msg
Left msg -> do return Nothing
warning msg return (fromMaybe False r)
return False
Right b -> return b
addmediafast linkurl mediaurl mediakey = ifM (youtubeDlSupported linkurl) addmediafast linkurl mediaurl mediakey = ifM (youtubeDlSupported linkurl)
( rundownload linkurl ".m" $ \f -> do ( rundownload linkurl ".m" $ \f -> do

View file

@ -87,7 +87,7 @@ instance Observable (Either e Bool) where
observeBool (Right b) = b observeBool (Right b) = b
observeFailure = Right False observeFailure = Right False
instance Observable (Either e (Maybe a)) where instance Observable (Maybe a) where
observeBool (Right (Just _)) = True observeBool (Just _) = True
observeBool _ = False observeBool Nothing = False
observeFailure = Right Nothing observeFailure = Nothing

View file

@ -32,13 +32,12 @@ be used to get better filenames.
Don't immediately download the url, and avoid storing the size of the Don't immediately download the url, and avoid storing the size of the
url's content. This makes git-annex accept whatever content is there url's content. This makes git-annex accept whatever content is there
at a future point. This is also the fastest option, since it does not at a future point.
hit the network at all.
Note that this does *not* check for embedded videos using `youtube-dl`,
although it used to in previous versions of git-annex.
Use --fast instead if you want to do that.
This is the fastest option, but it still has to access the network
to check if the url contains embedded media. When adding large numbers
of urls, using `--relaxed --raw` is much faster.
* `--raw` * `--raw`
Prevent special handling of urls by youtube-dl, bittorrent, and other Prevent special handling of urls by youtube-dl, bittorrent, and other