rethought --relaxed change

Better to make it not be surprising and slow, than surprising and fast.
--raw can be used when it needs to be really fast.

Implemented adding a youtube-dl supported url to an existing file.

This commit was sponsored by andrea rota.
This commit is contained in:
Joey Hess 2017-11-30 13:45:43 -04:00
parent 8a0038ec23
commit 2528e3ddb0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 75 additions and 84 deletions

View file

@ -1017,10 +1017,10 @@ pruneTmpWorkDirBefore f action = do
{- Runs an action, passing it a temporary work directory where
- it can write files while receiving the content of a key.
-
- On exception, or when the action returns a Left value,
- On exception, or when the action returns Nothing,
- the temporary work directory is left, so resumes can use it.
-}
withTmpWorkDir :: Key -> (FilePath -> Annex (Either a b)) -> Annex (Either a b)
withTmpWorkDir :: Key -> (FilePath -> Annex (Maybe a)) -> Annex (Maybe a)
withTmpWorkDir key action = do
-- Create the object file if it does not exist. This way,
-- staleKeysPrune only has to look for object files, and can
@ -1034,8 +1034,8 @@ withTmpWorkDir key action = do
setAnnexDirPerm tmpdir
res <- action tmpdir
case res of
Right _ -> liftIO $ removeDirectoryRecursive tmpdir
Left _ -> noop
Just _ -> liftIO $ removeDirectoryRecursive tmpdir
Nothing -> noop
return res
{- Finds items in the first, smaller list, that are not

View file

@ -71,14 +71,12 @@ youtubeDlTo key url dest = do
case dl of
Right (Just mediafile) -> do
liftIO $ renameFile mediafile dest
return (Right True)
Right Nothing -> return (Right False)
Left msg -> return (Left msg)
case res of
Left msg -> do
warning msg
return False
Right r -> return r
return (Just True)
Right Nothing -> return (Just False)
Left msg -> do
warning msg
return Nothing
return (fromMaybe False res)
youtubeDlSupported :: URLString -> Annex Bool
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url

View file

@ -3,10 +3,10 @@ git-annex (6.20171125) UNRELEASED; urgency=medium
* Use youtube-dl rather than quvi to download media from web pages,
since quvi is not being actively developed and youtube-dl supports
many more sites.
* addurl --relaxed no longer checks for embedded media, since youtube-dl
does not allow doing so without hitting the network, which would make
this no faster than addurl --fast. Use addurl --fast instead if you
want embedded media to be downloaded.
* addurl --relaxed got slower, since youtube-dl has to hit the network
to check for embedded media. If you relied on --relaxed not hitting the
network for speed reasons, using --relaxed --raw will get the old level
of speed, but can't be used for urls with embedded videos.
* importfeed now downloads things linked to by feeds, even when they are
not media files.

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
- Copyright 2011-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -155,8 +155,8 @@ performRemote r relaxed uri file sz = ifAnnexed file adduri geturi
loguri = setDownloader uri OtherDownloader
adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize
checkexistssize key = return $ case sz of
Nothing -> (True, True)
Just n -> (True, n == fromMaybe n (keySize key))
Nothing -> (True, True, uri)
Just n -> (True, n == fromMaybe n (keySize key), uri)
geturi = next $ isJust <$> downloadRemoteFile r relaxed uri file sz
downloadRemoteFile :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
@ -210,22 +210,23 @@ performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPe
performWeb o url file urlinfo = ifAnnexed file addurl geturl
where
geturl = next $ isJust <$> addUrlFile (Just o) (relaxedOption o) url urlinfo file
-- TODO youtube-dl
addurl = addUrlChecked (relaxedOption o) url webUUID $ \k -> return $
(Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k)
addurl = addUrlChecked (relaxedOption o) url webUUID $ \k ->
ifM (youtubeDlSupported url)
( return (True, True, setDownloader url YoutubeDownloader)
, return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url)
)
addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform
addUrlChecked relaxed url u checkexistssize key
| relaxed = do
setUrlPresent u key url
next $ return True
| otherwise = ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
{- Check that the url exists, and has the same size as the key,
- and add it as an url to the key. -}
addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool, URLString)) -> Key -> CommandPerform
addUrlChecked relaxed url u checkexistssize key =
ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
( next $ return True -- nothing to do
, do
(exists, samesize) <- checkexistssize key
if exists && samesize
(exists, samesize, url') <- checkexistssize key
if exists && (samesize || relaxed)
then do
setUrlPresent u key url
setUrlPresent u key url'
next $ return True
else do
warning $ "while adding a new url to an already annexed file, " ++ if exists
@ -234,20 +235,16 @@ addUrlChecked relaxed url u checkexistssize key
stop
)
{- Downloads an url and adds it to the repository, normally at the specified
- FilePath. But, if youtube-dl supports the url, it will be written to a
{- Downloads an url (except in fast or relaxed mode) and adds it to the
- repository, normally at the specified FilePath.
- But, if youtube-dl supports the url, it will be written to a
- different file, based on the title of the media. Unless the user
- specified fileOption, which then forces using the FilePath.
-}
addUrlFile :: Maybe AddUrlOptions -> Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
addUrlFile mo relaxed url urlinfo file
| relaxed = checkCanAdd file $ do
liftIO $ createDirectoryIfMissing True (parentDir file)
nodownload url urlinfo file
| otherwise = ifM (Annex.getState Annex.fast)
( checkCanAdd file $ do
liftIO $ createDirectoryIfMissing True (parentDir file)
nodownload url urlinfo file
addUrlFile mo relaxed url urlinfo file =
ifM (Annex.getState Annex.fast <||> pure relaxed)
( nodownloadWeb url urlinfo file
, downloadWeb mo url urlinfo file
)
@ -266,33 +263,31 @@ downloadWeb mo url urlinfo file =
( tryyoutubedl tmp
, normalfinish tmp
)
normalfinish tmp = do
normalfinish tmp = checkCanAdd file $ do
showDestinationFile file
liftIO $ createDirectoryIfMissing True (parentDir file)
finishDownloadWith tmp webUUID url file
tryyoutubedl tmp = do
let mediaurl = setDownloader url YoutubeDownloader
let mediakey = Backend.URL.fromUrl mediaurl Nothing
res <- withTmpWorkDir mediakey $ \workdir ->
Transfer.notifyTransfer Transfer.Download url $
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
dl <- youtubeDl url workdir
case dl of
Right (Just mediafile) -> do
pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
let dest = if isJust (fileOption <$> mo)
then file
else takeFileName mediafile
tryyoutubedl tmp = withTmpWorkDir mediakey $ \workdir ->
Transfer.notifyTransfer Transfer.Download url $
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
dl <- youtubeDl url workdir
case dl of
Right (Just mediafile) -> do
pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
let dest = if isJust (fileOption <$> mo)
then file
else takeFileName mediafile
checkCanAdd dest $ do
showDestinationFile dest
addWorkTree webUUID mediaurl dest mediakey (Just mediafile)
return $ Right $ Just mediakey
Right Nothing -> Right <$> normalfinish tmp
Left msg -> return $ Left msg
case res of
Left msg -> do
warning msg
return Nothing
Right r -> return r
return $ Just mediakey
Right Nothing -> normalfinish tmp
Left msg -> do
warning msg
return Nothing
where
mediaurl = setDownloader url YoutubeDownloader
mediakey = Backend.URL.fromUrl mediaurl Nothing
showDestinationFile :: FilePath -> Annex ()
showDestinationFile file = do
@ -378,9 +373,10 @@ addWorkTree u url file key mtmp = case mtmp of
)
-- TODO youtube-dl
nodownload :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
nodownload url urlinfo file
| Url.urlExists urlinfo = do
nodownloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
nodownloadWeb url urlinfo file
| Url.urlExists urlinfo = checkCanAdd file $ do
liftIO $ createDirectoryIfMissing True (parentDir file)
let key = Backend.URL.fromUrl url (Url.urlSize urlinfo)
addWorkTree webUUID url file key Nothing
return (Just key)

View file

@ -272,19 +272,17 @@ performDownload opts cache todownload = case location todownload of
ok <- rundownload linkurl ext $ \f -> do
addWorkTree webUUID mediaurl f mediakey (Just mediafile)
return [mediakey]
return (Right ok)
return (Just ok)
-- youtude-dl didn't support it, so
-- download it as if the link were
-- an enclosure.
Right Nothing -> Right <$>
Right Nothing -> Just <$>
performDownload opts cache todownload
{ location = Enclosure linkurl }
Left msg -> return (Left msg)
case r of
Left msg -> do
warning msg
return False
Right b -> return b
Left msg -> do
warning msg
return Nothing
return (fromMaybe False r)
addmediafast linkurl mediaurl mediakey = ifM (youtubeDlSupported linkurl)
( rundownload linkurl ".m" $ \f -> do

View file

@ -87,7 +87,7 @@ instance Observable (Either e Bool) where
observeBool (Right b) = b
observeFailure = Right False
instance Observable (Either e (Maybe a)) where
observeBool (Right (Just _)) = True
observeBool _ = False
observeFailure = Right Nothing
instance Observable (Maybe a) where
observeBool (Just _) = True
observeBool Nothing = False
observeFailure = Nothing

View file

@ -32,12 +32,11 @@ be used to get better filenames.
Don't immediately download the url, and avoid storing the size of the
url's content. This makes git-annex accept whatever content is there
at a future point. This is also the fastest option, since it does not
hit the network at all.
at a future point.
Note that this does *not* check for embedded videos using `youtube-dl`,
although it used to in previous versions of git-annex.
Use --fast instead if you want to do that.
This is the fastest option, but it still has to access the network
to check if the url contains embedded media. When adding large numbers
of urls, using `--relaxed --raw` is much faster.
* `--raw`