rethought --relaxed change

Better to make it not be surprising and slow, than surprising and fast.
--raw can be used when it needs to be really fast.

Implemented adding a youtube-dl supported url to an existing file.

This commit was sponsored by andrea rota.
This commit is contained in:
Joey Hess 2017-11-30 13:45:43 -04:00
parent 8a0038ec23
commit 2528e3ddb0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 75 additions and 84 deletions

View file

@ -1017,10 +1017,10 @@ pruneTmpWorkDirBefore f action = do
{- Runs an action, passing it a temporary work directory where
- it can write files while receiving the content of a key.
-
- On exception, or when the action returns a Left value,
- On exception, or when the action returns Nothing,
- the temporary work directory is left, so resumes can use it.
-}
withTmpWorkDir :: Key -> (FilePath -> Annex (Either a b)) -> Annex (Either a b)
withTmpWorkDir :: Key -> (FilePath -> Annex (Maybe a)) -> Annex (Maybe a)
withTmpWorkDir key action = do
-- Create the object file if it does not exist. This way,
-- staleKeysPrune only has to look for object files, and can
@ -1034,8 +1034,8 @@ withTmpWorkDir key action = do
setAnnexDirPerm tmpdir
res <- action tmpdir
case res of
Right _ -> liftIO $ removeDirectoryRecursive tmpdir
Left _ -> noop
Just _ -> liftIO $ removeDirectoryRecursive tmpdir
Nothing -> noop
return res
{- Finds items in the first, smaller list, that are not

View file

@ -71,14 +71,12 @@ youtubeDlTo key url dest = do
case dl of
Right (Just mediafile) -> do
liftIO $ renameFile mediafile dest
return (Right True)
Right Nothing -> return (Right False)
Left msg -> return (Left msg)
case res of
Left msg -> do
warning msg
return False
Right r -> return r
return (Just True)
Right Nothing -> return (Just False)
Left msg -> do
warning msg
return Nothing
return (fromMaybe False res)
youtubeDlSupported :: URLString -> Annex Bool
youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url

View file

@ -3,10 +3,10 @@ git-annex (6.20171125) UNRELEASED; urgency=medium
* Use youtube-dl rather than quvi to download media from web pages,
since quvi is not being actively developed and youtube-dl supports
many more sites.
* addurl --relaxed no longer checks for embedded media, since youtube-dl
does not allow doing so without hitting the network, which would make
this no faster than addurl --fast. Use addurl --fast instead if you
want embedded media to be downloaded.
* addurl --relaxed got slower, since youtube-dl has to hit the network
to check for embedded media. If you relied on --relaxed not hitting the
network for speed reasons, using --relaxed --raw will get the old level
of speed, but can't be used for urls with embedded videos.
* importfeed now downloads things linked to by feeds, even when they are
not media files.

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
- Copyright 2011-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -155,8 +155,8 @@ performRemote r relaxed uri file sz = ifAnnexed file adduri geturi
loguri = setDownloader uri OtherDownloader
adduri = addUrlChecked relaxed loguri (Remote.uuid r) checkexistssize
checkexistssize key = return $ case sz of
Nothing -> (True, True)
Just n -> (True, n == fromMaybe n (keySize key))
Nothing -> (True, True, uri)
Just n -> (True, n == fromMaybe n (keySize key), uri)
geturi = next $ isJust <$> downloadRemoteFile r relaxed uri file sz
downloadRemoteFile :: Remote -> Bool -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
@ -210,22 +210,23 @@ performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPe
performWeb o url file urlinfo = ifAnnexed file addurl geturl
where
geturl = next $ isJust <$> addUrlFile (Just o) (relaxedOption o) url urlinfo file
-- TODO youtube-dl
addurl = addUrlChecked (relaxedOption o) url webUUID $ \k -> return $
(Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k)
addurl = addUrlChecked (relaxedOption o) url webUUID $ \k ->
ifM (youtubeDlSupported url)
( return (True, True, setDownloader url YoutubeDownloader)
, return (Url.urlExists urlinfo, Url.urlSize urlinfo == keySize k, url)
)
addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool)) -> Key -> CommandPerform
addUrlChecked relaxed url u checkexistssize key
| relaxed = do
setUrlPresent u key url
next $ return True
| otherwise = ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
{- Check that the url exists, and has the same size as the key,
- and add it as an url to the key. -}
addUrlChecked :: Bool -> URLString -> UUID -> (Key -> Annex (Bool, Bool, URLString)) -> Key -> CommandPerform
addUrlChecked relaxed url u checkexistssize key =
ifM ((elem url <$> getUrls key) <&&> (elem u <$> loggedLocations key))
( next $ return True -- nothing to do
, do
(exists, samesize) <- checkexistssize key
if exists && samesize
(exists, samesize, url') <- checkexistssize key
if exists && (samesize || relaxed)
then do
setUrlPresent u key url
setUrlPresent u key url'
next $ return True
else do
warning $ "while adding a new url to an already annexed file, " ++ if exists
@ -234,20 +235,16 @@ addUrlChecked relaxed url u checkexistssize key
stop
)
{- Downloads an url and adds it to the repository, normally at the specified
- FilePath. But, if youtube-dl supports the url, it will be written to a
{- Downloads an url (except in fast or relaxed mode) and adds it to the
- repository, normally at the specified FilePath.
- But, if youtube-dl supports the url, it will be written to a
- different file, based on the title of the media. Unless the user
- specified fileOption, which then forces using the FilePath.
-}
addUrlFile :: Maybe AddUrlOptions -> Bool -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
addUrlFile mo relaxed url urlinfo file
| relaxed = checkCanAdd file $ do
liftIO $ createDirectoryIfMissing True (parentDir file)
nodownload url urlinfo file
| otherwise = ifM (Annex.getState Annex.fast)
( checkCanAdd file $ do
liftIO $ createDirectoryIfMissing True (parentDir file)
nodownload url urlinfo file
addUrlFile mo relaxed url urlinfo file =
ifM (Annex.getState Annex.fast <||> pure relaxed)
( nodownloadWeb url urlinfo file
, downloadWeb mo url urlinfo file
)
@ -266,33 +263,31 @@ downloadWeb mo url urlinfo file =
( tryyoutubedl tmp
, normalfinish tmp
)
normalfinish tmp = do
normalfinish tmp = checkCanAdd file $ do
showDestinationFile file
liftIO $ createDirectoryIfMissing True (parentDir file)
finishDownloadWith tmp webUUID url file
tryyoutubedl tmp = do
let mediaurl = setDownloader url YoutubeDownloader
let mediakey = Backend.URL.fromUrl mediaurl Nothing
res <- withTmpWorkDir mediakey $ \workdir ->
Transfer.notifyTransfer Transfer.Download url $
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
dl <- youtubeDl url workdir
case dl of
Right (Just mediafile) -> do
pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
let dest = if isJust (fileOption <$> mo)
then file
else takeFileName mediafile
tryyoutubedl tmp = withTmpWorkDir mediakey $ \workdir ->
Transfer.notifyTransfer Transfer.Download url $
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
dl <- youtubeDl url workdir
case dl of
Right (Just mediafile) -> do
pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
let dest = if isJust (fileOption <$> mo)
then file
else takeFileName mediafile
checkCanAdd dest $ do
showDestinationFile dest
addWorkTree webUUID mediaurl dest mediakey (Just mediafile)
return $ Right $ Just mediakey
Right Nothing -> Right <$> normalfinish tmp
Left msg -> return $ Left msg
case res of
Left msg -> do
warning msg
return Nothing
Right r -> return r
return $ Just mediakey
Right Nothing -> normalfinish tmp
Left msg -> do
warning msg
return Nothing
where
mediaurl = setDownloader url YoutubeDownloader
mediakey = Backend.URL.fromUrl mediaurl Nothing
showDestinationFile :: FilePath -> Annex ()
showDestinationFile file = do
@ -378,9 +373,10 @@ addWorkTree u url file key mtmp = case mtmp of
)
-- TODO youtube-dl
nodownload :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
nodownload url urlinfo file
| Url.urlExists urlinfo = do
nodownloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
nodownloadWeb url urlinfo file
| Url.urlExists urlinfo = checkCanAdd file $ do
liftIO $ createDirectoryIfMissing True (parentDir file)
let key = Backend.URL.fromUrl url (Url.urlSize urlinfo)
addWorkTree webUUID url file key Nothing
return (Just key)

View file

@ -272,19 +272,17 @@ performDownload opts cache todownload = case location todownload of
ok <- rundownload linkurl ext $ \f -> do
addWorkTree webUUID mediaurl f mediakey (Just mediafile)
return [mediakey]
return (Right ok)
return (Just ok)
-- youtude-dl didn't support it, so
-- download it as if the link were
-- an enclosure.
Right Nothing -> Right <$>
Right Nothing -> Just <$>
performDownload opts cache todownload
{ location = Enclosure linkurl }
Left msg -> return (Left msg)
case r of
Left msg -> do
warning msg
return False
Right b -> return b
Left msg -> do
warning msg
return Nothing
return (fromMaybe False r)
addmediafast linkurl mediaurl mediakey = ifM (youtubeDlSupported linkurl)
( rundownload linkurl ".m" $ \f -> do

View file

@ -87,7 +87,7 @@ instance Observable (Either e Bool) where
observeBool (Right b) = b
observeFailure = Right False
instance Observable (Either e (Maybe a)) where
observeBool (Right (Just _)) = True
observeBool _ = False
observeFailure = Right Nothing
instance Observable (Maybe a) where
observeBool (Just _) = True
observeBool Nothing = False
observeFailure = Nothing

View file

@ -32,13 +32,12 @@ be used to get better filenames.
Don't immediately download the url, and avoid storing the size of the
url's content. This makes git-annex accept whatever content is there
at a future point. This is also the fastest option, since it does not
hit the network at all.
Note that this does *not* check for embedded videos using `youtube-dl`,
although it used to in previous versions of git-annex.
Use --fast instead if you want to do that.
at a future point.
This is the fastest option, but it still has to access the network
to check if the url contains embedded media. When adding large numbers
of urls, using `--relaxed --raw` is much faster.
* `--raw`
Prevent special handling of urls by youtube-dl, bittorrent, and other