youtube-dl working

Including resuming and cleanup of incomplete downloads.

Still todo: --fast, --relaxed, importfeed, disk reserve checking,
quvi code cleanup.

This commit was sponsored by Anthony DeRobertis on Patreon.
This commit is contained in:
Joey Hess 2017-11-29 15:49:05 -04:00
parent 4e7e1fcff4
commit 99bebdface
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
12 changed files with 163 additions and 70 deletions

View file

@ -1017,14 +1017,15 @@ pruneTmpWorkDirBefore f action = do
{- Runs an action, passing it a temporary work directory where
- it can write files while receiving the content of a key.
-
- On exception, the temporary work directory is left, so resumes can
- use it.
- On exception, or when the action returns a Left value,
- the temporary work directory is left, so resumes can use it.
-}
withTmpWorkDir :: Key -> (FilePath -> Annex a) -> Annex a
withTmpWorkDir key action = withTmp key $ \obj -> do
withTmpWorkDir :: Key -> (FilePath -> Annex (Either a b)) -> Annex (Either a b)
withTmpWorkDir key action = do
-- Create the object file if it does not exist. This way,
-- staleKeysPrune only has to look for object files, and can
-- clean up gitAnnexTmpWorkDir for those it finds.
obj <- prepTmp key
unlessM (liftIO $ doesFileExist obj) $ do
liftIO $ writeFile obj ""
setAnnexFilePerm obj
@ -1032,7 +1033,9 @@ withTmpWorkDir key action = withTmp key $ \obj -> do
liftIO $ createDirectoryIfMissing True tmpdir
setAnnexDirPerm tmpdir
res <- action tmpdir
liftIO $ removeDirectoryRecursive tmpdir
case res of
Right _ -> liftIO $ removeDirectoryRecursive tmpdir
Left _ -> noop
return res
{- Finds items in the first, smaller list, that are not

View file

@ -263,7 +263,7 @@ gitAnnexTmpWorkDir :: FilePath -> FilePath
gitAnnexTmpWorkDir p =
let (dir, f) = splitFileName p
-- Using a prefix avoids name conflict with any other keys.
in dir </> "work." </> f
in dir </> "work." ++ f
{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> FilePath

View file

@ -39,7 +39,7 @@ instance Transferrable URLString where
{- Wrap around an action that performs a transfer, which may run multiple
- attempts. Displays notification when supported and when the user asked
- for it. -}
notifyTransfer :: Transferrable t => Direction -> t -> (NotifyWitness -> Annex Bool) -> Annex Bool
notifyTransfer :: Transferrable t => Observable v => Direction -> t -> (NotifyWitness -> Annex v) -> Annex v
notifyTransfer direction t a = case descTransfrerrable t of
Nothing -> a NotifyWitness
Just desc -> do
@ -51,12 +51,13 @@ notifyTransfer direction t a = case descTransfrerrable t of
startnotification <- liftIO $ if notifyStart wanted
then Just <$> Notify.notify client (startedTransferNote direction desc)
else pure Nothing
ok <- a NotifyWitness
res <- a NotifyWitness
let ok = observeBool res
when (notifyFinish wanted) $ liftIO $ void $ maybe
(Notify.notify client $ finishedTransferNote ok direction desc)
(\n -> Notify.replace client n $ finishedTransferNote ok direction desc)
startnotification
return ok
return res
else a NotifyWitness
#else
a NotifyWitness

View file

@ -17,7 +17,7 @@ import Utility.Url
withQuviOptions :: forall a. Query a -> [QuviParams] -> URLString -> Annex a
withQuviOptions a ps url = do
v <- quviVersion
opts <- map Param . annexQuviOptions <$> Annex.getGitConfig
opts <- return []
liftIO $ a v (concatMap (\mkp -> mkp v) ps ++ opts) url
quviSupported :: URLString -> Annex Bool

View file

@ -5,7 +5,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP, FlexibleInstances, BangPatterns #-}
{-# LANGUAGE CPP, BangPatterns #-}
module Annex.Transfer (
module X,
@ -27,7 +27,6 @@ import Annex.Perms
import Utility.Metered
import Annex.LockPool
import Types.Key
import Types.Remote (Verification(..))
import qualified Types.Remote as Remote
import Types.Concurrency
@ -35,23 +34,6 @@ import Control.Concurrent
import qualified Data.Map.Strict as M
import Data.Ord
class Observable a where
observeBool :: a -> Bool
observeFailure :: a
instance Observable Bool where
observeBool = id
observeFailure = False
instance Observable (Bool, Verification) where
observeBool = fst
observeFailure = (False, UnVerified)
instance Observable (Either e Bool) where
observeBool (Left _) = False
observeBool (Right b) = b
observeFailure = Right False
upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
upload u key f d a _witness = guardHaveUUID u $
runTransfer (Transfer Upload u key) f d a

86
Annex/YoutubeDl.hs Normal file
View file

@ -0,0 +1,86 @@
{- youtube-dl integration for git-annex
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.YoutubeDl where
import Annex.Common
import qualified Annex
import Annex.Content
import Utility.Url (URLString)
-- Runs youtube-dl in a work directory, to download a single media file
-- from the url. Reutrns the path to the media file in the work directory.
--
-- If youtube-dl fails without writing any files to the work directory,
-- or is not installed, returns Right Nothing.
--
-- The work directory can contain files from a previous run of youtube-dl
-- and it will resume. It should not contain any other files though,
-- and youtube-dl needs to finish up with only one file in the directory
-- so we know which one it downloaded.
--
-- (Note that we can't use --output to specifiy the file to download to,
-- due to <https://github.com/rg3/youtube-dl/issues/14864>)
youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath))
youtubeDl url workdir = ifM (liftIO (inPath "youtube-dl") <&&> runcmd)
( do
fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir
case fs of
(f:[]) -> return (Right (Just f))
[] -> return nofiles
_ -> return (toomanyfiles fs)
, do
fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir
if null fs
then return (Right Nothing)
else return (Left "youtube-dl download is incomplete. Run the command again to resume.")
)
where
nofiles = Left "youtube-dl did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
toomanyfiles fs = Left $ "youtube-dl downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
runcmd = do
opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig
quiet <- commandProgressDisabled
let opts' = opts ++
[ Param url
-- To make youtube-dl only download one file,
-- when given a page with a video and a playlist,
-- download only the video.
, Param "--no-playlist"
-- And when given a page with only a playlist,
-- download only the first video on the playlist.
-- (Assumes the video is somewhat stable, but
-- this is the only way to prevent youtube-dl
-- from downloading the whole playlist.)
, Param "--playlist-items", Param "0"
-- TODO --max-filesize
] ++
if quiet then [ Param "--quiet" ] else []
liftIO $ boolSystem' "youtube-dl" opts' $
\p -> p { cwd = Just workdir }
-- Download a media file to a destination,
youtubeDlTo :: Key -> URLString -> FilePath -> Annex Bool
youtubeDlTo key url dest = do
res <- withTmpWorkDir key $ \workdir -> do
dl <- youtubeDl url workdir
case dl of
Right (Just mediafile) -> do
liftIO $ renameFile mediafile dest
return (Right True)
Right Nothing -> return (Right False)
Left msg -> return (Left msg)
case res of
Left msg -> do
warning msg
return False
Right r -> return r
-- Check if youtube-dl can still find media in an url.
youtubeDlSupported :: URLString -> Annex (Either String Bool)
youtubeDlSupported url = liftIO $ catchMsgIO $
snd <$> processTranscript "youtube-dl" [ url, "--simulate" ] Nothing

View file

@ -21,6 +21,7 @@ import Annex.Content
import Annex.Ingest
import Annex.CheckIgnore
import Annex.UUID
import Annex.YoutubeDl
import Logs.Web
import Types.KeySource
import Types.UrlContents
@ -291,42 +292,44 @@ addUrlFile relaxed url urlinfo file
downloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
downloadWeb url urlinfo file =
go =<< downloadWith' downloader dummykey webUUID url (AssociatedFile (Just file))
go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file))
where
dummykey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
downloader f p = do
showOutput
downloadUrl dummykey p [url] f
downloadUrl urlkey p [url] f
go Nothing = return Nothing
-- If we downloaded a html file, try to use youtube-dl to
-- extract embedded media.
go (Just tmp) = ifM (liftIO $ isHtml <$> readFile tmp)
( do
-- TODO need a directory based on dummykey,
-- which unused needs to clean up like
-- it does gitAnnexTmpObjectLocation
tmpdir <- undefined
liftIO $ createDirectoryIfMissing True tmpdir
mf <- youtubeDl url tmpdir
case mf of
Just mediafile -> do
liftIO $ nukeFile tmp
let mediaurl = setDownloader url YoutubeDownloader
let key = Backend.URL.fromUrl mediaurl Nothing
let dest = takeFileName mediafile
showDestinationFile dest
cleanup webUUID mediaurl dest key (Just mediafile)
return (Just key)
Nothing -> normalfinish tmp
( tryyoutubedl tmp
, normalfinish tmp
)
normalfinish tmp = do
showDestinationFile file
liftIO $ createDirectoryIfMissing True (parentDir file)
finishDownloadWith tmp webUUID url file
youtubeDl :: URLString -> FilePath -> Annex (Maybe FilePath)
youtubeDl = undefined -- TODO
tryyoutubedl tmp = do
let mediaurl = setDownloader url YoutubeDownloader
let mediakey = Backend.URL.fromUrl mediaurl Nothing
res <- withTmpWorkDir mediakey $ \workdir ->
Transfer.notifyTransfer Transfer.Download url $
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
dl <- youtubeDl url workdir
case dl of
Right (Just mediafile) -> do
pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
let dest = takeFileName mediafile
showDestinationFile dest
cleanup webUUID mediaurl dest mediakey (Just mediafile)
return $ Right $ Just mediakey
Right Nothing -> Right <$> normalfinish tmp
Left msg -> return $ Left msg
case res of
Left msg -> do
warning msg
return Nothing
Right r -> return r
showDestinationFile :: FilePath -> Annex ()
showDestinationFile file = do
@ -388,7 +391,7 @@ cleanup u url file key mtmp = case mtmp of
Nothing -> go
Just tmp -> do
-- Move to final location for large file check.
liftIO $ renameFile tmp file
pruneTmpWorkDirBefore tmp (\_ -> liftIO $ renameFile tmp file)
largematcher <- largeFilesMatcher
large <- checkFileMatcher largematcher file
if large
@ -407,7 +410,7 @@ cleanup u url file key mtmp = case mtmp of
( do
when (isJust mtmp) $
logStatus key InfoPresent
, liftIO $ maybe noop nukeFile mtmp
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp
)
-- TODO youtube-dl

View file

@ -19,8 +19,7 @@ import Logs.Web
import Annex.UUID
import Utility.Metered
import qualified Annex.Url as Url
import Annex.Quvi
import qualified Utility.Quvi as Quvi
import Annex.YoutubeDl
remote :: RemoteType
remote = RemoteType
@ -80,9 +79,7 @@ downloadKey key _af dest p = unVerified $ get =<< getWebUrls key
untilTrue urls $ \u -> do
let (u', downloader) = getDownloader u
case downloader of
QuviDownloader -> do
flip (downloadUrl key p) dest
=<< withQuviOptions Quvi.queryLinks [Quvi.httponly, Quvi.quiet] u'
YoutubeDownloader -> youtubeDlTo key u' dest
_ -> downloadUrl key p [u'] dest
downloadKeyCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
@ -109,8 +106,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
let (u', downloader) = getDownloader u
showChecking u'
case downloader of
QuviDownloader ->
Right <$> withQuviOptions Quvi.check [Quvi.httponly, Quvi.quiet] u'
YoutubeDownloader -> youtubeDlSupported u'
_ -> do
Url.withUrlOptions $ catchMsgIO .
Url.checkBoth u' (keySize key)
@ -126,4 +122,4 @@ getWebUrls :: Key -> Annex [URLString]
getWebUrls key = filter supported <$> getUrls key
where
supported u = snd (getDownloader u)
`elem` [WebDownloader, QuviDownloader]
`elem` [WebDownloader, YoutubeDownloader]

View file

@ -67,7 +67,7 @@ data GitConfig = GitConfig
, annexSyncContent :: Configurable Bool
, annexDebug :: Bool
, annexWebOptions :: [String]
, annexQuviOptions :: [String]
, annexYoutubeDlOptions :: [String]
, annexAriaTorrentOptions :: [String]
, annexWebDownloadCommand :: Maybe String
, annexCrippledFileSystem :: Bool
@ -127,7 +127,7 @@ extractGitConfig r = GitConfig
getmaybebool (annex "synccontent")
, annexDebug = getbool (annex "debug") False
, annexWebOptions = getwords (annex "web-options")
, annexQuviOptions = getwords (annex "quvi-options")
, annexYoutubeDlOptions = getwords (annex "youtube-dl-options")
, annexAriaTorrentOptions = getwords (annex "aria-torrent-options")
, annexWebDownloadCommand = getmaybe (annex "web-download-command")
, annexCrippledFileSystem = getbool (annex "crippledfilesystem") False

View file

@ -5,9 +5,12 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances #-}
module Types.Transfer where
import Types
import Types.Remote (Verification(..))
import Utility.PID
import Utility.QuickCheck
@ -66,3 +69,25 @@ instance Arbitrary TransferInfo where
-- associated file cannot be empty (but can be Nothing)
<*> (AssociatedFile <$> arbitrary `suchThat` (/= Just ""))
<*> arbitrary
class Observable a where
observeBool :: a -> Bool
observeFailure :: a
instance Observable Bool where
observeBool = id
observeFailure = False
instance Observable (Bool, Verification) where
observeBool = fst
observeFailure = (False, UnVerified)
instance Observable (Either e Bool) where
observeBool (Left _) = False
observeBool (Right b) = b
observeFailure = Right False
instance Observable (Either e (Maybe a)) where
observeBool (Right (Just _)) = True
observeBool _ = False
observeFailure = Right Nothing

View file

@ -10,8 +10,8 @@ git annex addurl `[url ...]`
Downloads each url to its own file, which is added to the annex.
When `youtube-dl` is installed, it's used to download videos
embedded on web pages.
When `youtube-dl` is installed, it's used to check for a video embedded in
a web page at the url, and that is added to the annex instead.
Urls to torrent files (including magnet links) will cause the content of
the torrent to be downloaded, using `aria2c`.
@ -28,10 +28,6 @@ be used to get better filenames.
Avoid immediately downloading the url. The url is still checked
(via HEAD) to verify that it exists, and to get its size if possible.
When `youtube-dl` is installed, videos embedded on web pages
will be added. To avoid the extra work of checking for videos,
add the `--raw` option.
* `--relaxed`
Don't immediately download the url, and avoid storing the size of the

View file

@ -547,6 +547,7 @@ Executable git-annex
Annex.View.ViewedFile
Annex.Wanted
Annex.WorkTree
Annex.YoutubeDl
Assistant
Assistant.Alert
Assistant.Alert.Utility