youtube-dl working
Including resuming and cleanup of incomplete downloads. Still todo: --fast, --relaxed, importfeed, disk reserve checking, quvi code cleanup. This commit was sponsored by Anthony DeRobertis on Patreon.
This commit is contained in:
parent
4e7e1fcff4
commit
99bebdface
12 changed files with 163 additions and 70 deletions
|
@ -1017,14 +1017,15 @@ pruneTmpWorkDirBefore f action = do
|
|||
{- Runs an action, passing it a temporary work directory where
|
||||
- it can write files while receiving the content of a key.
|
||||
-
|
||||
- On exception, the temporary work directory is left, so resumes can
|
||||
- use it.
|
||||
- On exception, or when the action returns a Left value,
|
||||
- the temporary work directory is left, so resumes can use it.
|
||||
-}
|
||||
withTmpWorkDir :: Key -> (FilePath -> Annex a) -> Annex a
|
||||
withTmpWorkDir key action = withTmp key $ \obj -> do
|
||||
withTmpWorkDir :: Key -> (FilePath -> Annex (Either a b)) -> Annex (Either a b)
|
||||
withTmpWorkDir key action = do
|
||||
-- Create the object file if it does not exist. This way,
|
||||
-- staleKeysPrune only has to look for object files, and can
|
||||
-- clean up gitAnnexTmpWorkDir for those it finds.
|
||||
obj <- prepTmp key
|
||||
unlessM (liftIO $ doesFileExist obj) $ do
|
||||
liftIO $ writeFile obj ""
|
||||
setAnnexFilePerm obj
|
||||
|
@ -1032,7 +1033,9 @@ withTmpWorkDir key action = withTmp key $ \obj -> do
|
|||
liftIO $ createDirectoryIfMissing True tmpdir
|
||||
setAnnexDirPerm tmpdir
|
||||
res <- action tmpdir
|
||||
liftIO $ removeDirectoryRecursive tmpdir
|
||||
case res of
|
||||
Right _ -> liftIO $ removeDirectoryRecursive tmpdir
|
||||
Left _ -> noop
|
||||
return res
|
||||
|
||||
{- Finds items in the first, smaller list, that are not
|
||||
|
|
|
@ -263,7 +263,7 @@ gitAnnexTmpWorkDir :: FilePath -> FilePath
|
|||
gitAnnexTmpWorkDir p =
|
||||
let (dir, f) = splitFileName p
|
||||
-- Using a prefix avoids name conflict with any other keys.
|
||||
in dir </> "work." </> f
|
||||
in dir </> "work." ++ f
|
||||
|
||||
{- .git/annex/bad/ is used for bad files found during fsck -}
|
||||
gitAnnexBadDir :: Git.Repo -> FilePath
|
||||
|
|
|
@ -39,7 +39,7 @@ instance Transferrable URLString where
|
|||
{- Wrap around an action that performs a transfer, which may run multiple
|
||||
- attempts. Displays notification when supported and when the user asked
|
||||
- for it. -}
|
||||
notifyTransfer :: Transferrable t => Direction -> t -> (NotifyWitness -> Annex Bool) -> Annex Bool
|
||||
notifyTransfer :: Transferrable t => Observable v => Direction -> t -> (NotifyWitness -> Annex v) -> Annex v
|
||||
notifyTransfer direction t a = case descTransfrerrable t of
|
||||
Nothing -> a NotifyWitness
|
||||
Just desc -> do
|
||||
|
@ -51,12 +51,13 @@ notifyTransfer direction t a = case descTransfrerrable t of
|
|||
startnotification <- liftIO $ if notifyStart wanted
|
||||
then Just <$> Notify.notify client (startedTransferNote direction desc)
|
||||
else pure Nothing
|
||||
ok <- a NotifyWitness
|
||||
res <- a NotifyWitness
|
||||
let ok = observeBool res
|
||||
when (notifyFinish wanted) $ liftIO $ void $ maybe
|
||||
(Notify.notify client $ finishedTransferNote ok direction desc)
|
||||
(\n -> Notify.replace client n $ finishedTransferNote ok direction desc)
|
||||
startnotification
|
||||
return ok
|
||||
return res
|
||||
else a NotifyWitness
|
||||
#else
|
||||
a NotifyWitness
|
||||
|
|
|
@ -17,7 +17,7 @@ import Utility.Url
|
|||
withQuviOptions :: forall a. Query a -> [QuviParams] -> URLString -> Annex a
|
||||
withQuviOptions a ps url = do
|
||||
v <- quviVersion
|
||||
opts <- map Param . annexQuviOptions <$> Annex.getGitConfig
|
||||
opts <- return []
|
||||
liftIO $ a v (concatMap (\mkp -> mkp v) ps ++ opts) url
|
||||
|
||||
quviSupported :: URLString -> Annex Bool
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP, FlexibleInstances, BangPatterns #-}
|
||||
{-# LANGUAGE CPP, BangPatterns #-}
|
||||
|
||||
module Annex.Transfer (
|
||||
module X,
|
||||
|
@ -27,7 +27,6 @@ import Annex.Perms
|
|||
import Utility.Metered
|
||||
import Annex.LockPool
|
||||
import Types.Key
|
||||
import Types.Remote (Verification(..))
|
||||
import qualified Types.Remote as Remote
|
||||
import Types.Concurrency
|
||||
|
||||
|
@ -35,23 +34,6 @@ import Control.Concurrent
|
|||
import qualified Data.Map.Strict as M
|
||||
import Data.Ord
|
||||
|
||||
class Observable a where
|
||||
observeBool :: a -> Bool
|
||||
observeFailure :: a
|
||||
|
||||
instance Observable Bool where
|
||||
observeBool = id
|
||||
observeFailure = False
|
||||
|
||||
instance Observable (Bool, Verification) where
|
||||
observeBool = fst
|
||||
observeFailure = (False, UnVerified)
|
||||
|
||||
instance Observable (Either e Bool) where
|
||||
observeBool (Left _) = False
|
||||
observeBool (Right b) = b
|
||||
observeFailure = Right False
|
||||
|
||||
upload :: Observable v => UUID -> Key -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> NotifyWitness -> Annex v
|
||||
upload u key f d a _witness = guardHaveUUID u $
|
||||
runTransfer (Transfer Upload u key) f d a
|
||||
|
|
86
Annex/YoutubeDl.hs
Normal file
86
Annex/YoutubeDl.hs
Normal file
|
@ -0,0 +1,86 @@
|
|||
{- youtube-dl integration for git-annex
|
||||
-
|
||||
- Copyright 2017 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.YoutubeDl where
|
||||
|
||||
import Annex.Common
|
||||
import qualified Annex
|
||||
import Annex.Content
|
||||
import Utility.Url (URLString)
|
||||
|
||||
-- Runs youtube-dl in a work directory, to download a single media file
|
||||
-- from the url. Reutrns the path to the media file in the work directory.
|
||||
--
|
||||
-- If youtube-dl fails without writing any files to the work directory,
|
||||
-- or is not installed, returns Right Nothing.
|
||||
--
|
||||
-- The work directory can contain files from a previous run of youtube-dl
|
||||
-- and it will resume. It should not contain any other files though,
|
||||
-- and youtube-dl needs to finish up with only one file in the directory
|
||||
-- so we know which one it downloaded.
|
||||
--
|
||||
-- (Note that we can't use --output to specifiy the file to download to,
|
||||
-- due to <https://github.com/rg3/youtube-dl/issues/14864>)
|
||||
youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath))
|
||||
youtubeDl url workdir = ifM (liftIO (inPath "youtube-dl") <&&> runcmd)
|
||||
( do
|
||||
fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir
|
||||
case fs of
|
||||
(f:[]) -> return (Right (Just f))
|
||||
[] -> return nofiles
|
||||
_ -> return (toomanyfiles fs)
|
||||
, do
|
||||
fs <- liftIO $ filterM (doesFileExist) =<< dirContents workdir
|
||||
if null fs
|
||||
then return (Right Nothing)
|
||||
else return (Left "youtube-dl download is incomplete. Run the command again to resume.")
|
||||
)
|
||||
where
|
||||
nofiles = Left "youtube-dl did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
|
||||
toomanyfiles fs = Left $ "youtube-dl downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
|
||||
runcmd = do
|
||||
opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig
|
||||
quiet <- commandProgressDisabled
|
||||
let opts' = opts ++
|
||||
[ Param url
|
||||
-- To make youtube-dl only download one file,
|
||||
-- when given a page with a video and a playlist,
|
||||
-- download only the video.
|
||||
, Param "--no-playlist"
|
||||
-- And when given a page with only a playlist,
|
||||
-- download only the first video on the playlist.
|
||||
-- (Assumes the video is somewhat stable, but
|
||||
-- this is the only way to prevent youtube-dl
|
||||
-- from downloading the whole playlist.)
|
||||
, Param "--playlist-items", Param "0"
|
||||
-- TODO --max-filesize
|
||||
] ++
|
||||
if quiet then [ Param "--quiet" ] else []
|
||||
liftIO $ boolSystem' "youtube-dl" opts' $
|
||||
\p -> p { cwd = Just workdir }
|
||||
|
||||
-- Download a media file to a destination,
|
||||
youtubeDlTo :: Key -> URLString -> FilePath -> Annex Bool
|
||||
youtubeDlTo key url dest = do
|
||||
res <- withTmpWorkDir key $ \workdir -> do
|
||||
dl <- youtubeDl url workdir
|
||||
case dl of
|
||||
Right (Just mediafile) -> do
|
||||
liftIO $ renameFile mediafile dest
|
||||
return (Right True)
|
||||
Right Nothing -> return (Right False)
|
||||
Left msg -> return (Left msg)
|
||||
case res of
|
||||
Left msg -> do
|
||||
warning msg
|
||||
return False
|
||||
Right r -> return r
|
||||
|
||||
-- Check if youtube-dl can still find media in an url.
|
||||
youtubeDlSupported :: URLString -> Annex (Either String Bool)
|
||||
youtubeDlSupported url = liftIO $ catchMsgIO $
|
||||
snd <$> processTranscript "youtube-dl" [ url, "--simulate" ] Nothing
|
|
@ -21,6 +21,7 @@ import Annex.Content
|
|||
import Annex.Ingest
|
||||
import Annex.CheckIgnore
|
||||
import Annex.UUID
|
||||
import Annex.YoutubeDl
|
||||
import Logs.Web
|
||||
import Types.KeySource
|
||||
import Types.UrlContents
|
||||
|
@ -291,42 +292,44 @@ addUrlFile relaxed url urlinfo file
|
|||
|
||||
downloadWeb :: URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
|
||||
downloadWeb url urlinfo file =
|
||||
go =<< downloadWith' downloader dummykey webUUID url (AssociatedFile (Just file))
|
||||
go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file))
|
||||
where
|
||||
dummykey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
|
||||
urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
|
||||
downloader f p = do
|
||||
showOutput
|
||||
downloadUrl dummykey p [url] f
|
||||
downloadUrl urlkey p [url] f
|
||||
go Nothing = return Nothing
|
||||
-- If we downloaded a html file, try to use youtube-dl to
|
||||
-- extract embedded media.
|
||||
go (Just tmp) = ifM (liftIO $ isHtml <$> readFile tmp)
|
||||
( do
|
||||
-- TODO need a directory based on dummykey,
|
||||
-- which unused needs to clean up like
|
||||
-- it does gitAnnexTmpObjectLocation
|
||||
tmpdir <- undefined
|
||||
liftIO $ createDirectoryIfMissing True tmpdir
|
||||
mf <- youtubeDl url tmpdir
|
||||
case mf of
|
||||
Just mediafile -> do
|
||||
liftIO $ nukeFile tmp
|
||||
let mediaurl = setDownloader url YoutubeDownloader
|
||||
let key = Backend.URL.fromUrl mediaurl Nothing
|
||||
let dest = takeFileName mediafile
|
||||
showDestinationFile dest
|
||||
cleanup webUUID mediaurl dest key (Just mediafile)
|
||||
return (Just key)
|
||||
Nothing -> normalfinish tmp
|
||||
( tryyoutubedl tmp
|
||||
, normalfinish tmp
|
||||
)
|
||||
normalfinish tmp = do
|
||||
showDestinationFile file
|
||||
liftIO $ createDirectoryIfMissing True (parentDir file)
|
||||
finishDownloadWith tmp webUUID url file
|
||||
|
||||
youtubeDl :: URLString -> FilePath -> Annex (Maybe FilePath)
|
||||
youtubeDl = undefined -- TODO
|
||||
tryyoutubedl tmp = do
|
||||
let mediaurl = setDownloader url YoutubeDownloader
|
||||
let mediakey = Backend.URL.fromUrl mediaurl Nothing
|
||||
res <- withTmpWorkDir mediakey $ \workdir ->
|
||||
Transfer.notifyTransfer Transfer.Download url $
|
||||
Transfer.download webUUID mediakey (AssociatedFile Nothing) Transfer.noRetry $ \_p -> do
|
||||
dl <- youtubeDl url workdir
|
||||
case dl of
|
||||
Right (Just mediafile) -> do
|
||||
pruneTmpWorkDirBefore tmp (liftIO . nukeFile)
|
||||
let dest = takeFileName mediafile
|
||||
showDestinationFile dest
|
||||
cleanup webUUID mediaurl dest mediakey (Just mediafile)
|
||||
return $ Right $ Just mediakey
|
||||
Right Nothing -> Right <$> normalfinish tmp
|
||||
Left msg -> return $ Left msg
|
||||
case res of
|
||||
Left msg -> do
|
||||
warning msg
|
||||
return Nothing
|
||||
Right r -> return r
|
||||
|
||||
showDestinationFile :: FilePath -> Annex ()
|
||||
showDestinationFile file = do
|
||||
|
@ -388,7 +391,7 @@ cleanup u url file key mtmp = case mtmp of
|
|||
Nothing -> go
|
||||
Just tmp -> do
|
||||
-- Move to final location for large file check.
|
||||
liftIO $ renameFile tmp file
|
||||
pruneTmpWorkDirBefore tmp (\_ -> liftIO $ renameFile tmp file)
|
||||
largematcher <- largeFilesMatcher
|
||||
large <- checkFileMatcher largematcher file
|
||||
if large
|
||||
|
@ -407,7 +410,7 @@ cleanup u url file key mtmp = case mtmp of
|
|||
( do
|
||||
when (isJust mtmp) $
|
||||
logStatus key InfoPresent
|
||||
, liftIO $ maybe noop nukeFile mtmp
|
||||
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp
|
||||
)
|
||||
|
||||
-- TODO youtube-dl
|
||||
|
|
|
@ -19,8 +19,7 @@ import Logs.Web
|
|||
import Annex.UUID
|
||||
import Utility.Metered
|
||||
import qualified Annex.Url as Url
|
||||
import Annex.Quvi
|
||||
import qualified Utility.Quvi as Quvi
|
||||
import Annex.YoutubeDl
|
||||
|
||||
remote :: RemoteType
|
||||
remote = RemoteType
|
||||
|
@ -80,9 +79,7 @@ downloadKey key _af dest p = unVerified $ get =<< getWebUrls key
|
|||
untilTrue urls $ \u -> do
|
||||
let (u', downloader) = getDownloader u
|
||||
case downloader of
|
||||
QuviDownloader -> do
|
||||
flip (downloadUrl key p) dest
|
||||
=<< withQuviOptions Quvi.queryLinks [Quvi.httponly, Quvi.quiet] u'
|
||||
YoutubeDownloader -> youtubeDlTo key u' dest
|
||||
_ -> downloadUrl key p [u'] dest
|
||||
|
||||
downloadKeyCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
|
||||
|
@ -109,8 +106,7 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
|
|||
let (u', downloader) = getDownloader u
|
||||
showChecking u'
|
||||
case downloader of
|
||||
QuviDownloader ->
|
||||
Right <$> withQuviOptions Quvi.check [Quvi.httponly, Quvi.quiet] u'
|
||||
YoutubeDownloader -> youtubeDlSupported u'
|
||||
_ -> do
|
||||
Url.withUrlOptions $ catchMsgIO .
|
||||
Url.checkBoth u' (keySize key)
|
||||
|
@ -126,4 +122,4 @@ getWebUrls :: Key -> Annex [URLString]
|
|||
getWebUrls key = filter supported <$> getUrls key
|
||||
where
|
||||
supported u = snd (getDownloader u)
|
||||
`elem` [WebDownloader, QuviDownloader]
|
||||
`elem` [WebDownloader, YoutubeDownloader]
|
||||
|
|
|
@ -67,7 +67,7 @@ data GitConfig = GitConfig
|
|||
, annexSyncContent :: Configurable Bool
|
||||
, annexDebug :: Bool
|
||||
, annexWebOptions :: [String]
|
||||
, annexQuviOptions :: [String]
|
||||
, annexYoutubeDlOptions :: [String]
|
||||
, annexAriaTorrentOptions :: [String]
|
||||
, annexWebDownloadCommand :: Maybe String
|
||||
, annexCrippledFileSystem :: Bool
|
||||
|
@ -127,7 +127,7 @@ extractGitConfig r = GitConfig
|
|||
getmaybebool (annex "synccontent")
|
||||
, annexDebug = getbool (annex "debug") False
|
||||
, annexWebOptions = getwords (annex "web-options")
|
||||
, annexQuviOptions = getwords (annex "quvi-options")
|
||||
, annexYoutubeDlOptions = getwords (annex "youtube-dl-options")
|
||||
, annexAriaTorrentOptions = getwords (annex "aria-torrent-options")
|
||||
, annexWebDownloadCommand = getmaybe (annex "web-download-command")
|
||||
, annexCrippledFileSystem = getbool (annex "crippledfilesystem") False
|
||||
|
|
|
@ -5,9 +5,12 @@
|
|||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
|
||||
module Types.Transfer where
|
||||
|
||||
import Types
|
||||
import Types.Remote (Verification(..))
|
||||
import Utility.PID
|
||||
import Utility.QuickCheck
|
||||
|
||||
|
@ -66,3 +69,25 @@ instance Arbitrary TransferInfo where
|
|||
-- associated file cannot be empty (but can be Nothing)
|
||||
<*> (AssociatedFile <$> arbitrary `suchThat` (/= Just ""))
|
||||
<*> arbitrary
|
||||
|
||||
class Observable a where
|
||||
observeBool :: a -> Bool
|
||||
observeFailure :: a
|
||||
|
||||
instance Observable Bool where
|
||||
observeBool = id
|
||||
observeFailure = False
|
||||
|
||||
instance Observable (Bool, Verification) where
|
||||
observeBool = fst
|
||||
observeFailure = (False, UnVerified)
|
||||
|
||||
instance Observable (Either e Bool) where
|
||||
observeBool (Left _) = False
|
||||
observeBool (Right b) = b
|
||||
observeFailure = Right False
|
||||
|
||||
instance Observable (Either e (Maybe a)) where
|
||||
observeBool (Right (Just _)) = True
|
||||
observeBool _ = False
|
||||
observeFailure = Right Nothing
|
||||
|
|
|
@ -10,8 +10,8 @@ git annex addurl `[url ...]`
|
|||
|
||||
Downloads each url to its own file, which is added to the annex.
|
||||
|
||||
When `youtube-dl` is installed, it's used to download videos
|
||||
embedded on web pages.
|
||||
When `youtube-dl` is installed, it's used to check for a video embedded in
|
||||
a web page at the url, and that is added to the annex instead.
|
||||
|
||||
Urls to torrent files (including magnet links) will cause the content of
|
||||
the torrent to be downloaded, using `aria2c`.
|
||||
|
@ -28,10 +28,6 @@ be used to get better filenames.
|
|||
Avoid immediately downloading the url. The url is still checked
|
||||
(via HEAD) to verify that it exists, and to get its size if possible.
|
||||
|
||||
When `youtube-dl` is installed, videos embedded on web pages
|
||||
will be added. To avoid the extra work of checking for videos,
|
||||
add the `--raw` option.
|
||||
|
||||
* `--relaxed`
|
||||
|
||||
Don't immediately download the url, and avoid storing the size of the
|
||||
|
|
|
@ -547,6 +547,7 @@ Executable git-annex
|
|||
Annex.View.ViewedFile
|
||||
Annex.Wanted
|
||||
Annex.WorkTree
|
||||
Annex.YoutubeDl
|
||||
Assistant
|
||||
Assistant.Alert
|
||||
Assistant.Alert.Utility
|
||||
|
|
Loading…
Reference in a new issue