Improve resuming interrupted download when using yt-dlp
Sometimes yt-dlp fails to resume an interrupted download and instead downloads more files with different names. That left the workdir with multiple files at the end, which caused git-annex to give up because it could not tell which file had been downloaded. To fix this, use yt-dlp's --print-to-file option, which appends the name of each file to a list file once it has finished downloading it. That way, the presence of other cruft in the workdir will not confuse git-annex.
This commit is contained in:
parent
90d410b382
commit
a36a81dea3
2 changed files with 48 additions and 30 deletions
|
@ -48,16 +48,18 @@ youtubeDlNotAllowedMessage = unwords
|
||||||
--
|
--
|
||||||
-- Displays a progress meter as youtube-dl downloads.
|
-- Displays a progress meter as youtube-dl downloads.
|
||||||
--
|
--
|
||||||
-- If youtube-dl fails without writing any files to the work directory,
|
-- If no file is downloaded, or the program is not installed,
|
||||||
-- or is not installed, returns Right Nothing.
|
-- returns Right Nothing.
|
||||||
--
|
--
|
||||||
-- The work directory can contain files from a previous run of youtube-dl
|
-- youtube-dl can write to multiple files, either temporary files, or
|
||||||
-- and it will resume. It should not contain any other files though,
|
-- multiple videos found at the url, and git-annex needs only one file.
|
||||||
-- and youtube-dl needs to finish up with only one file in the directory
|
-- So we need to find the destination file, and make sure there is not
|
||||||
-- so we know which one it downloaded.
|
-- more than one. With yt-dlp use --print-to-file to make it record the
|
||||||
--
|
-- file(s) it downloads. With youtube-dl, the best that can be done is
|
||||||
-- (Note that we can't use --output to specify the file to download to,
|
-- to require that the work directory end up with only 1 file in it.
|
||||||
-- due to <https://github.com/rg3/youtube-dl/issues/14864>)
|
-- (This can fail, but youtube-dl is deprecated, and they closed my
|
||||||
|
-- issue requesting something like --print-to-file;
|
||||||
|
-- <https://github.com/rg3/youtube-dl/issues/14864>)
|
||||||
youtubeDl :: URLString -> FilePath -> MeterUpdate -> Annex (Either String (Maybe FilePath))
|
youtubeDl :: URLString -> FilePath -> MeterUpdate -> Annex (Either String (Maybe FilePath))
|
||||||
youtubeDl url workdir p = ifM ipAddressesUnlimited
|
youtubeDl url workdir p = ifM ipAddressesUnlimited
|
||||||
( withUrlOptions $ youtubeDl' url workdir p
|
( withUrlOptions $ youtubeDl' url workdir p
|
||||||
|
@ -66,29 +68,38 @@ youtubeDl url workdir p = ifM ipAddressesUnlimited
|
||||||
|
|
||||||
youtubeDl' :: URLString -> FilePath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe FilePath))
|
youtubeDl' :: URLString -> FilePath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe FilePath))
|
||||||
youtubeDl' url workdir p uo
|
youtubeDl' url workdir p uo
|
||||||
| supportedScheme uo url = ifM (liftIO . inSearchPath =<< youtubeDlCommand)
|
| supportedScheme uo url = do
|
||||||
( runcmd >>= \case
|
cmd <- youtubeDlCommand
|
||||||
Right True -> workdirfiles >>= \case
|
ifM (liftIO $ inSearchPath cmd)
|
||||||
(f:[]) -> return (Right (Just f))
|
( runcmd cmd >>= \case
|
||||||
[] -> return nofiles
|
Right True -> downloadedfiles cmd >>= \case
|
||||||
fs -> return (toomanyfiles fs)
|
(f:[]) -> return (Right (Just f))
|
||||||
Right False -> workdirfiles >>= \case
|
[] -> return nofiles
|
||||||
[] -> return (Right Nothing)
|
fs -> return (toomanyfiles fs)
|
||||||
_ -> return (Left "yt-dlp download is incomplete. Run the command again to resume.")
|
Right False -> workdirfiles >>= \case
|
||||||
Left msg -> return (Left msg)
|
[] -> return (Right Nothing)
|
||||||
, return (Right Nothing)
|
_ -> return (Left "yt-dlp download is incomplete. Run the command again to resume.")
|
||||||
)
|
Left msg -> return (Left msg)
|
||||||
|
, return (Right Nothing)
|
||||||
|
)
|
||||||
| otherwise = return (Right Nothing)
|
| otherwise = return (Right Nothing)
|
||||||
where
|
where
|
||||||
nofiles = Left "yt-dlp did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
|
nofiles = Left "yt-dlp did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
|
||||||
toomanyfiles fs = Left $ "yt-dlp downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
|
toomanyfiles fs = Left $ "yt-dlp downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
|
||||||
workdirfiles = liftIO $ filterM (doesFileExist) =<< dirContents workdir
|
downloadedfiles cmd
|
||||||
runcmd = youtubeDlMaxSize workdir >>= \case
|
| isytdlp cmd = liftIO $
|
||||||
|
(lines <$> readFile filelistfile)
|
||||||
|
`catchIO` (pure . const [])
|
||||||
|
| otherwise = workdirfiles
|
||||||
|
workdirfiles = liftIO $ filter (/= filelistfile)
|
||||||
|
<$> (filterM (doesFileExist) =<< dirContents workdir)
|
||||||
|
filelistfile = workdir </> filelistfilebase
|
||||||
|
filelistfilebase = "git-annex-file-list-file"
|
||||||
|
isytdlp cmd = "yt-dlp" `isInfixOf` cmd
|
||||||
|
runcmd cmd = youtubeDlMaxSize workdir >>= \case
|
||||||
Left msg -> return (Left msg)
|
Left msg -> return (Left msg)
|
||||||
Right maxsize -> do
|
Right maxsize -> do
|
||||||
cmd <- youtubeDlCommand
|
opts <- youtubeDlOpts (dlopts cmd ++ maxsize)
|
||||||
let isytdlp = "yt-dlp" `isInfixOf` cmd
|
|
||||||
opts <- youtubeDlOpts (dlopts isytdlp ++ maxsize)
|
|
||||||
oh <- mkOutputHandlerQuiet
|
oh <- mkOutputHandlerQuiet
|
||||||
-- The size is unknown to start. Once youtube-dl
|
-- The size is unknown to start. Once youtube-dl
|
||||||
-- outputs some progress, the meter will be updated
|
-- outputs some progress, the meter will be updated
|
||||||
|
@ -97,11 +108,11 @@ youtubeDl' url workdir p uo
|
||||||
let unknownsize = Nothing :: Maybe FileSize
|
let unknownsize = Nothing :: Maybe FileSize
|
||||||
ok <- metered (Just p) unknownsize Nothing $ \meter meterupdate ->
|
ok <- metered (Just p) unknownsize Nothing $ \meter meterupdate ->
|
||||||
liftIO $ commandMeter'
|
liftIO $ commandMeter'
|
||||||
(if isytdlp then parseYtdlpProgress else parseYoutubeDlProgress)
|
(if isytdlp cmd then parseYtdlpProgress else parseYoutubeDlProgress)
|
||||||
oh (Just meter) meterupdate cmd opts
|
oh (Just meter) meterupdate cmd opts
|
||||||
(\pr -> pr { cwd = Just workdir })
|
(\pr -> pr { cwd = Just workdir })
|
||||||
return (Right ok)
|
return (Right ok)
|
||||||
dlopts isytdlp =
|
dlopts cmd =
|
||||||
[ Param url
|
[ Param url
|
||||||
-- To make it only download one file when given a
|
-- To make it only download one file when given a
|
||||||
-- page with a video and a playlist, download only the video.
|
-- page with a video and a playlist, download only the video.
|
||||||
|
@ -112,8 +123,14 @@ youtubeDl' url workdir p uo
|
||||||
-- it from downloading the whole playlist.)
|
-- it from downloading the whole playlist.)
|
||||||
, Param "--playlist-items", Param "0"
|
, Param "--playlist-items", Param "0"
|
||||||
] ++
|
] ++
|
||||||
if isytdlp
|
if isytdlp cmd
|
||||||
then [Param "--progress-template", Param progressTemplate]
|
then
|
||||||
|
[ Param "--progress-template"
|
||||||
|
, Param progressTemplate
|
||||||
|
, Param "--print-to-file"
|
||||||
|
, Param "after_move:filepath"
|
||||||
|
, Param filelistfilebase
|
||||||
|
]
|
||||||
else []
|
else []
|
||||||
|
|
||||||
-- To honor annex.diskreserve, ask youtube-dl to not download too
|
-- To honor annex.diskreserve, ask youtube-dl to not download too
|
||||||
|
|
|
@ -93,6 +93,7 @@ git-annex (10.20230408) UNRELEASED; urgency=medium
|
||||||
* Fix display when run with -J1.
|
* Fix display when run with -J1.
|
||||||
* assistant: Fix a crash when a small file is deleted immediately after
|
* assistant: Fix a crash when a small file is deleted immediately after
|
||||||
being created.
|
being created.
|
||||||
|
* Improve resuming interrupted download when using yt-dlp.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Sat, 08 Apr 2023 13:57:18 -0400
|
-- Joey Hess <id@joeyh.name> Sat, 08 Apr 2023 13:57:18 -0400
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue