remove youtube-dl support, always use yt-dlp

The annex.youtube-dl-command git config is no longer used; git-annex always
runs the yt-dlp command, rather than the old youtube-dl command.

Sponsored-by: Leon Schuermann
This commit is contained in:
Joey Hess 2025-08-27 09:29:43 -04:00
commit 2a0ec700af
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 61 additions and 86 deletions

View file

@ -1,6 +1,6 @@
{- yt-dlp (and deprecated youtube-dl) integration for git-annex {- yt-dlp integration for git-annex
- -
- Copyright 2017-2024 Joey Hess <id@joeyh.name> - Copyright 2017-2025 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -41,7 +41,7 @@ import qualified Data.Aeson as Aeson
import GHC.Generics import GHC.Generics
import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Char8 as B8
-- youtube-dl can follow redirects to anywhere, including potentially -- yt-dlp can follow redirects to anywhere, including potentially
-- localhost or a private address. So, it's only allowed to download -- localhost or a private address. So, it's only allowed to download
-- content if the user has allowed access to all addresses. -- content if the user has allowed access to all addresses.
youtubeDlAllowed :: Annex Bool youtubeDlAllowed :: Annex Bool
@ -52,25 +52,21 @@ youtubeDlNotAllowedMessage = unwords
[ "This url is supported by yt-dlp, but" [ "This url is supported by yt-dlp, but"
, "yt-dlp could potentially access any address, and the" , "yt-dlp could potentially access any address, and the"
, "configuration of annex.security.allowed-ip-addresses" , "configuration of annex.security.allowed-ip-addresses"
, "does not allow that. Not using yt-dlp (or youtube-dl)." , "does not allow that. Not using yt-dlp."
] ]
-- Runs youtube-dl in a work directory, to download a single media file -- Runs yt-dlp in a work directory, to download a single media file
-- from the url. Returns the path to the media file in the work directory. -- from the url. Returns the path to the media file in the work directory.
-- --
-- Displays a progress meter as youtube-dl downloads. -- Displays a progress meter as yt-dlp downloads.
-- --
-- If no file is downloaded, returns Right Nothing. -- If no file is downloaded, returns Right Nothing.
-- --
-- youtube-dl can write to multiple files, either temporary files, or -- yt-dlp can write to multiple files, either temporary files, or
-- multiple videos found at the url, and git-annex needs only one file. -- multiple videos found at the url, and git-annex needs only one file.
-- So we need to find the destination file, and make sure there is not -- So we need to find the destination file, and make sure there is not
-- more than one. With yt-dlp use --print-to-file to make it record the -- more than one. With yt-dlp use --print-to-file to make it record the
-- file(s) it downloads. With youtube-dl, the best that can be done is -- file(s) it downloads.
-- to require that the work directory end up with only 1 file in it.
-- (This can fail, but youtube-dl is deprecated, and they closed my
-- issue requesting something like --print-to-file;
-- <https://github.com/rg3/youtube-dl/issues/14864>)
youtubeDl :: URLString -> OsPath -> MeterUpdate -> Annex (Either String (Maybe OsPath)) youtubeDl :: URLString -> OsPath -> MeterUpdate -> Annex (Either String (Maybe OsPath))
youtubeDl url workdir p = ifM ipAddressesUnlimited youtubeDl url workdir p = ifM ipAddressesUnlimited
( withUrlOptions Nothing $ youtubeDl' url workdir p ( withUrlOptions Nothing $ youtubeDl' url workdir p
@ -79,52 +75,48 @@ youtubeDl url workdir p = ifM ipAddressesUnlimited
youtubeDl' :: URLString -> OsPath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe OsPath)) youtubeDl' :: URLString -> OsPath -> MeterUpdate -> UrlOptions -> Annex (Either String (Maybe OsPath))
youtubeDl' url workdir p uo youtubeDl' url workdir p uo
| supportedScheme uo url = do | supportedScheme uo url =
cmd <- youtubeDlCommand ifM (liftIO $ inSearchPath youtubeDlCommand)
ifM (liftIO $ inSearchPath cmd) ( runcmd >>= \case
( runcmd cmd >>= \case Right True -> downloadedfiles >>= \case
Right True -> downloadedfiles cmd >>= \case
(f:[]) -> return $ (f:[]) -> return $
Right (Just (toOsPath f)) Right (Just (toOsPath f))
[] -> return (nofiles cmd) [] -> return nofiles
fs -> return (toomanyfiles cmd fs) fs -> return (toomanyfiles fs)
Right False -> workdirfiles >>= \case Right False -> workdirfiles >>= \case
[] -> return (Right Nothing) [] -> return (Right Nothing)
_ -> return (Left $ cmd ++ " download is incomplete. Run the command again to resume.") _ -> return (Left $ youtubeDlCommand ++ " download is incomplete. Run the command again to resume.")
Left msg -> return (Left msg) Left msg -> return (Left msg)
, return (Left $ cmd ++ " is not installed.") , return (Left $ youtubeDlCommand ++ " is not installed.")
) )
| otherwise = return (Right Nothing) | otherwise = return (Right Nothing)
where where
nofiles cmd = Left $ cmd ++ " did not put any media in its work directory, perhaps it's been configured to store files somewhere else?" nofiles = Left $ youtubeDlCommand ++ " did not put any media in its work directory, perhaps it's been configured to store files somewhere else?"
toomanyfiles cmd fs = Left $ cmd ++ " downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs toomanyfiles fs = Left $ youtubeDlCommand ++ " downloaded multiple media files; git-annex is only able to deal with one per url: " ++ show fs
downloadedfiles cmd downloadedfiles = liftIO $
| isytdlp cmd = liftIO $
(nub . lines <$> readFile (fromOsPath filelistfile)) (nub . lines <$> readFile (fromOsPath filelistfile))
`catchIO` (pure . const []) `catchIO` (pure . const [])
| otherwise = map fromOsPath <$> workdirfiles
workdirfiles = liftIO $ filter (/= filelistfile) workdirfiles = liftIO $ filter (/= filelistfile)
<$> (filterM doesFileExist =<< dirContents workdir) <$> (filterM doesFileExist =<< dirContents workdir)
filelistfile = workdir </> filelistfilebase filelistfile = workdir </> filelistfilebase
filelistfilebase = literalOsPath "git-annex-file-list-file" filelistfilebase = literalOsPath "git-annex-file-list-file"
isytdlp cmd = cmd == "yt-dlp" runcmd = youtubeDlMaxSize workdir >>= \case
runcmd cmd = youtubeDlMaxSize workdir >>= \case
Left msg -> return (Left msg) Left msg -> return (Left msg)
Right maxsize -> do Right maxsize -> do
opts <- youtubeDlOpts (dlopts cmd ++ maxsize) opts <- youtubeDlOpts (dlopts ++ maxsize)
oh <- mkOutputHandlerQuiet oh <- mkOutputHandlerQuiet
-- The size is unknown to start. Once youtube-dl -- The size is unknown to start. Once yt-dlp
-- outputs some progress, the meter will be updated -- outputs some progress, the meter will be updated
-- with the size, which is why it's important the -- with the size, which is why it's important the
-- meter is passed into commandMeter' -- meter is passed into commandMeter'
let unknownsize = Nothing :: Maybe FileSize let unknownsize = Nothing :: Maybe FileSize
ok <- metered (Just p) unknownsize Nothing $ \meter meterupdate -> ok <- metered (Just p) unknownsize Nothing $ \meter meterupdate ->
liftIO $ commandMeter' liftIO $ commandMeter'
(if isytdlp cmd then parseYtdlpProgress else parseYoutubeDlProgress) parseYtdlpProgress
oh (Just meter) meterupdate cmd opts oh (Just meter) meterupdate youtubeDlCommand opts
(\pr -> pr { cwd = Just (fromOsPath workdir) }) (\pr -> pr { cwd = Just (fromOsPath workdir) })
return (Right ok) return (Right ok)
dlopts cmd = dlopts =
[ Param url [ Param url
-- To make it only download one file when given a -- To make it only download one file when given a
-- page with a video and a playlist, download only the video. -- page with a video and a playlist, download only the video.
@ -134,22 +126,17 @@ youtubeDl' url workdir p uo
-- somewhat stable, but this is the only way to prevent -- somewhat stable, but this is the only way to prevent
-- it from downloading the whole playlist.) -- it from downloading the whole playlist.)
, Param "--playlist-items", Param "0" , Param "--playlist-items", Param "0"
] ++ -- Avoid warnings, which go to stderr and may
if isytdlp cmd -- mess up git-annex's display.
then , Param "--no-warnings"
-- Avoid warnings, which go to
-- stderr and may mess up
-- git-annex's display.
[ Param "--no-warnings"
, Param "--progress-template" , Param "--progress-template"
, Param progressTemplate , Param progressTemplate
, Param "--print-to-file" , Param "--print-to-file"
, Param "after_move:filepath" , Param "after_move:filepath"
, Param (fromOsPath filelistfilebase) , Param (fromOsPath filelistfilebase)
] ]
else []
-- To honor annex.diskreserve, ask youtube-dl to not download too -- To honor annex.diskreserve, ask yt-dlp to not download too
-- large a media file. Factors in other downloads that are in progress, -- large a media file. Factors in other downloads that are in progress,
-- and any files in the workdir that it may have partially downloaded -- and any files in the workdir that it may have partially downloaded
-- before. -- before.
@ -188,22 +175,22 @@ youtubeDlTo key url dest p = do
return Nothing return Nothing
return (fromMaybe False res) return (fromMaybe False res)
-- youtube-dl supports downloading urls that are not html pages, -- yt-dlp supports downloading urls that are not html pages,
-- but we don't want to use it for such urls, since they can be downloaded -- but we don't want to use it for such urls, since they can be downloaded
-- without it. So, this first downloads part of the content and checks -- without it. So, this first downloads part of the content and checks
-- if it's a html page; only then is youtube-dl used. -- if it's a html page; only then is yt-dlp used.
htmlOnly :: URLString -> a -> Annex a -> Annex a htmlOnly :: URLString -> a -> Annex a -> Annex a
htmlOnly url fallback a = withUrlOptions Nothing $ \uo -> htmlOnly url fallback a = withUrlOptions Nothing $ \uo ->
liftIO (downloadPartial url uo htmlPrefixLength) >>= \case liftIO (downloadPartial url uo htmlPrefixLength) >>= \case
Just bs | isHtmlBs bs -> a Just bs | isHtmlBs bs -> a
_ -> return fallback _ -> return fallback
-- Check if youtube-dl supports downloading content from an url. -- Check if yt-dlp supports downloading content from an url.
youtubeDlSupported :: URLString -> Annex Bool youtubeDlSupported :: URLString -> Annex Bool
youtubeDlSupported url = either (const False) id youtubeDlSupported url = either (const False) id
<$> withUrlOptions Nothing (youtubeDlCheck' url) <$> withUrlOptions Nothing (youtubeDlCheck' url)
-- Check if youtube-dl can find media in an url. -- Check if yt-dlp can find media in an url.
-- --
-- While this does not download anything, it checks youtubeDlAllowed -- While this does not download anything, it checks youtubeDlAllowed
-- for symmetry with youtubeDl; the check should not succeed if the -- for symmetry with youtubeDl; the check should not succeed if the
@ -218,11 +205,10 @@ youtubeDlCheck' :: URLString -> UrlOptions -> Annex (Either String Bool)
youtubeDlCheck' url uo youtubeDlCheck' url uo
| supportedScheme uo url = catchMsgIO $ htmlOnly url False $ do | supportedScheme uo url = catchMsgIO $ htmlOnly url False $ do
opts <- youtubeDlOpts [ Param url, Param "--simulate" ] opts <- youtubeDlOpts [ Param url, Param "--simulate" ]
cmd <- youtubeDlCommand liftIO $ snd <$> processTranscript youtubeDlCommand (toCommand opts) Nothing
liftIO $ snd <$> processTranscript cmd (toCommand opts) Nothing
| otherwise = return (Right False) | otherwise = return (Right False)
-- Ask youtube-dl for the filename of media in an url. -- Ask yt-dlp for the filename of media in an url.
-- --
-- (This is not always identical to the filename it uses when downloading.) -- (This is not always identical to the filename it uses when downloading.)
youtubeDlFileName :: URLString -> Annex (Either String OsPath) youtubeDlFileName :: URLString -> Annex (Either String OsPath)
@ -245,10 +231,11 @@ youtubeDlFileNameHtmlOnly' url uo
| otherwise = return nomedia | otherwise = return nomedia
where where
go = do go = do
-- Sometimes youtube-dl will fail with an ugly backtrace -- Sometimes yt-dlp will fail with an ugly backtrace
-- (eg, http://bugs.debian.org/874321) -- (eg, http://bugs.debian.org/874321)
-- so catch stderr as well as stdout to avoid the user -- so catch stderr as well as stdout to avoid the user
-- seeing it. --no-warnings avoids warning messages that -- seeing it.
-- --no-warnings avoids warning messages that
-- are output to stdout. -- are output to stdout.
opts <- youtubeDlOpts opts <- youtubeDlOpts
[ Param url [ Param url
@ -256,8 +243,7 @@ youtubeDlFileNameHtmlOnly' url uo
, Param "--no-warnings" , Param "--no-warnings"
, Param "--no-playlist" , Param "--no-playlist"
] ]
cmd <- youtubeDlCommand let p = (proc youtubeDlCommand (toCommand opts))
let p = (proc cmd (toCommand opts))
{ std_out = CreatePipe { std_out = CreatePipe
, std_err = CreatePipe , std_err = CreatePipe
} }
@ -284,22 +270,17 @@ youtubeDlOpts addopts = do
opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig
return (opts ++ addopts) return (opts ++ addopts)
youtubeDlCommand :: Annex String youtubeDlCommand :: String
youtubeDlCommand = annexYoutubeDlCommand <$> Annex.getGitConfig >>= \case youtubeDlCommand = "yt-dlp"
Just c -> pure c
Nothing -> ifM (liftIO $ inSearchPath "yt-dlp")
( return "yt-dlp"
, return "youtube-dl"
)
supportedScheme :: UrlOptions -> URLString -> Bool supportedScheme :: UrlOptions -> URLString -> Bool
supportedScheme uo url = case parseURIRelaxed url of supportedScheme uo url = case parseURIRelaxed url of
Nothing -> False Nothing -> False
Just u -> case uriScheme u of Just u -> case uriScheme u of
-- avoid ugly message from youtube-dl about not supporting file: -- avoid ugly message from yt-dlp about not supporting file:
"file:" -> False "file:" -> False
-- ftp indexes may look like html pages, and there's no point -- ftp indexes may look like html pages, and there's no point
-- involving youtube-dl in a ftp download -- involving yt-dlp in a ftp download
"ftp:" -> False "ftp:" -> False
_ -> allowedScheme uo u _ -> allowedScheme uo u
@ -346,16 +327,9 @@ parseYoutubeDlProgress _ = (Nothing, Nothing, "")
- download content. - download content.
-} -}
youtubePlaylist :: URLString -> Annex (Either String [YoutubePlaylistItem]) youtubePlaylist :: URLString -> Annex (Either String [YoutubePlaylistItem])
youtubePlaylist url = do youtubePlaylist url = liftIO $ withTmpFile (literalOsPath "yt-dlp") $ \tmpfile h -> do
cmd <- youtubeDlCommand
if cmd == "yt-dlp"
then liftIO $ youtubePlaylist' url cmd
else return $ Left $ "Scraping needs yt-dlp, but git-annex has been configured to use " ++ cmd
youtubePlaylist' :: URLString -> String -> IO (Either String [YoutubePlaylistItem])
youtubePlaylist' url cmd = withTmpFile (literalOsPath "yt-dlp") $ \tmpfile h -> do
hClose h hClose h
(outerr, ok) <- processTranscript cmd (outerr, ok) <- processTranscript youtubeDlCommand
[ "--simulate" [ "--simulate"
, "--flat-playlist" , "--flat-playlist"
-- Skip live videos in progress -- Skip live videos in progress

View file

@ -32,6 +32,8 @@ git-annex (10.20250722) UNRELEASED; urgency=medium
Fixes reversion introduced in version 7.20191230. Fixes reversion introduced in version 7.20191230.
* Improved error message when yt-dlp is not installed and is needed to * Improved error message when yt-dlp is not installed and is needed to
get a file from the web. get a file from the web.
* The annex.youtube-dl-command git config is no longer used; git-annex
always runs the yt-dlp command, rather than the old youtube-dl command.
* Removed support for git versions older than 2.22. * Removed support for git versions older than 2.22.
* Bump aws build dependency to 0.24.1. * Bump aws build dependency to 0.24.1.
* stack.yaml: Update to lts-24.2. * stack.yaml: Update to lts-24.2.

View file

@ -376,8 +376,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
where where
dl dest = withTmpWorkDir mediakey $ \workdir -> do dl dest = withTmpWorkDir mediakey $ \workdir -> do
let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith removeFile) let cleanuptmp = pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith removeFile)
dlcmd <- youtubeDlCommand showNote ("using " <> UnquotedString youtubeDlCommand)
showNote ("using " <> UnquotedString dlcmd)
Transfer.notifyTransfer Transfer.Download url $ Transfer.notifyTransfer Transfer.Download url $
Transfer.download' webUUID mediakey (AssociatedFile Nothing) Nothing Transfer.noRetry $ \p -> do Transfer.download' webUUID mediakey (AssociatedFile Nothing) Nothing Transfer.noRetry $ \p -> do
showDestinationFile dest showDestinationFile dest
@ -393,7 +392,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
return Nothing return Nothing
Right Nothing -> do Right Nothing -> do
cleanuptmp cleanuptmp
warning (UnquotedString dlcmd <> " did not download anything") warning (UnquotedString youtubeDlCommand <> " did not download anything")
return Nothing return Nothing
mediaurl = setDownloader url YoutubeDownloader mediaurl = setDownloader url YoutubeDownloader
mediakey = Backend.URL.fromUrl mediaurl Nothing (verifiableOption o) mediakey = Backend.URL.fromUrl mediaurl Nothing (verifiableOption o)

View file

@ -2110,8 +2110,8 @@ Remotes are configured using these settings in `.git/config`.
* `annex.youtube-dl-options` * `annex.youtube-dl-options`
Options to pass to yt-dlp (or deprecated youtube-dl) when using it to Options to pass to yt-dlp when using it to find the url to download
find the url to download for a video. for a video.
Some options may break git-annex's integration with yt-dlp. For Some options may break git-annex's integration with yt-dlp. For
example, the --output option could cause it to store files somewhere example, the --output option could cause it to store files somewhere
@ -2121,8 +2121,8 @@ Remotes are configured using these settings in `.git/config`.
* `annex.youtube-dl-command` * `annex.youtube-dl-command`
Default is to use "yt-dlp" or if that is not available in the PATH, Deprecated and unused. git-annex always runs "yt-dlp", since the
to use "youtube-dl". "youtube-dl" command has been deprecated.
* `annex.aria-torrent-options` * `annex.aria-torrent-options`