diff --git a/Annex/Content.hs b/Annex/Content.hs index d14bc06428..9f3722c609 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -943,18 +943,8 @@ downloadUrl k p urls file = -- Poll the file to handle configurations where an external -- download command is used. meteredFile file (Just p) k $ - go =<< annexWebDownloadCommand <$> Annex.getGitConfig - where - go Nothing = Url.withUrlOptions $ \uo -> - liftIO $ anyM (\u -> Url.download p u file uo) urls - go (Just basecmd) = anyM (downloadcmd basecmd) urls - downloadcmd basecmd url = - progressCommand "sh" [Param "-c", Param $ gencmd url basecmd] - <&&> liftIO (doesFileExist file) - gencmd url = massReplace - [ ("%file", shellEscape file) - , ("%url", shellEscape url) - ] + Url.withUrlOptions $ \uo -> + liftIO $ anyM (\u -> Url.download p u file uo) urls {- Copies a key's content, when present, to a temp file. - This is used to speed up some rsyncs. -} diff --git a/Annex/Url.hs b/Annex/Url.hs index 0d65a3eb27..65f53d82d9 100644 --- a/Annex/Url.hs +++ b/Annex/Url.hs @@ -39,6 +39,7 @@ getUrlOptions = Annex.getState Annex.urloptions >>= \case <*> headers <*> options <*> liftIO (U.newManager U.managerSettings) + <*> (annexAllowedUrlSchemes <$> Annex.getGitConfig) headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd]) Nothing -> annexHttpHeaders <$> Annex.getGitConfig diff --git a/Annex/YoutubeDl.hs b/Annex/YoutubeDl.hs index 425d06dfb4..4f8e699c23 100644 --- a/Annex/YoutubeDl.hs +++ b/Annex/YoutubeDl.hs @@ -11,7 +11,7 @@ module Annex.YoutubeDl ( youtubeDlSupported, youtubeDlCheck, youtubeDlFileName, - youtubeDlFileName', + youtubeDlFileNameHtmlOnly, ) where import Annex.Common @@ -41,8 +41,11 @@ import Control.Concurrent.Async -- (Note that we can't use --output to specifiy the file to download to, -- due to ) youtubeDl :: URLString -> FilePath -> Annex (Either String (Maybe FilePath)) -youtubeDl url workdir - | supportedScheme url = ifM (liftIO $ inPath "youtube-dl") +youtubeDl url workdir = withUrlOptions $ youtubeDl' url workdir + +youtubeDl' :: URLString -> FilePath -> UrlOptions -> Annex (Either String (Maybe FilePath)) +youtubeDl' url workdir uo + | supportedScheme uo url = ifM (liftIO $ inPath "youtube-dl") ( runcmd >>= \case Right True -> workdirfiles >>= \case (f:[]) -> return (Right (Just f)) @@ -134,8 +137,11 @@ youtubeDlSupported url = either (const False) id <$> youtubeDlCheck url -- Check if youtube-dl can find media in an url. youtubeDlCheck :: URLString -> Annex (Either String Bool) -youtubeDlCheck url - | supportedScheme url = catchMsgIO $ htmlOnly url False $ do +youtubeDlCheck = withUrlOptions . youtubeDlCheck' + +youtubeDlCheck' :: URLString -> UrlOptions -> Annex (Either String Bool) +youtubeDlCheck' url uo + | supportedScheme uo url = catchMsgIO $ htmlOnly url False $ do opts <- youtubeDlOpts [ Param url, Param "--simulate" ] liftIO $ snd <$> processTranscript "youtube-dl" (toCommand opts) Nothing | otherwise = return (Right False) @@ -144,18 +150,22 @@ youtubeDlCheck url -- -- (This is not always identical to the filename it uses when downloading.) youtubeDlFileName :: URLString -> Annex (Either String FilePath) -youtubeDlFileName url - | supportedScheme url = flip catchIO (pure . Left . show) $ - htmlOnly url nomedia (youtubeDlFileName' url) - | otherwise = return nomedia +youtubeDlFileName url = withUrlOptions go where + go uo + | supportedScheme uo url = flip catchIO (pure . Left . show) $ + htmlOnly url nomedia (youtubeDlFileNameHtmlOnly' url uo) + | otherwise = return nomedia nomedia = Left "no media in url" -- Does not check if the url contains htmlOnly; use when that's already -- been verified. -youtubeDlFileName' :: URLString -> Annex (Either String FilePath) -youtubeDlFileName' url - | supportedScheme url = flip catchIO (pure . Left . show) go +youtubeDlFileNameHtmlOnly :: URLString -> Annex (Either String FilePath) +youtubeDlFileNameHtmlOnly = withUrlOptions . youtubeDlFileNameHtmlOnly' + +youtubeDlFileNameHtmlOnly' :: URLString -> UrlOptions -> Annex (Either String FilePath) +youtubeDlFileNameHtmlOnly' url uo + | supportedScheme uo url = flip catchIO (pure . Left . show) go | otherwise = return nomedia where go = do @@ -189,12 +199,13 @@ youtubeDlOpts addopts = do opts <- map Param . annexYoutubeDlOptions <$> Annex.getGitConfig return (opts ++ addopts) -supportedScheme :: URLString -> Bool -supportedScheme url = case uriScheme <$> parseURIRelaxed url of +supportedScheme :: UrlOptions -> URLString -> Bool +supportedScheme uo url = case parseURIRelaxed url of Nothing -> False - -- avoid ugly message from youtube-dl about not supporting file: - Just "file:" -> False - -- ftp indexes may look like html pages, and there's no point - -- involving youtube-dl in a ftp download - Just "ftp:" -> False - Just _ -> True + Just u -> case uriScheme u of + -- avoid ugly message from youtube-dl about not supporting file: + "file:" -> False + -- ftp indexes may look like html pages, and there's no point + -- involving youtube-dl in a ftp download + "ftp:" -> False + _ -> allowedScheme uo u diff --git a/CHANGELOG b/CHANGELOG index 6e14024bf7..cfa836c4c3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,15 @@ +git-annex (6.20180622) UNRELEASED; urgency=high + + * Added annex.security.allowed-url-schemes setting, which defaults + to only allowing http and https URLs. Note especially that file:/ + is no longer enabled by default. This is a security fix. + * Removed annex.web-download-command, since its interface does not allow + supporting annex.security.allowed-url-schemes across redirects. + If you used this setting, you may want to instead use annex.web-options + to pass options to curl. + + -- Joey Hess Wed, 30 May 2018 11:49:08 -0400 + git-annex (6.20180530) UNRELEASED; urgency=medium * Fix build with ghc 8.4+, which broke due to the Semigroup Monoid change. diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 4721599248..156dbafcd4 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -277,7 +277,7 @@ downloadWeb o url urlinfo file = -- Ask youtube-dl what filename it will download -- first, and check if that is already an annexed file, -- to avoid unnecessary work in that case. - | otherwise = youtubeDlFileName' url >>= \case + | otherwise = youtubeDlFileNameHtmlOnly url >>= \case Right dest -> ifAnnexed dest (alreadyannexed dest) (dl dest) diff --git a/NEWS b/NEWS index a1272192d3..ce6f481af7 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,14 @@ +git-annex (6.20180622) upstream; urgency=high + + A security fix has changed git-annex to only support http and https + URL schemes by default. You can enable other URL schemes, at your own risk, + using annex.security.allowed-url-schemes. + + The annex.web-download-command configuration has been removed, + use annex.web-options instead. + + -- Joey Hess Fri, 15 Jun 2018 17:54:23 -0400 + git-annex (6.20180309) upstream; urgency=medium Note that, due to not using rsync to transfer files over ssh diff --git a/Test.hs b/Test.hs index 659ebf17e1..463f06e4dd 100644 --- a/Test.hs +++ b/Test.hs @@ -1714,10 +1714,12 @@ test_add_subdirs = intmpclonerepo $ do test_addurl :: Assertion test_addurl = intmpclonerepo $ do -- file:// only; this test suite should not hit the network + let filecmd c ps = git_annex c ("-cannex.security.allowed-url-schemes=file" : ps) f <- absPath "myurl" let url = replace "\\" "/" ("file:///" ++ dropDrive f) writeFile f "foo" - git_annex "addurl" [url] @? ("addurl failed on " ++ url) + not <$> git_annex "addurl" [url] @? "addurl failed to fail on file url" + filecmd "addurl" [url] @? ("addurl failed on " ++ url) let dest = "addurlurldest" - git_annex "addurl" ["--file", dest, url] @? ("addurl failed on " ++ url ++ " with --file") + filecmd "addurl" ["--file", dest, url] @? ("addurl failed on " ++ url ++ " with --file") doesFileExist dest @? (dest ++ " missing after addurl --file") diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 508d02f4d9..e7e9305801 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -33,8 +33,10 @@ import Config.DynamicConfig import Utility.HumanTime import Utility.Gpg (GpgCmd, mkGpgCmd) import Utility.ThreadScheduler (Seconds(..)) +import Utility.Url (Scheme, mkScheme) import Control.Concurrent.STM +import qualified Data.Set as S -- | A configurable value, that may not be fully determined yet because -- the global git config has not yet been loaded. @@ -71,7 +73,6 @@ data GitConfig = GitConfig , annexWebOptions :: [String] , annexYoutubeDlOptions :: [String] , annexAriaTorrentOptions :: [String] - , annexWebDownloadCommand :: Maybe String , annexCrippledFileSystem :: Bool , annexLargeFiles :: Maybe String , annexAddSmallFiles :: Bool @@ -93,6 +94,7 @@ data GitConfig = GitConfig , annexSecureHashesOnly :: Bool , annexRetry :: Maybe Integer , annexRetryDelay :: Maybe Seconds + , annexAllowedUrlSchemes :: S.Set Scheme , coreSymlinks :: Bool , coreSharedRepository :: SharedRepository , receiveDenyCurrentBranch :: DenyCurrentBranch @@ -133,7 +135,6 @@ extractGitConfig r = GitConfig , annexWebOptions = getwords (annex "web-options") , annexYoutubeDlOptions = getwords (annex "youtube-dl-options") , annexAriaTorrentOptions = getwords (annex "aria-torrent-options") - , annexWebDownloadCommand = getmaybe (annex "web-download-command") , annexCrippledFileSystem = getbool (annex "crippledfilesystem") False , annexLargeFiles = getmaybe (annex "largefiles") , annexAddSmallFiles = getbool (annex "addsmallfiles") True @@ -159,6 +160,9 @@ extractGitConfig r = GitConfig , annexRetry = getmayberead (annex "retry") , annexRetryDelay = Seconds <$> getmayberead (annex "retrydelay") + , annexAllowedUrlSchemes = S.fromList $ map mkScheme $ + maybe ["http", "https"] words $ + getmaybe (annex "security.allowed-url-schemes") , coreSymlinks = getbool "core.symlinks" True , coreSharedRepository = getSharedRepository r , receiveDenyCurrentBranch = getDenyCurrentBranch r diff --git a/Utility/Url.hs b/Utility/Url.hs index d959ba023e..e094518ddc 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -15,6 +15,9 @@ module Utility.Url ( managerSettings, URLString, UserAgent, + Scheme, + mkScheme, + allowedScheme, UrlOptions(..), defUrlOptions, mkUrlOptions, @@ -41,6 +44,7 @@ import qualified Data.CaseInsensitive as CI import qualified Data.ByteString as B import qualified Data.ByteString.UTF8 as B8 import qualified Data.ByteString.Lazy as L +import qualified Data.Set as S import Control.Monad.Trans.Resource import Network.HTTP.Conduit import Network.HTTP.Client (brRead, withResponse) @@ -65,12 +69,22 @@ type Headers = [String] type UserAgent = String +newtype Scheme = Scheme (CI.CI String) + deriving (Eq, Ord) + +mkScheme :: String -> Scheme +mkScheme = Scheme . CI.mk + +fromScheme :: Scheme -> String +fromScheme (Scheme s) = CI.original s + data UrlOptions = UrlOptions { userAgent :: Maybe UserAgent , reqHeaders :: Headers , urlDownloader :: UrlDownloader , applyRequest :: Request -> Request , httpManager :: Manager + , allowedSchemes :: S.Set Scheme } data UrlDownloader @@ -84,8 +98,9 @@ defUrlOptions = UrlOptions <*> pure DownloadWithConduit <*> pure id <*> newManager managerSettings + <*> pure (S.fromList $ map mkScheme ["http", "https"]) -mkUrlOptions :: Maybe UserAgent -> Headers -> [CommandParam] -> Manager -> UrlOptions +mkUrlOptions :: Maybe UserAgent -> Headers -> [CommandParam] -> Manager -> S.Set Scheme -> UrlOptions mkUrlOptions defuseragent reqheaders reqparams manager = UrlOptions useragent reqheaders urldownloader applyrequest manager where @@ -115,7 +130,7 @@ mkUrlOptions defuseragent reqheaders reqparams manager = _ -> (h', B8.fromString v) curlParams :: UrlOptions -> [CommandParam] -> [CommandParam] -curlParams uo ps = ps ++ uaparams ++ headerparams ++ addedparams +curlParams uo ps = ps ++ uaparams ++ headerparams ++ addedparams ++ schemeparams where uaparams = case userAgent uo of Nothing -> [] @@ -124,6 +139,25 @@ curlParams uo ps = ps ++ uaparams ++ headerparams ++ addedparams addedparams = case urlDownloader uo of DownloadWithConduit -> [] DownloadWithCurl l -> l + schemeparams = + [ Param "--proto" + , Param $ intercalate "," ("-all" : schemelist) + ] + schemelist = map fromScheme $ S.toList $ allowedSchemes uo + +checkPolicy :: UrlOptions -> URI -> a -> IO a -> IO a +checkPolicy uo u onerr a + | allowedScheme uo u = a + | otherwise = do + hPutStrLn stderr $ + "Configuration does not allow accessing " ++ show u + hFlush stderr + return onerr + +allowedScheme :: UrlOptions -> URI -> Bool +allowedScheme uo u = uscheme `S.member` allowedSchemes uo + where + uscheme = mkScheme $ takeWhile (/=':') (uriScheme u) {- Checks that an url exists and could be successfully downloaded, - also checking that its size, if available, matches a specified size. -} @@ -158,7 +192,8 @@ assumeUrlExists = UrlInfo True Nothing Nothing - also returning its size and suggested filename if available. -} getUrlInfo :: URLString -> UrlOptions -> IO UrlInfo getUrlInfo url uo = case parseURIRelaxed url of - Just u -> case (urlDownloader uo, parseUrlConduit (show u)) of + Just u -> checkPolicy uo u dne $ + case (urlDownloader uo, parseUrlConduit (show u)) of (DownloadWithConduit, Just req) -> catchJust -- When http redirects to a protocol which -- conduit does not support, it will throw @@ -166,7 +201,7 @@ getUrlInfo url uo = case parseURIRelaxed url of (matchStatusCodeException (== found302)) (existsconduit req) (const (existscurl u)) - `catchNonAsync` (const dne) + `catchNonAsync` (const $ return dne) -- http-conduit does not support file:, ftp:, etc urls, -- so fall back to reading files and using curl. _ @@ -177,11 +212,11 @@ getUrlInfo url uo = case parseURIRelaxed url of Just stat -> do sz <- getFileSize' f stat found (Just sz) Nothing - Nothing -> dne + Nothing -> return dne | otherwise -> existscurl u - Nothing -> dne + Nothing -> return dne where - dne = return $ UrlInfo False Nothing Nothing + dne = UrlInfo False Nothing Nothing found sz f = return $ UrlInfo True sz f curlparams = curlParams uo $ @@ -213,7 +248,7 @@ getUrlInfo url uo = case parseURIRelaxed url of then found (extractlen resp) (extractfilename resp) - else dne + else return dne existscurl u = do output <- catchDefaultIO "" $ @@ -230,7 +265,7 @@ getUrlInfo url uo = case parseURIRelaxed url of -- don't try to parse ftp status codes; if curl -- got a length, it's good _ | isftp && isJust len -> good - _ -> dne + _ -> return dne -- Parse eg: attachment; filename="fname.ext" -- per RFC 2616 @@ -265,7 +300,8 @@ download meterupdate url file uo = `catchNonAsync` showerr where go = case parseURIRelaxed url of - Just u -> case (urlDownloader uo, parseUrlConduit (show u)) of + Just u -> checkPolicy uo u False $ + case (urlDownloader uo, parseUrlConduit (show u)) of (DownloadWithConduit, Just req) -> catchJust -- When http redirects to a protocol which -- conduit does not support, it will throw diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index f829f07f63..756ba5a021 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1387,12 +1387,20 @@ Here are all the supported configuration settings. If set, the command is run and each line of its output is used as a HTTP header. This overrides annex.http-headers. -* `annex.web-download-command` +* `annex.security.allowed-url-schemes` - Use to specify a command to run to download a file from the web. + List of URL schemes that git-annex is allowed to download content from. + The default is "http https". - In the command line, %url is replaced with the url to download, - and %file is replaced with the file that it should be saved to. + Think very carefully before changing this; there are security + implications. For example, if it's changed to allow "file" URLs, + then anyone who can get a commit into your git-annex repository + could add a pointer to a private file located outside that repository, + risking it being copied into the repository and transferred on to other + remotes, exposing its content. + + Some special remotes support their own domain-specific URL + schemes; those are not affected by this configuration setting. * `annex.secure-erase-command` diff --git a/doc/special_remotes/external/git-annex-remote-torrent b/doc/special_remotes/external/git-annex-remote-torrent index d7897d7726..a3b2bb8580 100755 --- a/doc/special_remotes/external/git-annex-remote-torrent +++ b/doc/special_remotes/external/git-annex-remote-torrent @@ -123,7 +123,7 @@ while read line; do url="$2" # List contents of torrent. tmp=$(mktemp) - if ! runcmd curl -o "$tmp" "$url"; then + if ! runcmd curl --proto -all,http,https -o "$tmp" "$url"; then echo CHECKURL-FAILURE else oldIFS="$IFS" @@ -166,7 +166,7 @@ while read line; do echo TRANSFER-FAILURE RETRIEVE "$key" "no known torrent urls for this key" else tmp=$(mktemp) - if ! runcmd curl -o "$tmp" "$url"; then + if ! runcmd curl --proto -all,http,https -o "$tmp" "$url"; then echo TRANSFER-FAILURE RETRIEVE "$key" "failed downloading torrent file from $url" else filenum="$(echo "$url" | sed 's/(.*#\(\d*\)/\1/')" diff --git a/doc/special_remotes/web.mdwn b/doc/special_remotes/web.mdwn index cd20a93bb1..8bb4091704 100644 --- a/doc/special_remotes/web.mdwn +++ b/doc/special_remotes/web.mdwn @@ -6,6 +6,6 @@ See [[tips/using_the_web_as_a_special_remote]] for usage examples. Currently git-annex only supports downloading content from the web; it cannot upload to it or remove content. -This special remote uses arbitrary urls on the web as the source for content. +This special remote uses urls on the web as the source for content. git-annex can also download content from a normal git remote, accessible by http. diff --git a/doc/special_remotes/web/comment_2_333141cc9ec6c26ffd19aa95303a91e3._comment b/doc/special_remotes/web/comment_2_333141cc9ec6c26ffd19aa95303a91e3._comment index ff20181175..456186a67d 100644 --- a/doc/special_remotes/web/comment_2_333141cc9ec6c26ffd19aa95303a91e3._comment +++ b/doc/special_remotes/web/comment_2_333141cc9ec6c26ffd19aa95303a91e3._comment @@ -5,4 +5,7 @@ date="2013-08-17T08:59:11Z" content=""" When it says \"arbitrary urls\", it means it. The only requirement is that the url be well formed and that wget or whatever command you have it configured to use via annex.web-download-command knows how to download it. + +Update 2018: That used to be the case, but it's now limited by default to +http and https urls. """]] diff --git a/doc/tips/Using_Git-annex_as_a_web_browsing_assistant.mdwn b/doc/tips/Using_Git-annex_as_a_web_browsing_assistant.mdwn index 4ee023de3e..e8239abd40 100644 --- a/doc/tips/Using_Git-annex_as_a_web_browsing_assistant.mdwn +++ b/doc/tips/Using_Git-annex_as_a_web_browsing_assistant.mdwn @@ -11,12 +11,6 @@ The first step is to install the Firefox plugin [FlashGot](http://flashgot.net/). We will use it to provide the Firefox shortcuts to add things to our annex. -We also need a normal download manager, if we want to get status updates as -the download is done. We'll need to configure git-annex to use it by -setting `annex.web-download-command` as Joey describes in his comment on -[[todo/wishlist: allow configuration of downloader for addurl]]. See the -manpage [[git-annex]] for more information on setting configuration. - Once we have installed all that, we need a script that has an interface which FlashGot can treat as a downloader, but which calls git-annex to do the actual downloading. Such a script is available from diff --git a/doc/tips/Using_Git-annex_as_a_web_browsing_assistant/comment_1_74167f9fff400f148916003468c77de4._comment b/doc/tips/Using_Git-annex_as_a_web_browsing_assistant/comment_1_74167f9fff400f148916003468c77de4._comment deleted file mode 100644 index 4c5c3cdfaf..0000000000 --- a/doc/tips/Using_Git-annex_as_a_web_browsing_assistant/comment_1_74167f9fff400f148916003468c77de4._comment +++ /dev/null @@ -1,8 +0,0 @@ -[[!comment format=mdwn - username="http://joeyh.name/" - nickname="joey" - subject="comment 1" - date="2013-04-11T20:16:02Z" - content=""" -As of my last commit, you don't really need a separate download manager. The webapp will now display urls that `git annex addurl` is downloading in among the other transfers. -"""]] diff --git a/doc/todo/easy_way_to_reproduce_normal_download_command.mdwn b/doc/todo/easy_way_to_reproduce_normal_download_command.mdwn index 6846bc8a19..5e913a544d 100644 --- a/doc/todo/easy_way_to_reproduce_normal_download_command.mdwn +++ b/doc/todo/easy_way_to_reproduce_normal_download_command.mdwn @@ -22,3 +22,10 @@ what about the other settings, is it okay to hardcode those? maybe this would be easier if there would be an options override just like rsync, but separate ones for curl and wget... --[[anarcat]] + +> git-annex now only uses curl, and defaults to a built-in http downloader. +> The annex.web-download-command is no longer supported. annex.web-options +> can be used to pass options to curl. +> +> So, I don't think this todo is relevant anymore, closing [[done]]. +> --[[Joey]]