limit url downloads to whitelisted schemes

Security fix! Allowing any scheme, particularly file: and possibly
others like scp:, allowed file exfiltration by anyone with write
access to the git repository: they could add an annexed file using
such an url (or an url that redirected to one), then wait for the
victim to get it into their repository and send them a copy.

* Added annex.security.allowed-url-schemes setting, which defaults
  to allowing only http and https URLs. Note especially that file:
  is no longer enabled by default. (A parsing sketch follows this
  list.)

* Removed annex.web-download-command, since its interface does not allow
  supporting annex.security.allowed-url-schemes across redirects.
  If you used this setting, you may want to instead use annex.web-options
  to pass options to curl.
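
As a minimal sketch (illustrative only, not the actual git-annex
code), the annex.security.allowed-url-schemes value could be parsed
into a scheme set like this, assuming the config value is a
space-separated list of schemes; parseAllowedSchemes is a
hypothetical helper:

    import qualified Data.CaseInsensitive as CI
    import qualified Data.Set as S

    newtype Scheme = Scheme (CI.CI String)
        deriving (Eq, Ord)

    mkScheme :: String -> Scheme
    mkScheme = Scheme . CI.mk

    -- Hypothetical helper: parse the config value, falling back to
    -- the http/https default when the setting is absent.
    parseAllowedSchemes :: Maybe String -> S.Set Scheme
    parseAllowedSchemes Nothing = S.fromList (map mkScheme ["http", "https"])
    parseAllowedSchemes (Just v) = S.fromList (map mkScheme (words v))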

With annex.web-download-command removed, nearly all url accesses in
git-annex go through Utility.Url, using either http-client or curl.
http-client only supports http and https, so no problem there.
(Disabling one of those two schemes but not the other is not
implemented.)
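
For example (illustrative, not code from this commit), http-client
refuses at parse time to construct a request for a scheme it does not
support; supportedByHttpClient is a hypothetical helper name:

    import Data.Maybe (isJust)
    import Network.HTTP.Client (Request, parseRequest)

    -- parseRequest works in any MonadThrow; running it in Maybe
    -- turns an unsupported scheme into Nothing.
    supportedByHttpClient :: String -> Bool
    supportedByHttpClient u = isJust (parseRequest u :: Maybe Request)

    -- supportedByHttpClient "https://example.com/" == True
    -- supportedByHttpClient "file:///etc/passwd" == False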

Used curl --proto to limit the allowed url schemes.
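
For illustration, here is the technique used by schemeparams in the
Utility.Url diff below, as a standalone sketch: "-all" first disables
every protocol, then each allowed scheme is re-enabled, so with the
defaults curl is passed --proto -all,http,https, which should also
constrain any redirects curl follows.

    import Data.List (intercalate)

    -- Build curl's --proto argument from the allowed schemes.
    protoArg :: [String] -> String
    protoArg schemes = intercalate "," ("-all" : schemes)

    -- protoArg ["http", "https"] == "-all,http,https"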

Note that this will cause git annex fsck --from web to mark files using
a disallowed url scheme as not being present in the web. That seems
acceptable; fsck --from web also does that when a web server is not available.

youtube-dl already disabled file: itself (probably for similar
reasons). The scheme check was also added to youtube-dl urls for
completeness, although that check won't catch any redirects it might
follow. But youtube-dl goes off and does its own thing with other
protocols anyway, so that's fine.

Special remotes that support other domain-specific url schemes are not
affected by this change. In the bittorrent remote, aria2c can still
download magnet: links; the download of the .torrent file itself,
however, is now limited by annex.security.allowed-url-schemes.

This does not address any external special remotes that might download
an url themselves. Current thinking is that all external special
remotes will need to be audited for this problem, although many of
them use http libraries that support only http, not curl's menagerie
of protocols.

The related problem of accessing private localhost and LAN urls is not
addressed by this commit.

This commit was sponsored by Brett Eisenberg on Patreon.
Joey Hess 2018-06-15 16:52:24 -04:00
parent c8559a0403
commit 28720c795f
16 changed files with 139 additions and 68 deletions

Utility/Url.hs

@@ -15,6 +15,9 @@ module Utility.Url (
 	managerSettings,
 	URLString,
 	UserAgent,
+	Scheme,
+	mkScheme,
+	allowedScheme,
 	UrlOptions(..),
 	defUrlOptions,
 	mkUrlOptions,
@@ -41,6 +44,7 @@ import qualified Data.CaseInsensitive as CI
 import qualified Data.ByteString as B
 import qualified Data.ByteString.UTF8 as B8
 import qualified Data.ByteString.Lazy as L
+import qualified Data.Set as S
 import Control.Monad.Trans.Resource
 import Network.HTTP.Conduit
 import Network.HTTP.Client (brRead, withResponse)
@@ -65,12 +69,22 @@ type Headers = [String]
 
 type UserAgent = String
 
+newtype Scheme = Scheme (CI.CI String)
+	deriving (Eq, Ord)
+
+mkScheme :: String -> Scheme
+mkScheme = Scheme . CI.mk
+
+fromScheme :: Scheme -> String
+fromScheme (Scheme s) = CI.original s
+
 data UrlOptions = UrlOptions
 	{ userAgent :: Maybe UserAgent
 	, reqHeaders :: Headers
 	, urlDownloader :: UrlDownloader
 	, applyRequest :: Request -> Request
 	, httpManager :: Manager
+	, allowedSchemes :: S.Set Scheme
 	}
 
 data UrlDownloader
@@ -84,8 +98,9 @@ defUrlOptions = UrlOptions
 	<*> pure DownloadWithConduit
 	<*> pure id
 	<*> newManager managerSettings
+	<*> pure (S.fromList $ map mkScheme ["http", "https"])
 
-mkUrlOptions :: Maybe UserAgent -> Headers -> [CommandParam] -> Manager -> UrlOptions
+mkUrlOptions :: Maybe UserAgent -> Headers -> [CommandParam] -> Manager -> S.Set Scheme -> UrlOptions
 mkUrlOptions defuseragent reqheaders reqparams manager =
 	UrlOptions useragent reqheaders urldownloader applyrequest manager
   where
@@ -115,7 +130,7 @@ mkUrlOptions defuseragent reqheaders reqparams manager =
 		_ -> (h', B8.fromString v)
 
 curlParams :: UrlOptions -> [CommandParam] -> [CommandParam]
-curlParams uo ps = ps ++ uaparams ++ headerparams ++ addedparams
+curlParams uo ps = ps ++ uaparams ++ headerparams ++ addedparams ++ schemeparams
   where
 	uaparams = case userAgent uo of
 		Nothing -> []
@@ -124,6 +139,25 @@ curlParams uo ps = ps ++ uaparams ++ headerparams ++ addedparams
 	addedparams = case urlDownloader uo of
 		DownloadWithConduit -> []
 		DownloadWithCurl l -> l
+	schemeparams =
+		[ Param "--proto"
+		, Param $ intercalate "," ("-all" : schemelist)
+		]
+	schemelist = map fromScheme $ S.toList $ allowedSchemes uo
+
+checkPolicy :: UrlOptions -> URI -> a -> IO a -> IO a
+checkPolicy uo u onerr a
+	| allowedScheme uo u = a
+	| otherwise = do
+		hPutStrLn stderr $
+			"Configuration does not allow accessing " ++ show u
+		hFlush stderr
+		return onerr
+
+allowedScheme :: UrlOptions -> URI -> Bool
+allowedScheme uo u = uscheme `S.member` allowedSchemes uo
+  where
+	uscheme = mkScheme $ takeWhile (/=':') (uriScheme u)
 
 {- Checks that an url exists and could be successfully downloaded,
  - also checking that its size, if available, matches a specified size. -}
@@ -158,7 +192,8 @@ assumeUrlExists = UrlInfo True Nothing Nothing
  - also returning its size and suggested filename if available. -}
 getUrlInfo :: URLString -> UrlOptions -> IO UrlInfo
 getUrlInfo url uo = case parseURIRelaxed url of
-	Just u -> case (urlDownloader uo, parseUrlConduit (show u)) of
+	Just u -> checkPolicy uo u dne $
+		case (urlDownloader uo, parseUrlConduit (show u)) of
 		(DownloadWithConduit, Just req) -> catchJust
 			-- When http redirects to a protocol which
 			-- conduit does not support, it will throw
@@ -166,7 +201,7 @@ getUrlInfo url uo = case parseURIRelaxed url of
 			(matchStatusCodeException (== found302))
 			(existsconduit req)
 			(const (existscurl u))
-			`catchNonAsync` (const dne)
+			`catchNonAsync` (const $ return dne)
 		-- http-conduit does not support file:, ftp:, etc urls,
 		-- so fall back to reading files and using curl.
 		_
@@ -177,11 +212,11 @@ getUrlInfo url uo = case parseURIRelaxed url of
 				Just stat -> do
 					sz <- getFileSize' f stat
 					found (Just sz) Nothing
-				Nothing -> dne
+				Nothing -> return dne
 			| otherwise -> existscurl u
-	Nothing -> dne
+	Nothing -> return dne
   where
-	dne = return $ UrlInfo False Nothing Nothing
+	dne = UrlInfo False Nothing Nothing
 	found sz f = return $ UrlInfo True sz f
 
 	curlparams = curlParams uo $
@@ -213,7 +248,7 @@ getUrlInfo url uo = case parseURIRelaxed url of
 				then found
 					(extractlen resp)
 					(extractfilename resp)
-				else dne
+				else return dne
 
 	existscurl u = do
 		output <- catchDefaultIO "" $
@@ -230,7 +265,7 @@ getUrlInfo url uo = case parseURIRelaxed url of
 			-- don't try to parse ftp status codes; if curl
 			-- got a length, it's good
 			_ | isftp && isJust len -> good
-			_ -> dne
+			_ -> return dne
 
 	-- Parse eg: attachment; filename="fname.ext"
 	-- per RFC 2616
@@ -265,7 +300,8 @@ download meterupdate url file uo =
 	`catchNonAsync` showerr
   where
 	go = case parseURIRelaxed url of
-		Just u -> case (urlDownloader uo, parseUrlConduit (show u)) of
+		Just u -> checkPolicy uo u False $
+			case (urlDownloader uo, parseUrlConduit (show u)) of
 			(DownloadWithConduit, Just req) -> catchJust
 				-- When http redirects to a protocol which
 				-- conduit does not support, it will throw
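
A usage note on the code above (the sketch below is illustrative, not
part of this diff): Network.URI's uriScheme includes the trailing
colon, which is why allowedScheme strips everything from ':' onward
before the set membership test (and the real check is
case-insensitive via Data.CaseInsensitive):

    import Data.Maybe (fromJust)
    import Network.URI (parseURI, uriScheme)

    main :: IO ()
    main = do
        let u = fromJust (parseURI "file:///etc/passwd")
        -- uriScheme keeps the trailing colon:
        print (uriScheme u)                      -- "file:"
        -- so allowedScheme compares on the stripped form:
        print (takeWhile (/= ':') (uriScheme u)) -- "file"

Note also a subtlety in the mkUrlOptions hunk: the type signature
gains a S.Set Scheme argument while the definition still binds only
four parameters. That works because allowedSchemes was added as the
last field of UrlOptions, so the partially applied constructor in the
body already has type S.Set Scheme -> UrlOptions.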