2014-02-25 02:00:25 +00:00
|
|
|
{- Url downloading, with git-annex user agent and configured http
|
2018-06-17 17:05:30 +00:00
|
|
|
- headers, security restrictions, etc.
|
2013-09-28 18:35:21 +00:00
|
|
|
-
|
2022-08-15 16:22:01 +00:00
|
|
|
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
|
2013-09-28 18:35:21 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2013-09-28 18:35:21 +00:00
|
|
|
-}
|
|
|
|
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
2013-09-28 18:35:21 +00:00
|
|
|
module Annex.Url (
|
2014-02-25 02:00:25 +00:00
|
|
|
withUrlOptions,
|
2020-01-22 20:13:48 +00:00
|
|
|
withUrlOptionsPromptingCreds,
|
2018-04-04 19:15:12 +00:00
|
|
|
getUrlOptions,
|
2013-09-28 18:35:21 +00:00
|
|
|
getUserAgent,
|
2019-05-30 16:43:40 +00:00
|
|
|
ipAddressesUnlimited,
|
2019-11-12 17:33:41 +00:00
|
|
|
checkBoth,
|
|
|
|
download,
|
2020-05-15 16:51:09 +00:00
|
|
|
download',
|
2019-11-12 17:33:41 +00:00
|
|
|
exists,
|
|
|
|
getUrlInfo,
|
|
|
|
U.URLString,
|
|
|
|
U.UrlOptions(..),
|
|
|
|
U.UrlInfo(..),
|
|
|
|
U.sinkResponseFile,
|
|
|
|
U.matchStatusCodeException,
|
|
|
|
U.downloadConduit,
|
|
|
|
U.downloadPartial,
|
|
|
|
U.parseURIRelaxed,
|
|
|
|
U.allowedScheme,
|
|
|
|
U.assumeUrlExists,
|
2013-09-28 18:35:21 +00:00
|
|
|
) where
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2013-09-28 18:35:21 +00:00
|
|
|
import qualified Annex
|
2019-11-12 17:33:41 +00:00
|
|
|
import qualified Utility.Url as U
|
2023-08-14 16:28:10 +00:00
|
|
|
import qualified Utility.Url.Parse as U
|
2021-08-18 18:49:01 +00:00
|
|
|
import Utility.Hash (IncrementalVerifier)
|
2018-06-17 17:05:30 +00:00
|
|
|
import Utility.IPAddress
|
2020-06-22 15:30:33 +00:00
|
|
|
import Network.HTTP.Client.Restricted
|
2019-11-12 17:33:41 +00:00
|
|
|
import Utility.Metered
|
2020-01-22 20:13:48 +00:00
|
|
|
import Git.Credential
|
2017-12-14 16:46:57 +00:00
|
|
|
import qualified BuildInfo
|
2013-09-28 18:35:21 +00:00
|
|
|
|
2018-06-17 17:05:30 +00:00
|
|
|
import Network.Socket
|
2019-07-17 20:48:50 +00:00
|
|
|
import Network.HTTP.Client
|
|
|
|
import Network.HTTP.Client.TLS
|
2020-02-25 19:45:52 +00:00
|
|
|
import Text.Read
|
2022-08-15 16:22:01 +00:00
|
|
|
import qualified Data.Set as S
|
2018-06-17 17:05:30 +00:00
|
|
|
|
2013-09-28 18:35:21 +00:00
|
|
|
defaultUserAgent :: U.UserAgent
|
2017-12-14 16:46:57 +00:00
|
|
|
defaultUserAgent = "git-annex/" ++ BuildInfo.packageversion
|
2013-09-28 18:35:21 +00:00
|
|
|
|
2018-07-16 16:06:06 +00:00
|
|
|
getUserAgent :: Annex U.UserAgent
|
2022-06-28 20:02:01 +00:00
|
|
|
getUserAgent = Annex.getRead $
|
2018-07-16 16:06:06 +00:00
|
|
|
fromMaybe defaultUserAgent . Annex.useragent
|
2013-09-28 18:35:21 +00:00
|
|
|
|
2018-04-04 19:15:12 +00:00
|
|
|
getUrlOptions :: Annex U.UrlOptions
|
|
|
|
getUrlOptions = Annex.getState Annex.urloptions >>= \case
|
|
|
|
Just uo -> return uo
|
2018-04-04 19:00:51 +00:00
|
|
|
Nothing -> do
|
|
|
|
uo <- mk
|
|
|
|
Annex.changeState $ \s -> s
|
|
|
|
{ Annex.urloptions = Just uo }
|
2018-04-04 19:15:12 +00:00
|
|
|
return uo
|
2014-02-25 02:00:25 +00:00
|
|
|
where
|
2018-06-17 17:05:30 +00:00
|
|
|
mk = do
|
|
|
|
(urldownloader, manager) <- checkallowedaddr
|
2019-11-12 17:33:41 +00:00
|
|
|
U.mkUrlOptions
|
2018-07-16 16:06:06 +00:00
|
|
|
<$> (Just <$> getUserAgent)
|
2018-06-17 17:05:30 +00:00
|
|
|
<*> headers
|
|
|
|
<*> pure urldownloader
|
|
|
|
<*> pure manager
|
|
|
|
<*> (annexAllowedUrlSchemes <$> Annex.getGitConfig)
|
2021-07-02 14:43:44 +00:00
|
|
|
<*> pure (Just (\u -> "Configuration of annex.security.allowed-url-schemes does not allow accessing " ++ show u))
|
2020-01-22 20:13:48 +00:00
|
|
|
<*> pure U.noBasicAuth
|
2018-06-17 17:05:30 +00:00
|
|
|
|
2017-12-05 19:00:50 +00:00
|
|
|
headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case
|
|
|
|
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
|
|
|
|
Nothing -> annexHttpHeaders <$> Annex.getGitConfig
|
2018-06-17 17:05:30 +00:00
|
|
|
|
2019-05-30 16:43:40 +00:00
|
|
|
checkallowedaddr = words . annexAllowedIPAddresses <$> Annex.getGitConfig >>= \case
|
2018-06-17 17:05:30 +00:00
|
|
|
["all"] -> do
|
|
|
|
curlopts <- map Param . annexWebOptions <$> Annex.getGitConfig
|
2022-08-15 16:22:01 +00:00
|
|
|
allowedurlschemes <- annexAllowedUrlSchemes <$> Annex.getGitConfig
|
2023-08-22 14:25:53 +00:00
|
|
|
let urldownloader = if null curlopts && not (any (`S.notMember` U.conduitUrlSchemes) allowedurlschemes)
|
2019-05-30 18:51:34 +00:00
|
|
|
then U.DownloadWithConduit $
|
|
|
|
U.DownloadWithCurlRestricted mempty
|
2018-10-04 17:43:29 +00:00
|
|
|
else U.DownloadWithCurl curlopts
|
2019-07-17 20:48:50 +00:00
|
|
|
manager <- liftIO $ U.newManager $
|
|
|
|
avoidtimeout $ tlsManagerSettings
|
2018-06-17 17:05:30 +00:00
|
|
|
return (urldownloader, manager)
|
2020-02-25 19:45:52 +00:00
|
|
|
allowedaddrsports -> do
|
2018-06-17 17:05:30 +00:00
|
|
|
addrmatcher <- liftIO $
|
|
|
|
(\l v -> any (\f -> f v) l) . catMaybes
|
2020-02-25 19:45:52 +00:00
|
|
|
<$> mapM (uncurry makeAddressMatcher)
|
|
|
|
(mapMaybe splitAddrPort allowedaddrsports)
|
2018-06-17 17:05:30 +00:00
|
|
|
-- Default to not allowing access to loopback
|
|
|
|
-- and private IP addresses to avoid data
|
|
|
|
-- leakage.
|
|
|
|
let isallowed addr
|
|
|
|
| addrmatcher addr = True
|
|
|
|
| isLoopbackAddress addr = False
|
|
|
|
| isPrivateAddress addr = False
|
|
|
|
| otherwise = True
|
2019-07-17 20:48:50 +00:00
|
|
|
let connectionrestricted = connectionRestricted
|
2019-05-30 16:43:40 +00:00
|
|
|
("Configuration of annex.security.allowed-ip-addresses does not allow accessing address " ++)
|
2019-07-17 20:48:50 +00:00
|
|
|
let r = addressRestriction $ \addr ->
|
|
|
|
if isallowed (addrAddress addr)
|
|
|
|
then Nothing
|
|
|
|
else Just (connectionrestricted addr)
|
2018-06-18 17:32:20 +00:00
|
|
|
(settings, pr) <- liftIO $
|
2019-07-17 20:48:50 +00:00
|
|
|
mkRestrictedManagerSettings r Nothing Nothing
|
2018-06-18 17:32:20 +00:00
|
|
|
case pr of
|
|
|
|
Nothing -> return ()
|
|
|
|
Just ProxyRestricted -> toplevelWarning True
|
2019-05-30 16:43:40 +00:00
|
|
|
"http proxy settings not used due to annex.security.allowed-ip-addresses configuration"
|
2019-07-17 20:48:50 +00:00
|
|
|
manager <- liftIO $ U.newManager $
|
|
|
|
avoidtimeout settings
|
2021-03-24 17:57:00 +00:00
|
|
|
-- Curl is not used, as its interface does not allow
|
|
|
|
-- preventing it from accessing specific IP addresses.
|
2019-05-30 18:51:34 +00:00
|
|
|
let urldownloader = U.DownloadWithConduit $
|
|
|
|
U.DownloadWithCurlRestricted r
|
|
|
|
return (urldownloader, manager)
|
2019-07-17 20:48:50 +00:00
|
|
|
|
|
|
|
-- http-client defailts to timing out a request after 30 seconds
|
|
|
|
-- or so, but some web servers are slower and git-annex has its own
|
|
|
|
-- separate timeout controls, so disable that.
|
|
|
|
avoidtimeout s = s { managerResponseTimeout = responseTimeoutNone }
|
2018-04-04 19:15:12 +00:00
|
|
|
|
2020-02-25 19:45:52 +00:00
|
|
|
splitAddrPort :: String -> Maybe (String, Maybe PortNumber)
|
|
|
|
splitAddrPort s
|
|
|
|
-- "[addr]:port" (also allow "[addr]")
|
|
|
|
| "[" `isPrefixOf` s = case splitc ']' (drop 1 s) of
|
|
|
|
[a,cp] -> case splitc ':' cp of
|
|
|
|
["",p] -> do
|
|
|
|
pn <- readMaybe p
|
|
|
|
return (a, Just pn)
|
|
|
|
[""] -> Just (a, Nothing)
|
|
|
|
_ -> Nothing
|
|
|
|
_ -> Nothing
|
|
|
|
| otherwise = Just (s, Nothing)
|
|
|
|
|
2019-05-30 16:43:40 +00:00
|
|
|
ipAddressesUnlimited :: Annex Bool
|
|
|
|
ipAddressesUnlimited =
|
|
|
|
("all" == ) . annexAllowedIPAddresses <$> Annex.getGitConfig
|
2018-06-17 18:46:22 +00:00
|
|
|
|
2018-04-04 19:15:12 +00:00
|
|
|
withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
|
|
|
|
withUrlOptions a = a =<< getUrlOptions
|
2019-11-12 17:33:41 +00:00
|
|
|
|
2020-01-22 20:13:48 +00:00
|
|
|
-- When downloading an url, if authentication is needed, uses
|
|
|
|
-- git-credential to prompt for username and password.
|
2022-09-09 20:06:47 +00:00
|
|
|
--
|
|
|
|
-- Note that, when the downloader is curl, it will not use git-credential.
|
|
|
|
-- If the user wants to, they can configure curl to use a netrc file that
|
|
|
|
-- handles authentication.
|
2020-01-22 20:13:48 +00:00
|
|
|
withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a
|
|
|
|
withUrlOptionsPromptingCreds a = do
|
|
|
|
g <- Annex.gitRepo
|
|
|
|
uo <- getUrlOptions
|
2020-01-22 20:38:34 +00:00
|
|
|
prompter <- mkPrompter
|
2022-09-09 17:53:38 +00:00
|
|
|
cc <- Annex.getRead Annex.gitcredentialcache
|
2020-01-22 20:13:48 +00:00
|
|
|
a $ uo
|
2020-01-22 20:38:34 +00:00
|
|
|
{ U.getBasicAuth = \u -> prompter $
|
2022-09-09 17:53:38 +00:00
|
|
|
getBasicAuthFromCredential g cc u
|
2020-01-22 20:13:48 +00:00
|
|
|
}
|
|
|
|
|
2019-11-12 17:33:41 +00:00
|
|
|
checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool
|
|
|
|
checkBoth url expected_size uo =
|
|
|
|
liftIO (U.checkBoth url expected_size uo) >>= \case
|
|
|
|
Right r -> return r
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
Left err -> warning (UnquotedString err) >> return False
|
2019-11-12 17:33:41 +00:00
|
|
|
|
2021-08-18 18:49:01 +00:00
|
|
|
download :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
|
|
|
|
download meterupdate iv url file uo =
|
|
|
|
liftIO (U.download meterupdate iv url file uo) >>= \case
|
2019-11-12 17:33:41 +00:00
|
|
|
Right () -> return True
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
Left err -> warning (UnquotedString err) >> return False
|
2019-11-12 17:33:41 +00:00
|
|
|
|
2021-08-18 18:49:01 +00:00
|
|
|
download' :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ())
|
|
|
|
download' meterupdate iv url file uo =
|
|
|
|
liftIO (U.download meterupdate iv url file uo)
|
2020-05-15 16:51:09 +00:00
|
|
|
|
2019-11-12 17:33:41 +00:00
|
|
|
exists :: U.URLString -> U.UrlOptions -> Annex Bool
|
|
|
|
exists url uo = liftIO (U.exists url uo) >>= \case
|
|
|
|
Right b -> return b
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
Left err -> warning (UnquotedString err) >> return False
|
2019-11-12 17:33:41 +00:00
|
|
|
|
2020-04-27 17:48:14 +00:00
|
|
|
getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo)
|
|
|
|
getUrlInfo url uo = liftIO (U.getUrlInfo url uo)
|