2014-02-25 02:00:25 +00:00
|
|
|
{- Url downloading, with git-annex user agent and configured http
|
2018-06-17 17:05:30 +00:00
|
|
|
- headers, security restrictions, etc.
|
2013-09-28 18:35:21 +00:00
|
|
|
-
|
2022-08-15 16:22:01 +00:00
|
|
|
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
|
2013-09-28 18:35:21 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2013-09-28 18:35:21 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Annex.Url (
|
2014-02-25 02:00:25 +00:00
|
|
|
withUrlOptions,
|
2020-01-22 20:13:48 +00:00
|
|
|
withUrlOptionsPromptingCreds,
|
2018-04-04 19:15:12 +00:00
|
|
|
getUrlOptions,
|
2013-09-28 18:35:21 +00:00
|
|
|
getUserAgent,
|
2019-05-30 16:43:40 +00:00
|
|
|
ipAddressesUnlimited,
|
2019-11-12 17:33:41 +00:00
|
|
|
checkBoth,
|
|
|
|
download,
|
2020-05-15 16:51:09 +00:00
|
|
|
download',
|
2019-11-12 17:33:41 +00:00
|
|
|
exists,
|
|
|
|
getUrlInfo,
|
|
|
|
U.URLString,
|
|
|
|
U.UrlOptions(..),
|
|
|
|
U.UrlInfo(..),
|
|
|
|
U.sinkResponseFile,
|
|
|
|
U.matchStatusCodeException,
|
|
|
|
U.downloadConduit,
|
|
|
|
U.downloadPartial,
|
|
|
|
U.parseURIRelaxed,
|
|
|
|
U.allowedScheme,
|
|
|
|
U.assumeUrlExists,
|
2013-09-28 18:35:21 +00:00
|
|
|
) where
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2013-09-28 18:35:21 +00:00
|
|
|
import qualified Annex
|
2019-11-12 17:33:41 +00:00
|
|
|
import qualified Utility.Url as U
|
2021-08-18 18:49:01 +00:00
|
|
|
import Utility.Hash (IncrementalVerifier)
|
2018-06-17 17:05:30 +00:00
|
|
|
import Utility.IPAddress
|
2020-06-22 15:30:33 +00:00
|
|
|
import Network.HTTP.Client.Restricted
|
2019-11-12 17:33:41 +00:00
|
|
|
import Utility.Metered
|
2020-01-22 20:13:48 +00:00
|
|
|
import Git.Credential
|
2017-12-14 16:46:57 +00:00
|
|
|
import qualified BuildInfo
|
2013-09-28 18:35:21 +00:00
|
|
|
|
2018-06-17 17:05:30 +00:00
|
|
|
import Network.Socket
|
2019-07-17 20:48:50 +00:00
|
|
|
import Network.HTTP.Client
|
|
|
|
import Network.HTTP.Client.TLS
|
2020-02-25 19:45:52 +00:00
|
|
|
import Text.Read
|
2022-08-15 16:22:01 +00:00
|
|
|
import qualified Data.Set as S
|
2018-06-17 17:05:30 +00:00
|
|
|
|
2013-09-28 18:35:21 +00:00
|
|
|
defaultUserAgent :: U.UserAgent
|
2017-12-14 16:46:57 +00:00
|
|
|
defaultUserAgent = "git-annex/" ++ BuildInfo.packageversion
|
2013-09-28 18:35:21 +00:00
|
|
|
|
2018-07-16 16:06:06 +00:00
|
|
|
getUserAgent :: Annex U.UserAgent
|
2022-06-28 20:02:01 +00:00
|
|
|
getUserAgent = Annex.getRead $
|
2018-07-16 16:06:06 +00:00
|
|
|
fromMaybe defaultUserAgent . Annex.useragent
|
2013-09-28 18:35:21 +00:00
|
|
|
|
2018-04-04 19:15:12 +00:00
|
|
|
getUrlOptions :: Annex U.UrlOptions
|
|
|
|
getUrlOptions = Annex.getState Annex.urloptions >>= \case
|
|
|
|
Just uo -> return uo
|
2018-04-04 19:00:51 +00:00
|
|
|
Nothing -> do
|
|
|
|
uo <- mk
|
|
|
|
Annex.changeState $ \s -> s
|
|
|
|
{ Annex.urloptions = Just uo }
|
2018-04-04 19:15:12 +00:00
|
|
|
return uo
|
2014-02-25 02:00:25 +00:00
|
|
|
where
|
2018-06-17 17:05:30 +00:00
|
|
|
mk = do
|
|
|
|
(urldownloader, manager) <- checkallowedaddr
|
2019-11-12 17:33:41 +00:00
|
|
|
U.mkUrlOptions
|
2018-07-16 16:06:06 +00:00
|
|
|
<$> (Just <$> getUserAgent)
|
2018-06-17 17:05:30 +00:00
|
|
|
<*> headers
|
|
|
|
<*> pure urldownloader
|
|
|
|
<*> pure manager
|
|
|
|
<*> (annexAllowedUrlSchemes <$> Annex.getGitConfig)
|
2021-07-02 14:43:44 +00:00
|
|
|
<*> pure (Just (\u -> "Configuration of annex.security.allowed-url-schemes does not allow accessing " ++ show u))
|
2020-01-22 20:13:48 +00:00
|
|
|
<*> pure U.noBasicAuth
|
2018-06-17 17:05:30 +00:00
|
|
|
|
2017-12-05 19:00:50 +00:00
|
|
|
headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case
|
|
|
|
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
|
|
|
|
Nothing -> annexHttpHeaders <$> Annex.getGitConfig
|
2018-06-17 17:05:30 +00:00
|
|
|
|
2019-05-30 16:43:40 +00:00
|
|
|
checkallowedaddr = words . annexAllowedIPAddresses <$> Annex.getGitConfig >>= \case
|
2018-06-17 17:05:30 +00:00
|
|
|
["all"] -> do
|
|
|
|
curlopts <- map Param . annexWebOptions <$> Annex.getGitConfig
|
2022-08-15 16:22:01 +00:00
|
|
|
allowedurlschemes <- annexAllowedUrlSchemes <$> Annex.getGitConfig
|
|
|
|
let urldownloader = if null curlopts && not (any (`S.member` U.conduitUrlSchemes) allowedurlschemes)
|
2019-05-30 18:51:34 +00:00
|
|
|
then U.DownloadWithConduit $
|
|
|
|
U.DownloadWithCurlRestricted mempty
|
2018-10-04 17:43:29 +00:00
|
|
|
else U.DownloadWithCurl curlopts
|
2019-07-17 20:48:50 +00:00
|
|
|
manager <- liftIO $ U.newManager $
|
|
|
|
avoidtimeout $ tlsManagerSettings
|
2018-06-17 17:05:30 +00:00
|
|
|
return (urldownloader, manager)
|
2020-02-25 19:45:52 +00:00
|
|
|
allowedaddrsports -> do
|
2018-06-17 17:05:30 +00:00
|
|
|
addrmatcher <- liftIO $
|
|
|
|
(\l v -> any (\f -> f v) l) . catMaybes
|
2020-02-25 19:45:52 +00:00
|
|
|
<$> mapM (uncurry makeAddressMatcher)
|
|
|
|
(mapMaybe splitAddrPort allowedaddrsports)
|
2018-06-17 17:05:30 +00:00
|
|
|
-- Default to not allowing access to loopback
|
|
|
|
-- and private IP addresses to avoid data
|
|
|
|
-- leakage.
|
|
|
|
let isallowed addr
|
|
|
|
| addrmatcher addr = True
|
|
|
|
| isLoopbackAddress addr = False
|
|
|
|
| isPrivateAddress addr = False
|
|
|
|
| otherwise = True
|
2019-07-17 20:48:50 +00:00
|
|
|
let connectionrestricted = connectionRestricted
|
2019-05-30 16:43:40 +00:00
|
|
|
("Configuration of annex.security.allowed-ip-addresses does not allow accessing address " ++)
|
2019-07-17 20:48:50 +00:00
|
|
|
let r = addressRestriction $ \addr ->
|
|
|
|
if isallowed (addrAddress addr)
|
|
|
|
then Nothing
|
|
|
|
else Just (connectionrestricted addr)
|
2018-06-18 17:32:20 +00:00
|
|
|
(settings, pr) <- liftIO $
|
2019-07-17 20:48:50 +00:00
|
|
|
mkRestrictedManagerSettings r Nothing Nothing
|
2018-06-18 17:32:20 +00:00
|
|
|
case pr of
|
|
|
|
Nothing -> return ()
|
|
|
|
Just ProxyRestricted -> toplevelWarning True
|
2019-05-30 16:43:40 +00:00
|
|
|
"http proxy settings not used due to annex.security.allowed-ip-addresses configuration"
|
2019-07-17 20:48:50 +00:00
|
|
|
manager <- liftIO $ U.newManager $
|
|
|
|
avoidtimeout settings
|
2021-03-24 17:57:00 +00:00
|
|
|
-- Curl is not used, as its interface does not allow
|
|
|
|
-- preventing it from accessing specific IP addresses.
|
2019-05-30 18:51:34 +00:00
|
|
|
let urldownloader = U.DownloadWithConduit $
|
|
|
|
U.DownloadWithCurlRestricted r
|
|
|
|
return (urldownloader, manager)
|
2019-07-17 20:48:50 +00:00
|
|
|
|
|
|
|
-- http-client defailts to timing out a request after 30 seconds
|
|
|
|
-- or so, but some web servers are slower and git-annex has its own
|
|
|
|
-- separate timeout controls, so disable that.
|
|
|
|
avoidtimeout s = s { managerResponseTimeout = responseTimeoutNone }
|
2018-04-04 19:15:12 +00:00
|
|
|
|
2020-02-25 19:45:52 +00:00
|
|
|
splitAddrPort :: String -> Maybe (String, Maybe PortNumber)
|
|
|
|
splitAddrPort s
|
|
|
|
-- "[addr]:port" (also allow "[addr]")
|
|
|
|
| "[" `isPrefixOf` s = case splitc ']' (drop 1 s) of
|
|
|
|
[a,cp] -> case splitc ':' cp of
|
|
|
|
["",p] -> do
|
|
|
|
pn <- readMaybe p
|
|
|
|
return (a, Just pn)
|
|
|
|
[""] -> Just (a, Nothing)
|
|
|
|
_ -> Nothing
|
|
|
|
_ -> Nothing
|
|
|
|
| otherwise = Just (s, Nothing)
|
|
|
|
|
2019-05-30 16:43:40 +00:00
|
|
|
ipAddressesUnlimited :: Annex Bool
|
|
|
|
ipAddressesUnlimited =
|
|
|
|
("all" == ) . annexAllowedIPAddresses <$> Annex.getGitConfig
|
2018-06-17 18:46:22 +00:00
|
|
|
|
2018-04-04 19:15:12 +00:00
|
|
|
withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
|
|
|
|
withUrlOptions a = a =<< getUrlOptions
|
2019-11-12 17:33:41 +00:00
|
|
|
|
2020-01-22 20:13:48 +00:00
|
|
|
-- When downloading an url, if authentication is needed, uses
|
|
|
|
-- git-credential to prompt for username and password.
|
|
|
|
withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a
|
|
|
|
withUrlOptionsPromptingCreds a = do
|
|
|
|
g <- Annex.gitRepo
|
|
|
|
uo <- getUrlOptions
|
2020-01-22 20:38:34 +00:00
|
|
|
prompter <- mkPrompter
|
2020-01-22 20:13:48 +00:00
|
|
|
a $ uo
|
2020-01-22 20:38:34 +00:00
|
|
|
{ U.getBasicAuth = \u -> prompter $
|
|
|
|
getBasicAuthFromCredential g u
|
2020-01-22 20:13:48 +00:00
|
|
|
-- Can't download with curl and handle basic auth,
|
2020-01-22 20:38:34 +00:00
|
|
|
-- so make sure it uses conduit.
|
2020-01-22 20:13:48 +00:00
|
|
|
, U.urlDownloader = case U.urlDownloader uo of
|
2020-01-22 20:38:34 +00:00
|
|
|
U.DownloadWithCurl _ -> U.DownloadWithConduit $
|
|
|
|
U.DownloadWithCurlRestricted mempty
|
2020-01-22 20:13:48 +00:00
|
|
|
v -> v
|
|
|
|
}
|
|
|
|
|
2019-11-12 17:33:41 +00:00
|
|
|
checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool
|
|
|
|
checkBoth url expected_size uo =
|
|
|
|
liftIO (U.checkBoth url expected_size uo) >>= \case
|
|
|
|
Right r -> return r
|
|
|
|
Left err -> warning err >> return False
|
|
|
|
|
2021-08-18 18:49:01 +00:00
|
|
|
download :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
|
|
|
|
download meterupdate iv url file uo =
|
|
|
|
liftIO (U.download meterupdate iv url file uo) >>= \case
|
2019-11-12 17:33:41 +00:00
|
|
|
Right () -> return True
|
|
|
|
Left err -> warning err >> return False
|
|
|
|
|
2021-08-18 18:49:01 +00:00
|
|
|
download' :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ())
|
|
|
|
download' meterupdate iv url file uo =
|
|
|
|
liftIO (U.download meterupdate iv url file uo)
|
2020-05-15 16:51:09 +00:00
|
|
|
|
2019-11-12 17:33:41 +00:00
|
|
|
exists :: U.URLString -> U.UrlOptions -> Annex Bool
|
|
|
|
exists url uo = liftIO (U.exists url uo) >>= \case
|
|
|
|
Right b -> return b
|
|
|
|
Left err -> warning err >> return False
|
|
|
|
|
2020-04-27 17:48:14 +00:00
|
|
|
getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo)
|
|
|
|
getUrlInfo url uo = liftIO (U.getUrlInfo url uo)
|