git-annex/Annex/Url.hs
Joey Hess d2c842e9a1
don't force use of conduit in withUrlOptionsPromptingCreds
Use curl for downloads from git remotes when annex.url-options and other
git configs are set.

If the url needs a password, curl will fail, and git credential will not be
used to prompt for it. But the user can set --netrc in url-options and
put the password in the netrc file.

This also means that url-options settings like -4 will take effect.
That was the case before commit 1883f7ef8f
forced conduit to be used.
2022-09-09 16:07:32 -04:00

187 lines
6.1 KiB
Haskell

{- Url downloading, with git-annex user agent and configured http
- headers, security restrictions, etc.
-
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Url (
withUrlOptions,
withUrlOptionsPromptingCreds,
getUrlOptions,
getUserAgent,
ipAddressesUnlimited,
checkBoth,
download,
download',
exists,
getUrlInfo,
U.URLString,
U.UrlOptions(..),
U.UrlInfo(..),
U.sinkResponseFile,
U.matchStatusCodeException,
U.downloadConduit,
U.downloadPartial,
U.parseURIRelaxed,
U.allowedScheme,
U.assumeUrlExists,
) where
import Annex.Common
import qualified Annex
import qualified Utility.Url as U
import Utility.Hash (IncrementalVerifier)
import Utility.IPAddress
import Network.HTTP.Client.Restricted
import Utility.Metered
import Git.Credential
import qualified BuildInfo
import Network.Socket
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import Text.Read
import qualified Data.Set as S
defaultUserAgent :: U.UserAgent
defaultUserAgent = "git-annex/" ++ BuildInfo.packageversion
getUserAgent :: Annex U.UserAgent
getUserAgent = Annex.getRead $
fromMaybe defaultUserAgent . Annex.useragent
getUrlOptions :: Annex U.UrlOptions
getUrlOptions = Annex.getState Annex.urloptions >>= \case
Just uo -> return uo
Nothing -> do
uo <- mk
Annex.changeState $ \s -> s
{ Annex.urloptions = Just uo }
return uo
where
mk = do
(urldownloader, manager) <- checkallowedaddr
U.mkUrlOptions
<$> (Just <$> getUserAgent)
<*> headers
<*> pure urldownloader
<*> pure manager
<*> (annexAllowedUrlSchemes <$> Annex.getGitConfig)
<*> pure (Just (\u -> "Configuration of annex.security.allowed-url-schemes does not allow accessing " ++ show u))
<*> pure U.noBasicAuth
headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
Nothing -> annexHttpHeaders <$> Annex.getGitConfig
checkallowedaddr = words . annexAllowedIPAddresses <$> Annex.getGitConfig >>= \case
["all"] -> do
curlopts <- map Param . annexWebOptions <$> Annex.getGitConfig
allowedurlschemes <- annexAllowedUrlSchemes <$> Annex.getGitConfig
let urldownloader = if null curlopts && not (any (`S.member` U.conduitUrlSchemes) allowedurlschemes)
then U.DownloadWithConduit $
U.DownloadWithCurlRestricted mempty
else U.DownloadWithCurl curlopts
manager <- liftIO $ U.newManager $
avoidtimeout $ tlsManagerSettings
return (urldownloader, manager)
allowedaddrsports -> do
addrmatcher <- liftIO $
(\l v -> any (\f -> f v) l) . catMaybes
<$> mapM (uncurry makeAddressMatcher)
(mapMaybe splitAddrPort allowedaddrsports)
-- Default to not allowing access to loopback
-- and private IP addresses to avoid data
-- leakage.
let isallowed addr
| addrmatcher addr = True
| isLoopbackAddress addr = False
| isPrivateAddress addr = False
| otherwise = True
let connectionrestricted = connectionRestricted
("Configuration of annex.security.allowed-ip-addresses does not allow accessing address " ++)
let r = addressRestriction $ \addr ->
if isallowed (addrAddress addr)
then Nothing
else Just (connectionrestricted addr)
(settings, pr) <- liftIO $
mkRestrictedManagerSettings r Nothing Nothing
case pr of
Nothing -> return ()
Just ProxyRestricted -> toplevelWarning True
"http proxy settings not used due to annex.security.allowed-ip-addresses configuration"
manager <- liftIO $ U.newManager $
avoidtimeout settings
-- Curl is not used, as its interface does not allow
-- preventing it from accessing specific IP addresses.
let urldownloader = U.DownloadWithConduit $
U.DownloadWithCurlRestricted r
return (urldownloader, manager)
-- http-client defailts to timing out a request after 30 seconds
-- or so, but some web servers are slower and git-annex has its own
-- separate timeout controls, so disable that.
avoidtimeout s = s { managerResponseTimeout = responseTimeoutNone }
splitAddrPort :: String -> Maybe (String, Maybe PortNumber)
splitAddrPort s
-- "[addr]:port" (also allow "[addr]")
| "[" `isPrefixOf` s = case splitc ']' (drop 1 s) of
[a,cp] -> case splitc ':' cp of
["",p] -> do
pn <- readMaybe p
return (a, Just pn)
[""] -> Just (a, Nothing)
_ -> Nothing
_ -> Nothing
| otherwise = Just (s, Nothing)
ipAddressesUnlimited :: Annex Bool
ipAddressesUnlimited =
("all" == ) . annexAllowedIPAddresses <$> Annex.getGitConfig
withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
withUrlOptions a = a =<< getUrlOptions
-- When downloading an url, if authentication is needed, uses
-- git-credential to prompt for username and password.
--
-- Note that, when the downloader is curl, it will not use git-credential.
-- If the user wants to, they can configure curl to use a netrc file that
-- handles authentication.
withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a
withUrlOptionsPromptingCreds a = do
g <- Annex.gitRepo
uo <- getUrlOptions
prompter <- mkPrompter
cc <- Annex.getRead Annex.gitcredentialcache
a $ uo
{ U.getBasicAuth = \u -> prompter $
getBasicAuthFromCredential g cc u
}
checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool
checkBoth url expected_size uo =
liftIO (U.checkBoth url expected_size uo) >>= \case
Right r -> return r
Left err -> warning err >> return False
download :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
download meterupdate iv url file uo =
liftIO (U.download meterupdate iv url file uo) >>= \case
Right () -> return True
Left err -> warning err >> return False
download' :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ())
download' meterupdate iv url file uo =
liftIO (U.download meterupdate iv url file uo)
exists :: U.URLString -> U.UrlOptions -> Annex Bool
exists url uo = liftIO (U.exists url uo) >>= \case
Right b -> return b
Left err -> warning err >> return False
getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo)
getUrlInfo url uo = liftIO (U.getUrlInfo url uo)