split out Utility.Url.Parse
This is mostly for git-repair which can't include all of Utility.Url without adding many dependencies that are not really necessary.
This commit is contained in:
parent
adda6c1088
commit
be028f10e5
13 changed files with 102 additions and 53 deletions
89
Utility/Url/Parse.hs
Normal file
89
Utility/Url/Parse.hs
Normal file
|
@ -0,0 +1,89 @@
|
|||
{- Url parsing.
|
||||
-
|
||||
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Utility.Url.Parse (
|
||||
parseURIPortable,
|
||||
parseURIRelaxed,
|
||||
) where
|
||||
|
||||
import Common
|
||||
import Utility.Debug
|
||||
import Utility.Metered
|
||||
import Network.HTTP.Client.Restricted
|
||||
import Utility.IPAddress
|
||||
import qualified Utility.RawFilePath as R
|
||||
import Utility.Hash (IncrementalVerifier(..))
|
||||
|
||||
import Network.URI
|
||||
import Network.HTTP.Types
|
||||
import qualified Data.CaseInsensitive as CI
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.UTF8 as B8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Set as S
|
||||
import Control.Exception (throwIO, evaluate)
|
||||
import Control.Monad.Trans.Resource
|
||||
import Control.Monad.IO.Class (MonadIO)
|
||||
import Control.DeepSeq
|
||||
import Network.HTTP.Conduit
|
||||
import Network.HTTP.Client
|
||||
import Network.HTTP.Simple (getResponseHeader)
|
||||
import Network.Socket
|
||||
import Network.BSD (getProtocolNumber)
|
||||
import Data.Either
|
||||
import Data.Conduit
|
||||
import Text.Read
|
||||
#ifdef mingw32_HOST_OS
|
||||
import qualified System.FilePath.Windows as PW
|
||||
#endif
|
||||
|
||||
{- On unix this is the same as parseURI. But on Windows,
|
||||
- it can parse urls such as file:///C:/path/to/file
|
||||
- parseURI normally parses that as a path /C:/path/to/file
|
||||
- and this simply removes the excess leading slash when there is a
|
||||
- drive letter after it. -}
|
||||
parseURIPortable :: String -> Maybe URI
|
||||
#ifndef mingw32_HOST_OS
|
||||
parseURIPortable = parseURI
|
||||
#else
|
||||
parseURIPortable s
|
||||
| "file:" `isPrefixOf` s = do
|
||||
u <- parseURI s
|
||||
return $ case PW.splitDirectories (uriPath u) of
|
||||
(p:d:_) | all PW.isPathSeparator p && PW.isDrive d ->
|
||||
u { uriPath = dropWhile PW.isPathSeparator (uriPath u) }
|
||||
_ -> u
|
||||
| otherwise = parseURI s
|
||||
#endif
|
||||
|
||||
{- Allows for spaces and other stuff in urls, properly escaping them. -}
|
||||
parseURIRelaxed :: String -> Maybe URI
|
||||
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
|
||||
parseURIPortable $ escapeURIString isAllowedInURI s
|
||||
|
||||
{- Some characters like '[' are allowed in eg, the address of
|
||||
- an uri, but cannot appear unescaped further along in the uri.
|
||||
- This handles that, expensively, by successively escaping each character
|
||||
- from the back of the url until the url parses.
|
||||
-}
|
||||
parseURIRelaxed' :: String -> Maybe URI
|
||||
parseURIRelaxed' s = go [] (reverse s)
|
||||
where
|
||||
go back [] = parseURI back
|
||||
go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of
|
||||
Just u -> Just u
|
||||
Nothing -> go (escapeURIChar escapemore c ++ back) cs
|
||||
|
||||
escapemore '[' = False
|
||||
escapemore ']' = False
|
||||
escapemore c = isAllowedInURI c
|
Loading…
Add table
Add a link
Reference in a new issue