split out Utility.Url.Parse

This is mostly for git-repair which can't include all of Utility.Url
without adding many dependencies that are not really necessary.
This commit is contained in:
Joey Hess 2023-08-14 12:28:10 -04:00
parent adda6c1088
commit be028f10e5
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 102 additions and 53 deletions

View file

@ -34,6 +34,7 @@ module Annex.Url (
import Annex.Common
import qualified Annex
import qualified Utility.Url as U
import qualified Utility.Url.Parse as U
import Utility.Hash (IncrementalVerifier)
import Utility.IPAddress
import Network.HTTP.Client.Restricted

View file

@ -16,6 +16,7 @@ import Assistant.NamedThread
import Utility.ThreadScheduler
import Utility.NotificationBroadcaster
import Utility.Url
import Utility.Url.Parse
import Utility.PID
import qualified Utility.RawFilePath as R
import qualified Git.Construct

View file

@ -22,7 +22,7 @@ import Assistant.Gpg
import Types.GitConfig
import Annex.SpecialRemote.Config
import Types.ProposedAccepted
import Utility.Url
import Utility.Url.Parse
import qualified Data.Map as M
import qualified Data.Text as T

View file

@ -33,7 +33,7 @@ import Logs.Location
import Utility.Metered
import Utility.HtmlDetect
import Utility.Path.Max
import Utility.Url (parseURIPortable)
import Utility.Url.Parse
import qualified Utility.RawFilePath as R
import qualified Annex.Transfer as Transfer

View file

@ -20,7 +20,7 @@ import Annex.Link
import Annex.FileMatcher
import Annex.Ingest
import Git.FilePath
import Utility.Url
import Utility.Url.Parse
import Network.URI

View file

@ -39,7 +39,7 @@ import Git.Remote
import Git.FilePath
import qualified Git.Url as Url
import Utility.UserInfo
import Utility.Url (parseURIPortable)
import Utility.Url.Parse
import qualified Data.ByteString as B
import qualified System.FilePath.ByteString as P

View file

@ -15,6 +15,7 @@ import Git.Types
import Git.Command
import qualified Git.Config as Config
import Utility.Url
import Utility.Url.Parse
import qualified Data.Map as M
import Network.URI

View file

@ -23,7 +23,7 @@ import Types.CleanupActions
import Messages.Progress
import Utility.Metered
import Utility.Tmp
import Utility.Url (parseURIPortable)
import Utility.Url.Parse
import Backend.URL
import Annex.Perms
import Annex.Tmp

View file

@ -53,7 +53,8 @@ import Types.Export
import Types.Availability (Availability(..))
import Types.Key
import Git.Types
import Utility.Url (URLString, parseURIPortable)
import Utility.Url (URLString)
import Utility.Url.Parse
import qualified Utility.SimpleProtocol as Proto
import Control.Concurrent.STM

View file

@ -16,6 +16,7 @@ import qualified Utility.SimpleProtocol as Proto
import Types.GitConfig
import Annex.ChangedRefs (ChangedRefs)
import Utility.Url
import Utility.Url.Parse
import Network.URI
import Control.Concurrent

View file

@ -9,7 +9,6 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
module Utility.Url (
newManager,
@ -33,8 +32,6 @@ module Utility.Url (
downloadConduit,
sinkResponseFile,
downloadPartial,
parseURIPortable,
parseURIRelaxed,
matchStatusCodeException,
matchHttpExceptionContent,
BasicAuth(..),
@ -52,6 +49,7 @@ import Network.HTTP.Client.Restricted
import Utility.IPAddress
import qualified Utility.RawFilePath as R
import Utility.Hash (IncrementalVerifier(..))
import Utility.Url.Parse
import Network.URI
import Network.HTTP.Types
@ -72,9 +70,6 @@ import Network.BSD (getProtocolNumber)
import Data.Either
import Data.Conduit
import Text.Read
#ifdef mingw32_HOST_OS
import qualified System.FilePath.Windows as PW
#endif
type URLString = String
@ -612,30 +607,6 @@ downloadPartial url uo n = case parseURIRelaxed url of
then Just <$> brReadSome (responseBody resp) n
else return Nothing
{- On unix this is the same as parseURI. But on Windows,
- it can parse urls such as file:///C:/path/to/file
- parseURI normally parses that as a path /C:/path/to/file
- and this simply removes the excess leading slash when there is a
- drive letter after it. -}
parseURIPortable :: URLString -> Maybe URI
#ifndef mingw32_HOST_OS
parseURIPortable = parseURI
#else
parseURIPortable s
| "file:" `isPrefixOf` s = do
u <- parseURI s
return $ case PW.splitDirectories (uriPath u) of
(p:d:_) | all PW.isPathSeparator p && PW.isDrive d ->
u { uriPath = dropWhile PW.isPathSeparator (uriPath u) }
_ -> u
| otherwise = parseURI s
#endif
{- Allows for spaces and other stuff in urls, properly escaping them. -}
parseURIRelaxed :: URLString -> Maybe URI
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
parseURIPortable $ escapeURIString isAllowedInURI s
{- Generate a http-conduit Request for an URI. This is able
- to deal with some urls that parseRequest would usually reject.
-}
@ -649,23 +620,6 @@ parseRequestRelaxed u = case uriAuthority u of
u { uriAuthority = Just $ ua { uriPort = "" } }
_ -> parseRequest (show u)
{- Some characters like '[' are allowed in eg, the address of
- an uri, but cannot appear unescaped further along in the uri.
- This handles that, expensively, by successively escaping each character
- from the back of the url until the url parses.
-}
parseURIRelaxed' :: URLString -> Maybe URI
parseURIRelaxed' s = go [] (reverse s)
where
go back [] = parseURI back
go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of
Just u -> Just u
Nothing -> go (escapeURIChar escapemore c ++ back) cs
escapemore '[' = False
escapemore ']' = False
escapemore c = isAllowedInURI c
hAcceptEncoding :: CI.CI B.ByteString
hAcceptEncoding = "Accept-Encoding"

89
Utility/Url/Parse.hs Normal file
View file

@ -0,0 +1,89 @@
{- Url parsing.
-
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
module Utility.Url.Parse (
parseURIPortable,
parseURIRelaxed,
) where
import Common
import Utility.Debug
import Utility.Metered
import Network.HTTP.Client.Restricted
import Utility.IPAddress
import qualified Utility.RawFilePath as R
import Utility.Hash (IncrementalVerifier(..))
import Network.URI
import Network.HTTP.Types
import qualified Data.CaseInsensitive as CI
import qualified Data.ByteString as B
import qualified Data.ByteString.UTF8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import Control.Exception (throwIO, evaluate)
import Control.Monad.Trans.Resource
import Control.Monad.IO.Class (MonadIO)
import Control.DeepSeq
import Network.HTTP.Conduit
import Network.HTTP.Client
import Network.HTTP.Simple (getResponseHeader)
import Network.Socket
import Network.BSD (getProtocolNumber)
import Data.Either
import Data.Conduit
import Text.Read
#ifdef mingw32_HOST_OS
import qualified System.FilePath.Windows as PW
#endif
{- On unix this is the same as parseURI. But on Windows,
- it can parse urls such as file:///C:/path/to/file
- parseURI normally parses that as a path /C:/path/to/file
- and this simply removes the excess leading slash when there is a
- drive letter after it. -}
parseURIPortable :: String -> Maybe URI
#ifndef mingw32_HOST_OS
parseURIPortable = parseURI
#else
parseURIPortable s
| "file:" `isPrefixOf` s = do
u <- parseURI s
return $ case PW.splitDirectories (uriPath u) of
(p:d:_) | all PW.isPathSeparator p && PW.isDrive d ->
u { uriPath = dropWhile PW.isPathSeparator (uriPath u) }
_ -> u
| otherwise = parseURI s
#endif
{- Allows for spaces and other stuff in urls, properly escaping them. -}
parseURIRelaxed :: String -> Maybe URI
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
parseURIPortable $ escapeURIString isAllowedInURI s
{- Some characters like '[' are allowed in eg, the address of
- an uri, but cannot appear unescaped further along in the uri.
- This handles that, expensively, by successively escaping each character
- from the back of the url until the url parses.
-}
parseURIRelaxed' :: String -> Maybe URI
parseURIRelaxed' s = go [] (reverse s)
where
go back [] = parseURI back
go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of
Just u -> Just u
Nothing -> go (escapeURIChar escapemore c ++ back) cs
escapemore '[' = False
escapemore ']' = False
escapemore c = isAllowedInURI c

View file

@ -1175,6 +1175,7 @@ Executable git-annex
Utility.Touch
Utility.Tuple
Utility.Url
Utility.Url.Parse
Utility.UserInfo
Utility.Verifiable