split out Utility.Url.Parse

This is mostly for git-repair which can't include all of Utility.Url
without adding many dependencies that are not really necessary.
This commit is contained in:
Joey Hess 2023-08-14 12:28:10 -04:00
parent adda6c1088
commit be028f10e5
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 102 additions and 53 deletions

View file

@ -34,6 +34,7 @@ module Annex.Url (
import Annex.Common import Annex.Common
import qualified Annex import qualified Annex
import qualified Utility.Url as U import qualified Utility.Url as U
import qualified Utility.Url.Parse as U
import Utility.Hash (IncrementalVerifier) import Utility.Hash (IncrementalVerifier)
import Utility.IPAddress import Utility.IPAddress
import Network.HTTP.Client.Restricted import Network.HTTP.Client.Restricted

View file

@ -16,6 +16,7 @@ import Assistant.NamedThread
import Utility.ThreadScheduler import Utility.ThreadScheduler
import Utility.NotificationBroadcaster import Utility.NotificationBroadcaster
import Utility.Url import Utility.Url
import Utility.Url.Parse
import Utility.PID import Utility.PID
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import qualified Git.Construct import qualified Git.Construct

View file

@ -22,7 +22,7 @@ import Assistant.Gpg
import Types.GitConfig import Types.GitConfig
import Annex.SpecialRemote.Config import Annex.SpecialRemote.Config
import Types.ProposedAccepted import Types.ProposedAccepted
import Utility.Url import Utility.Url.Parse
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Text as T import qualified Data.Text as T

View file

@ -33,7 +33,7 @@ import Logs.Location
import Utility.Metered import Utility.Metered
import Utility.HtmlDetect import Utility.HtmlDetect
import Utility.Path.Max import Utility.Path.Max
import Utility.Url (parseURIPortable) import Utility.Url.Parse
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import qualified Annex.Transfer as Transfer import qualified Annex.Transfer as Transfer

View file

@ -20,7 +20,7 @@ import Annex.Link
import Annex.FileMatcher import Annex.FileMatcher
import Annex.Ingest import Annex.Ingest
import Git.FilePath import Git.FilePath
import Utility.Url import Utility.Url.Parse
import Network.URI import Network.URI

View file

@ -39,7 +39,7 @@ import Git.Remote
import Git.FilePath import Git.FilePath
import qualified Git.Url as Url import qualified Git.Url as Url
import Utility.UserInfo import Utility.UserInfo
import Utility.Url (parseURIPortable) import Utility.Url.Parse
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P

View file

@ -15,6 +15,7 @@ import Git.Types
import Git.Command import Git.Command
import qualified Git.Config as Config import qualified Git.Config as Config
import Utility.Url import Utility.Url
import Utility.Url.Parse
import qualified Data.Map as M import qualified Data.Map as M
import Network.URI import Network.URI

View file

@ -23,7 +23,7 @@ import Types.CleanupActions
import Messages.Progress import Messages.Progress
import Utility.Metered import Utility.Metered
import Utility.Tmp import Utility.Tmp
import Utility.Url (parseURIPortable) import Utility.Url.Parse
import Backend.URL import Backend.URL
import Annex.Perms import Annex.Perms
import Annex.Tmp import Annex.Tmp

View file

@ -53,7 +53,8 @@ import Types.Export
import Types.Availability (Availability(..)) import Types.Availability (Availability(..))
import Types.Key import Types.Key
import Git.Types import Git.Types
import Utility.Url (URLString, parseURIPortable) import Utility.Url (URLString)
import Utility.Url.Parse
import qualified Utility.SimpleProtocol as Proto import qualified Utility.SimpleProtocol as Proto
import Control.Concurrent.STM import Control.Concurrent.STM

View file

@ -16,6 +16,7 @@ import qualified Utility.SimpleProtocol as Proto
import Types.GitConfig import Types.GitConfig
import Annex.ChangedRefs (ChangedRefs) import Annex.ChangedRefs (ChangedRefs)
import Utility.Url import Utility.Url
import Utility.Url.Parse
import Network.URI import Network.URI
import Control.Concurrent import Control.Concurrent

View file

@ -9,7 +9,6 @@
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
module Utility.Url ( module Utility.Url (
newManager, newManager,
@ -33,8 +32,6 @@ module Utility.Url (
downloadConduit, downloadConduit,
sinkResponseFile, sinkResponseFile,
downloadPartial, downloadPartial,
parseURIPortable,
parseURIRelaxed,
matchStatusCodeException, matchStatusCodeException,
matchHttpExceptionContent, matchHttpExceptionContent,
BasicAuth(..), BasicAuth(..),
@ -52,6 +49,7 @@ import Network.HTTP.Client.Restricted
import Utility.IPAddress import Utility.IPAddress
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import Utility.Hash (IncrementalVerifier(..)) import Utility.Hash (IncrementalVerifier(..))
import Utility.Url.Parse
import Network.URI import Network.URI
import Network.HTTP.Types import Network.HTTP.Types
@ -72,9 +70,6 @@ import Network.BSD (getProtocolNumber)
import Data.Either import Data.Either
import Data.Conduit import Data.Conduit
import Text.Read import Text.Read
#ifdef mingw32_HOST_OS
import qualified System.FilePath.Windows as PW
#endif
type URLString = String type URLString = String
@ -612,30 +607,6 @@ downloadPartial url uo n = case parseURIRelaxed url of
then Just <$> brReadSome (responseBody resp) n then Just <$> brReadSome (responseBody resp) n
else return Nothing else return Nothing
{- On unix this is the same as parseURI. But on Windows,
- it can parse urls such as file:///C:/path/to/file
- parseURI normally parses that as a path /C:/path/to/file
- and this simply removes the excess leading slash when there is a
- drive letter after it. -}
parseURIPortable :: URLString -> Maybe URI
#ifndef mingw32_HOST_OS
parseURIPortable = parseURI
#else
parseURIPortable s
| "file:" `isPrefixOf` s = do
u <- parseURI s
return $ case PW.splitDirectories (uriPath u) of
(p:d:_) | all PW.isPathSeparator p && PW.isDrive d ->
u { uriPath = dropWhile PW.isPathSeparator (uriPath u) }
_ -> u
| otherwise = parseURI s
#endif
{- Allows for spaces and other stuff in urls, properly escaping them. -}
parseURIRelaxed :: URLString -> Maybe URI
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
parseURIPortable $ escapeURIString isAllowedInURI s
{- Generate a http-conduit Request for an URI. This is able {- Generate a http-conduit Request for an URI. This is able
- to deal with some urls that parseRequest would usually reject. - to deal with some urls that parseRequest would usually reject.
-} -}
@ -649,23 +620,6 @@ parseRequestRelaxed u = case uriAuthority u of
u { uriAuthority = Just $ ua { uriPort = "" } } u { uriAuthority = Just $ ua { uriPort = "" } }
_ -> parseRequest (show u) _ -> parseRequest (show u)
{- Some characters like '[' are allowed in eg, the address of
- an uri, but cannot appear unescaped further along in the uri.
- This handles that, expensively, by successively escaping each character
- from the back of the url until the url parses.
-}
parseURIRelaxed' :: URLString -> Maybe URI
parseURIRelaxed' s = go [] (reverse s)
where
go back [] = parseURI back
go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of
Just u -> Just u
Nothing -> go (escapeURIChar escapemore c ++ back) cs
escapemore '[' = False
escapemore ']' = False
escapemore c = isAllowedInURI c
hAcceptEncoding :: CI.CI B.ByteString hAcceptEncoding :: CI.CI B.ByteString
hAcceptEncoding = "Accept-Encoding" hAcceptEncoding = "Accept-Encoding"

89
Utility/Url/Parse.hs Normal file
View file

@ -0,0 +1,89 @@
{- Url parsing.
-
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
module Utility.Url.Parse (
parseURIPortable,
parseURIRelaxed,
) where
import Common
import Utility.Debug
import Utility.Metered
import Network.HTTP.Client.Restricted
import Utility.IPAddress
import qualified Utility.RawFilePath as R
import Utility.Hash (IncrementalVerifier(..))
import Network.URI
import Network.HTTP.Types
import qualified Data.CaseInsensitive as CI
import qualified Data.ByteString as B
import qualified Data.ByteString.UTF8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.Set as S
import Control.Exception (throwIO, evaluate)
import Control.Monad.Trans.Resource
import Control.Monad.IO.Class (MonadIO)
import Control.DeepSeq
import Network.HTTP.Conduit
import Network.HTTP.Client
import Network.HTTP.Simple (getResponseHeader)
import Network.Socket
import Network.BSD (getProtocolNumber)
import Data.Either
import Data.Conduit
import Text.Read
#ifdef mingw32_HOST_OS
import qualified System.FilePath.Windows as PW
#endif
{- On unix this is the same as parseURI. But on Windows,
- it can parse urls such as file:///C:/path/to/file
- parseURI normally parses that as a path /C:/path/to/file
- and this simply removes the excess leading slash when there is a
- drive letter after it. -}
parseURIPortable :: String -> Maybe URI
#ifndef mingw32_HOST_OS
parseURIPortable = parseURI
#else
parseURIPortable s
| "file:" `isPrefixOf` s = do
u <- parseURI s
return $ case PW.splitDirectories (uriPath u) of
(p:d:_) | all PW.isPathSeparator p && PW.isDrive d ->
u { uriPath = dropWhile PW.isPathSeparator (uriPath u) }
_ -> u
| otherwise = parseURI s
#endif
{- Allows for spaces and other stuff in urls, properly escaping them. -}
parseURIRelaxed :: String -> Maybe URI
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
parseURIPortable $ escapeURIString isAllowedInURI s
{- Some characters like '[' are allowed in eg, the address of
- an uri, but cannot appear unescaped further along in the uri.
- This handles that, expensively, by successively escaping each character
- from the back of the url until the url parses.
-}
parseURIRelaxed' :: String -> Maybe URI
parseURIRelaxed' s = go [] (reverse s)
where
go back [] = parseURI back
go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of
Just u -> Just u
Nothing -> go (escapeURIChar escapemore c ++ back) cs
escapemore '[' = False
escapemore ']' = False
escapemore c = isAllowedInURI c

View file

@ -1175,6 +1175,7 @@ Executable git-annex
Utility.Touch Utility.Touch
Utility.Tuple Utility.Tuple
Utility.Url Utility.Url
Utility.Url.Parse
Utility.UserInfo Utility.UserInfo
Utility.Verifiable Utility.Verifiable