improve url parsing more
It can now handle, e.g., "http://[::1]/download/cdrom-fontzip[foo]", where the first pair of [] needs to stay unescaped, but the rest have to be escaped.
This commit is contained in:
parent
829007d629
commit
a6c56fb459
1 changed files with 15 additions and 7 deletions
|
@ -263,14 +263,22 @@ download' quiet url file uo = do
|
|||
|
||||
{- Allows for spaces and other stuff in urls, properly escaping them. -}
|
||||
parseURIRelaxed :: URLString -> Maybe URI
|
||||
parseURIRelaxed s = maybe (go escapemore) Just $ go isAllowedInURI
|
||||
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
|
||||
parseURI $ escapeURIString isAllowedInURI s
|
||||
|
||||
{- Some characters like '[' are allowed in eg, the address of
|
||||
- an uri, but cannot appear unescaped further along in the uri.
|
||||
- This handles that, expensively, by successively escaping each character
|
||||
- from the back of the url until the url parses.
|
||||
-}
|
||||
parseURIRelaxed' :: URLString -> Maybe URI
|
||||
parseURIRelaxed' s = go [] (reverse s)
|
||||
where
|
||||
go f = parseURI $ escapeURIString f s
|
||||
{- Some characters like '[' are allowed in eg, the address of
|
||||
- an uri, but cannot appear unescaped elsewhere in the uri.
|
||||
- If parsing fails with those characters unescaped, fall back
|
||||
- to escaping them too.
|
||||
-}
|
||||
go back [] = parseURI back
|
||||
go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of
|
||||
Just u -> Just u
|
||||
Nothing -> go (escapeURIChar escapemore c ++ back) cs
|
||||
|
||||
escapemore '[' = False
|
||||
escapemore ']' = False
|
||||
escapemore c = isAllowedInURI c
|
||||
|
|
Loading…
Add table
Reference in a new issue