improve url parsing more

Now handles, e.g., "http://[::1]/download/cdrom-fontzip[foo]", where
the first [] needs to stay unescaped, but the later ones have to be escaped.
This commit is contained in:
Joey Hess 2015-06-14 13:54:24 -04:00
parent 829007d629
commit a6c56fb459

View file

@ -263,14 +263,22 @@ download' quiet url file uo = do
{- Allows for spaces and other stuff in urls, properly escaping them. -} {- Allows for spaces and other stuff in urls, properly escaping them. -}
{- Parses s after escaping every character that isAllowedInURI rejects.
 - When that first parse yields Nothing, a fallback that escapes more
 - characters is tried instead, so the result is Nothing only when the
 - fallback fails too.
 -}
parseURIRelaxed :: URLString -> Maybe URI parseURIRelaxed :: URLString -> Maybe URI
parseURIRelaxed s = maybe (go escapemore) Just $ go isAllowedInURI parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
where parseURI $ escapeURIString isAllowedInURI s
go f = parseURI $ escapeURIString f s
{- Some characters like '[' are allowed in eg, the address of {- Some characters like '[' are allowed in eg, the address of
- an uri, but cannot appear unescaped elsewhere in the uri. - an uri, but cannot appear unescaped further along in the uri.
- If parsing fails with those characters unescaped, fall back - This handles that, expensively, by successively escaping each character
- to escaping them too. - from the back of the url until the url parses.
-} -}
{- Fallback parser that walks the url from its last character to its first.
 - At every step the not-yet-visited prefix is escaped with isAllowedInURI
 - and joined to the already-visited suffix, which has been escaped with
 - escapemore (the same predicate, except that it also rejects '[' and ']').
 - The first combination that parseURI accepts is returned. Nothing is
 - returned when even the url escaped entirely with escapemore fails.
 - Each step re-escapes the prefix and re-parses the whole url.
 -}
parseURIRelaxed' :: URLString -> Maybe URI
parseURIRelaxed' s = go [] (reverse s)
where
{- Input exhausted: back now holds the whole url, escaped with escapemore. -}
go back [] = parseURI back
{- (c:cs) is the unvisited prefix in reverse order, so it is reversed again
 - before escaping; back is the visited suffix, already escaped. -}
go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of
Just u -> Just u
{- Still unparseable: move c onto the suffix, escaped with the stricter
 - predicate, and retry with a shorter prefix. -}
Nothing -> go (escapeURIChar escapemore c ++ back) cs
{- Predicate for the stricter escaping: square brackets are never allowed,
 - everything else defers to isAllowedInURI. -}
escapemore '[' = False escapemore '[' = False
escapemore ']' = False escapemore ']' = False
escapemore c = isAllowedInURI c escapemore c = isAllowedInURI c