Support http urls that contain ":" that is not followed by a port number
The same as git does. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
8fa3264f3a
commit
96d46db2d5
4 changed files with 50 additions and 4 deletions
|
@ -11,6 +11,8 @@ git-annex (10.20230127) UNRELEASED; urgency=medium
|
|||
* sync: Avoid pushing view branches to remotes.
|
||||
* sync: When run in a view branch, refresh the view branch to reflect any
|
||||
changes that have been made to the parent branch or metadata.
|
||||
* Support http urls that contain ":" that is not followed by a port
|
||||
number, the same as git does.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 06 Feb 2023 13:39:18 -0400
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- Url downloading.
|
||||
-
|
||||
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
@ -215,7 +215,7 @@ getUrlInfo url uo = case parseURIRelaxed url of
|
|||
Nothing -> return (Right dne)
|
||||
where
|
||||
go :: URI -> IO (Either String UrlInfo)
|
||||
go u = case (urlDownloader uo, parseRequest (show u)) of
|
||||
go u = case (urlDownloader uo, parseRequestRelaxed u) of
|
||||
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) ->
|
||||
existsconduit r req
|
||||
(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing)
|
||||
|
@ -373,7 +373,7 @@ download' nocurlerror meterupdate iv url file uo =
|
|||
where
|
||||
go = case parseURIRelaxed url of
|
||||
Just u -> checkPolicy uo u $
|
||||
case (urlDownloader uo, parseRequest (show u)) of
|
||||
case (urlDownloader uo, parseRequestRelaxed u) of
|
||||
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust
|
||||
(matchStatusCodeException (== found302))
|
||||
(downloadConduit meterupdate iv req file uo >> return (Right ()))
|
||||
|
@ -598,7 +598,7 @@ downloadPartial url uo n = case parseURIRelaxed url of
|
|||
Nothing -> return Nothing
|
||||
Just u -> go u `catchNonAsync` const (return Nothing)
|
||||
where
|
||||
go u = case parseRequest (show u) of
|
||||
go u = case parseRequestRelaxed u of
|
||||
Nothing -> return Nothing
|
||||
Just req -> do
|
||||
let req' = applyRequest uo req
|
||||
|
@ -613,6 +613,19 @@ parseURIRelaxed :: URLString -> Maybe URI
|
|||
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
|
||||
parseURI $ escapeURIString isAllowedInURI s
|
||||
|
||||
{- Generate a http-conduit Request for an URI. This is able
|
||||
- to deal with some urls that parseRequest would usually reject.
|
||||
-}
|
||||
parseRequestRelaxed :: MonadThrow m => URI -> m Request
|
||||
parseRequestRelaxed u = case uriAuthority u of
|
||||
Just ua
|
||||
-- parseURI can handle an empty port value, but
|
||||
-- parseRequest cannot. So remove the ':' to
|
||||
-- make it work.
|
||||
| uriPort ua == ":" -> parseRequest $ show $
|
||||
u { uriAuthority = Just $ ua { uriPort = "" } }
|
||||
_ -> parseRequest (show u)
|
||||
|
||||
{- Some characters like '[' are allowed in eg, the address of
|
||||
- an uri, but cannot appear unescaped further along in the uri.
|
||||
- This handles that, expensively, by successively escaping each character
|
||||
|
@ -686,6 +699,9 @@ curlRestrictedParams r u defport ps = case uriAuthority u of
|
|||
Nothing -> giveup "malformed url"
|
||||
Just uath -> case uriPort uath of
|
||||
"" -> go (uriRegName uath) defport
|
||||
-- ignore an empty port, same as
|
||||
-- parseRequestRelaxed does.
|
||||
":" -> go (uriRegName uath) defport
|
||||
-- strict parser because the port we provide to curl
|
||||
-- needs to match the port in the url
|
||||
(':':s) -> case readMaybe s :: Maybe Int of
|
||||
|
|
|
@ -32,3 +32,5 @@ Backstory: Happened to a user trying to access some NWB files on gin for DANDI p
|
|||
|
||||
[[!meta author=yoh]]
|
||||
[[!tag projects/dandi]]
|
||||
|
||||
> [[fixed|done]] --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2023-02-10T17:04:51Z"
|
||||
content="""
|
||||
Not a legal url really, RFC 1738 says "If the port is omitted, the colon is as well."
|
||||
But web browsers, curl, wget, etc do mostly seem to support it, so at least
|
||||
Postel's law seems to apply..
|
||||
|
||||
Here's the root cause of it failing:
|
||||
|
||||
ghci> parseRequest "https://datasets.datalad.org:/dbic/QA/.git/"
|
||||
*** Exception: InvalidUrlException "https://datasets.datalad.org:/dbic/QA/.git/" "Invalid port"
|
||||
|
||||
So http-conduit refuses to parse it and so can't be used to download it.
|
||||
|
||||
Filed an issue, but I don't know if they'll want to change
|
||||
http-conduit to accept a malformed url.
|
||||
<https://github.com/snoyberg/http-client/issues/501>
|
||||
|
||||
Since network-uri is able to parse it, into an URI
|
||||
that has `"uriPort = ":"`, git-annex could special
|
||||
case handling of the empty port there, changing it to ""
|
||||
and so generating an url that http-conduit can parse.
|
||||
I've implemented this fix.
|
||||
"""]]
|
Loading…
Reference in a new issue