Support http urls that contain ":" that is not followed by a port number
The same as git does. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
8fa3264f3a
commit
96d46db2d5
4 changed files with 50 additions and 4 deletions
|
@ -11,6 +11,8 @@ git-annex (10.20230127) UNRELEASED; urgency=medium
|
||||||
* sync: Avoid pushing view branches to remotes.
|
* sync: Avoid pushing view branches to remotes.
|
||||||
* sync: When run in a view branch, refresh the view branch to reflect any
|
* sync: When run in a view branch, refresh the view branch to reflect any
|
||||||
changes that have been made to the parent branch or metadata.
|
changes that have been made to the parent branch or metadata.
|
||||||
|
* Support http urls that contain ":" that is not followed by a port
|
||||||
|
number, the same as git does.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 06 Feb 2023 13:39:18 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 06 Feb 2023 13:39:18 -0400
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- Url downloading.
|
{- Url downloading.
|
||||||
-
|
-
|
||||||
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- License: BSD-2-clause
|
- License: BSD-2-clause
|
||||||
-}
|
-}
|
||||||
|
@ -215,7 +215,7 @@ getUrlInfo url uo = case parseURIRelaxed url of
|
||||||
Nothing -> return (Right dne)
|
Nothing -> return (Right dne)
|
||||||
where
|
where
|
||||||
go :: URI -> IO (Either String UrlInfo)
|
go :: URI -> IO (Either String UrlInfo)
|
||||||
go u = case (urlDownloader uo, parseRequest (show u)) of
|
go u = case (urlDownloader uo, parseRequestRelaxed u) of
|
||||||
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) ->
|
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) ->
|
||||||
existsconduit r req
|
existsconduit r req
|
||||||
(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing)
|
(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing)
|
||||||
|
@ -373,7 +373,7 @@ download' nocurlerror meterupdate iv url file uo =
|
||||||
where
|
where
|
||||||
go = case parseURIRelaxed url of
|
go = case parseURIRelaxed url of
|
||||||
Just u -> checkPolicy uo u $
|
Just u -> checkPolicy uo u $
|
||||||
case (urlDownloader uo, parseRequest (show u)) of
|
case (urlDownloader uo, parseRequestRelaxed u) of
|
||||||
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust
|
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust
|
||||||
(matchStatusCodeException (== found302))
|
(matchStatusCodeException (== found302))
|
||||||
(downloadConduit meterupdate iv req file uo >> return (Right ()))
|
(downloadConduit meterupdate iv req file uo >> return (Right ()))
|
||||||
|
@ -598,7 +598,7 @@ downloadPartial url uo n = case parseURIRelaxed url of
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just u -> go u `catchNonAsync` const (return Nothing)
|
Just u -> go u `catchNonAsync` const (return Nothing)
|
||||||
where
|
where
|
||||||
go u = case parseRequest (show u) of
|
go u = case parseRequestRelaxed u of
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just req -> do
|
Just req -> do
|
||||||
let req' = applyRequest uo req
|
let req' = applyRequest uo req
|
||||||
|
@ -613,6 +613,19 @@ parseURIRelaxed :: URLString -> Maybe URI
|
||||||
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
|
parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $
|
||||||
parseURI $ escapeURIString isAllowedInURI s
|
parseURI $ escapeURIString isAllowedInURI s
|
||||||
|
|
||||||
|
{- Generate a http-conduit Request for an URI. This is able
|
||||||
|
- to deal with some urls that parseRequest would usually reject.
-
- When the URI has an authority whose uriPort is exactly ":" (a colon
- with no port number after it), the port is reset to "" before the
- URI is shown and handed to parseRequest. Every other URI, including
- one with no authority at all, is shown and passed to parseRequest
- unchanged.
-
- The result is produced in the caller's MonadThrow monad; the callers
- visible in this change pattern match on Just/Nothing.
|
||||||
|
-}
|
||||||
|
parseRequestRelaxed :: MonadThrow m => URI -> m Request
|
||||||
|
parseRequestRelaxed u = case uriAuthority u of
|
||||||
|
Just ua
|
||||||
|
-- parseURI can handle an empty port value, but
|
||||||
|
-- parseRequest cannot. So remove the ':' to
|
||||||
|
-- make it work.
|
||||||
|
| uriPort ua == ":" -> parseRequest $ show $
|
||||||
|
u { uriAuthority = Just $ ua { uriPort = "" } }
|
||||||
|
_ -> parseRequest (show u)
|
||||||
|
|
||||||
{- Some characters like '[' are allowed in eg, the address of
|
{- Some characters like '[' are allowed in eg, the address of
|
||||||
- an uri, but cannot appear unescaped further along in the uri.
|
- an uri, but cannot appear unescaped further along in the uri.
|
||||||
- This handles that, expensively, by successively escaping each character
|
- This handles that, expensively, by successively escaping each character
|
||||||
|
@ -686,6 +699,9 @@ curlRestrictedParams r u defport ps = case uriAuthority u of
|
||||||
Nothing -> giveup "malformed url"
|
Nothing -> giveup "malformed url"
|
||||||
Just uath -> case uriPort uath of
|
Just uath -> case uriPort uath of
|
||||||
"" -> go (uriRegName uath) defport
|
"" -> go (uriRegName uath) defport
|
||||||
|
-- ignore an empty port, same as
|
||||||
|
-- parseRequestRelaxed does.
|
||||||
|
":" -> go (uriRegName uath) defport
|
||||||
-- strict parser because the port we provide to curl
|
-- strict parser because the port we provide to curl
|
||||||
-- needs to match the port in the url
|
-- needs to match the port in the url
|
||||||
(':':s) -> case readMaybe s :: Maybe Int of
|
(':':s) -> case readMaybe s :: Maybe Int of
|
||||||
|
|
|
@ -32,3 +32,5 @@ Backstory: Happened to a user trying to access some NWB files on gin for DANDI p
|
||||||
|
|
||||||
[[!meta author=yoh]]
|
[[!meta author=yoh]]
|
||||||
[[!tag projects/dandi]]
|
[[!tag projects/dandi]]
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 1"""
|
||||||
|
date="2023-02-10T17:04:51Z"
|
||||||
|
content="""
|
||||||
|
Not really a legal url; RFC 1738 says "If the port is omitted, the colon is as well."
|
||||||
|
But web browsers, curl, wget, etc do mostly seem to support it, so at least
|
||||||
|
Postel's law seems to apply.
|
||||||
|
|
||||||
|
Here's the root cause of it failing:
|
||||||
|
|
||||||
|
ghci> parseRequest "https://datasets.datalad.org:/dbic/QA/.git/"
|
||||||
|
*** Exception: InvalidUrlException "https://datasets.datalad.org:/dbic/QA/.git/" "Invalid port"
|
||||||
|
|
||||||
|
So http-conduit refuses to parse it and so can't be used to download it.
|
||||||
|
|
||||||
|
Filed an issue, but I don't know if they'll want to change
|
||||||
|
http-conduit to accept a malformed url.
|
||||||
|
<https://github.com/snoyberg/http-client/issues/501>
|
||||||
|
|
||||||
|
Since network-uri is able to parse it, into an URI
|
||||||
|
that has `uriPort = ":"`, git-annex could special
|
||||||
|
case handling of the empty port there, changing it to ""
|
||||||
|
and so generating an url that http-conduit can parse.
|
||||||
|
I've implemented this fix.
|
||||||
|
"""]]
|
Loading…
Add table
Reference in a new issue