diff --git a/CHANGELOG b/CHANGELOG index f2bd1dc924..4d9d5f0e98 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,6 +11,8 @@ git-annex (10.20230127) UNRELEASED; urgency=medium * sync: Avoid pushing view branches to remotes. * sync: When run in a view branch, refresh the view branch to reflect any changes that have been made to the parent branch or metadata. + * Support http urls that contain ":" that is not followed by a port + number, the same as git does. -- Joey Hess Mon, 06 Feb 2023 13:39:18 -0400 diff --git a/Utility/Url.hs b/Utility/Url.hs index a424674642..d55c447b69 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -1,6 +1,6 @@ {- Url downloading. - - - Copyright 2011-2022 Joey Hess + - Copyright 2011-2023 Joey Hess - - License: BSD-2-clause -} @@ -215,7 +215,7 @@ getUrlInfo url uo = case parseURIRelaxed url of Nothing -> return (Right dne) where go :: URI -> IO (Either String UrlInfo) - go u = case (urlDownloader uo, parseRequest (show u)) of + go u = case (urlDownloader uo, parseRequestRelaxed u) of (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> existsconduit r req (DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) @@ -373,7 +373,7 @@ download' nocurlerror meterupdate iv url file uo = where go = case parseURIRelaxed url of Just u -> checkPolicy uo u $ - case (urlDownloader uo, parseRequest (show u)) of + case (urlDownloader uo, parseRequestRelaxed u) of (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust (matchStatusCodeException (== found302)) (downloadConduit meterupdate iv req file uo >> return (Right ())) @@ -598,7 +598,7 @@ downloadPartial url uo n = case parseURIRelaxed url of Nothing -> return Nothing Just u -> go u `catchNonAsync` const (return Nothing) where - go u = case parseRequest (show u) of + go u = case parseRequestRelaxed u of Nothing -> return Nothing Just req -> do let req' = applyRequest uo req @@ -613,6 +613,19 @@ parseURIRelaxed :: URLString -> Maybe URI parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $ parseURI $ escapeURIString isAllowedInURI s +{- Generate a http-conduit Request for an URI. This is able + - to deal with some urls that parseRequest would usually reject. + -} +parseRequestRelaxed :: MonadThrow m => URI -> m Request +parseRequestRelaxed u = case uriAuthority u of + Just ua + -- parseURI can handle an empty port value, but + -- parseRequest cannot. So remove the ':' to + -- make it work. + | uriPort ua == ":" -> parseRequest $ show $ + u { uriAuthority = Just $ ua { uriPort = "" } } + _ -> parseRequest (show u) + {- Some characters like '[' are allowed in eg, the address of - an uri, but cannot appear unescaped further along in the uri. - This handles that, expensively, by successively escaping each character @@ -686,6 +699,9 @@ curlRestrictedParams r u defport ps = case uriAuthority u of Nothing -> giveup "malformed url" Just uath -> case uriPort uath of "" -> go (uriRegName uath) defport + -- ignore an empty port, same as + -- parseRequestRelaxed does. + ":" -> go (uriRegName uath) defport -- strict parser because the port we provide to curl -- needs to match the port in the url (':':s) -> case readMaybe s :: Maybe Int of diff --git a/doc/bugs/be_consistent_with_git_in_handling_http__58____47____47___with___58___.mdwn b/doc/bugs/be_consistent_with_git_in_handling_http__58____47____47___with___58___.mdwn index d0698db462..4bbc6fe71d 100644 --- a/doc/bugs/be_consistent_with_git_in_handling_http__58____47____47___with___58___.mdwn +++ b/doc/bugs/be_consistent_with_git_in_handling_http__58____47____47___with___58___.mdwn @@ -32,3 +32,5 @@ Backstory: Happened to a user trying to access some NWB files on gin for DANDI p [[!meta author=yoh]] [[!tag projects/dandi]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/be_consistent_with_git_in_handling_http__58____47____47___with___58___/comment_1_c41e4999eb41846eb841b6ce9802b77b._comment b/doc/bugs/be_consistent_with_git_in_handling_http__58____47____47___with___58___/comment_1_c41e4999eb41846eb841b6ce9802b77b._comment new file mode 100644 index 0000000000..0ec25e2200 --- /dev/null +++ b/doc/bugs/be_consistent_with_git_in_handling_http__58____47____47___with___58___/comment_1_c41e4999eb41846eb841b6ce9802b77b._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2023-02-10T17:04:51Z" + content=""" +Not a legal url really, RFC 1738 says "If the port is omitted, the colon is as well." +But web browsers, curl, wget, etc do mostly seem to support it, so at least +Postel's law seems to apply.. + +Here's the root cause of it failing: + + ghci> parseRequest "https://datasets.datalad.org:/dbic/QA/.git/" + *** Exception: InvalidUrlException "https://datasets.datalad.org:/dbic/QA/.git/" "Invalid port" + +So http-conduit refuses to parse it and so can't be used to download it. + +Filed an issue, but I don't know if they'll want to change +http-conduit to accept a malformed url. + + +Since network-uri is able to parse it, into an URI +that has `"uriPort = ":"`, git-annex could special +case handling of the empty port there, changing it to "" +and so generating an url that http-conduit can parse. +I've implemented this fix. +"""]]