Support http urls that contain ":" that is not followed by a port number
The same as git does. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
		
					parent
					
						
							
								8fa3264f3a
							
						
					
				
			
			
				commit
				
					
						96d46db2d5
					
				
			
		
					 4 changed files with 50 additions and 4 deletions
				
			
		|  | @ -11,6 +11,8 @@ git-annex (10.20230127) UNRELEASED; urgency=medium | |||
|   * sync: Avoid pushing view branches to remotes. | ||||
|   * sync: When run in a view branch, refresh the view branch to reflect any | ||||
|     changes that have been made to the parent branch or metadata. | ||||
|   * Support http urls that contain ":" that is not followed by a port | ||||
|     number, the same as git does. | ||||
| 
 | ||||
|  -- Joey Hess <id@joeyh.name>  Mon, 06 Feb 2023 13:39:18 -0400 | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| {- Url downloading. | ||||
|  - | ||||
|  - Copyright 2011-2022 Joey Hess <id@joeyh.name> | ||||
|  - Copyright 2011-2023 Joey Hess <id@joeyh.name> | ||||
|  - | ||||
|  - License: BSD-2-clause | ||||
|  -} | ||||
|  | @ -215,7 +215,7 @@ getUrlInfo url uo = case parseURIRelaxed url of | |||
| 	Nothing -> return (Right dne) | ||||
|    where | ||||
| 	go :: URI -> IO (Either String UrlInfo) | ||||
| 	go u = case (urlDownloader uo, parseRequest (show u)) of | ||||
| 	go u = case (urlDownloader uo, parseRequestRelaxed u) of | ||||
| 		(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> | ||||
| 			existsconduit r req | ||||
| 		(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) | ||||
|  | @ -373,7 +373,7 @@ download' nocurlerror meterupdate iv url file uo = | |||
|   where | ||||
| 	go = case parseURIRelaxed url of | ||||
| 		Just u -> checkPolicy uo u $ | ||||
| 			case (urlDownloader uo, parseRequest (show u)) of | ||||
| 			case (urlDownloader uo, parseRequestRelaxed u) of | ||||
| 				(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust | ||||
| 					(matchStatusCodeException (== found302)) | ||||
| 					(downloadConduit meterupdate iv req file uo >> return (Right ())) | ||||
|  | @ -598,7 +598,7 @@ downloadPartial url uo n = case parseURIRelaxed url of | |||
| 	Nothing -> return Nothing | ||||
| 	Just u -> go u `catchNonAsync` const (return Nothing) | ||||
|   where | ||||
| 	go u = case parseRequest (show u) of | ||||
| 	go u = case parseRequestRelaxed u of | ||||
| 		Nothing -> return Nothing | ||||
| 		Just req -> do | ||||
| 			let req' = applyRequest uo req | ||||
|  | @ -613,6 +613,19 @@ parseURIRelaxed :: URLString -> Maybe URI | |||
| parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $ | ||||
| 	parseURI $ escapeURIString isAllowedInURI s | ||||
| 
 | ||||
| {- Generate a http-conduit Request for an URI. This is able | ||||
|  - to deal with some urls that parseRequest would usually reject.  | ||||
|  -} | ||||
| parseRequestRelaxed :: MonadThrow m => URI -> m Request | ||||
| parseRequestRelaxed u = case uriAuthority u of | ||||
| 	Just ua | ||||
| 		-- parseURI can handle an empty port value, but | ||||
| 		-- parseRequest cannot. So remove the ':' to | ||||
| 		-- make it work. | ||||
| 		| uriPort ua == ":" -> parseRequest $ show $ | ||||
| 			u { uriAuthority = Just $ ua { uriPort = "" } } | ||||
| 	_ -> parseRequest (show u) | ||||
| 
 | ||||
| {- Some characters like '[' are allowed in eg, the address of | ||||
|  - an uri, but cannot appear unescaped further along in the uri. | ||||
|  - This handles that, expensively, by successively escaping each character | ||||
|  | @ -686,6 +699,9 @@ curlRestrictedParams r u defport ps = case uriAuthority u of | |||
| 	Nothing -> giveup "malformed url" | ||||
| 	Just uath -> case uriPort uath of | ||||
| 		"" -> go (uriRegName uath) defport | ||||
| 		-- ignore an empty port, same as | ||||
| 		-- parseRequestRelaxed does. | ||||
| 		":" -> go (uriRegName uath) defport | ||||
| 		-- strict parser because the port we provide to curl | ||||
| 		-- needs to match the port in the url | ||||
| 		(':':s) -> case readMaybe s :: Maybe Int of | ||||
|  |  | |||
|  | @ -32,3 +32,5 @@ Backstory: Happened to a user trying to access some NWB files on gin for DANDI p | |||
| 
 | ||||
| [[!meta author=yoh]] | ||||
| [[!tag projects/dandi]] | ||||
| 
 | ||||
| > [[fixed|done]] --[[Joey]] | ||||
|  |  | |||
|  | @ -0,0 +1,26 @@ | |||
| [[!comment format=mdwn | ||||
|  username="joey" | ||||
|  subject="""comment 1""" | ||||
|  date="2023-02-10T17:04:51Z" | ||||
|  content=""" | ||||
| Not a legal url really, RFC 1738 says "If the port is omitted, the colon is as well." | ||||
| But web browsers, curl, wget, etc do mostly seem to support it, so at least  | ||||
| Postel's law seems to apply.. | ||||
| 
 | ||||
| Here's the root cause of it failing: | ||||
| 
 | ||||
| 	ghci> parseRequest "https://datasets.datalad.org:/dbic/QA/.git/" | ||||
| 	*** Exception: InvalidUrlException "https://datasets.datalad.org:/dbic/QA/.git/" "Invalid port" | ||||
| 
 | ||||
| So http-conduit refuses to parse it and so can't be used to download it. | ||||
| 
 | ||||
| Filed an issue, but I don't know if they'll want to change | ||||
| http-conduit to accept a malformed url. | ||||
| <https://github.com/snoyberg/http-client/issues/501> | ||||
| 
 | ||||
| Since network-uri is able to parse it, into an URI | ||||
| that has `"uriPort = ":"`, git-annex could special  | ||||
| case handling of the empty port there, changing it to "" | ||||
| and so generating an url that http-conduit can parse. | ||||
| I've implemented this fix. | ||||
| """]] | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Joey Hess
				Joey Hess