From 19b5137227cf42181325f3743b6138963fa93d75 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 27 Apr 2020 13:48:14 -0400 Subject: [PATCH] addurl --fast error message improvement addurl: When run with --fast on an url that annex.security.allowed-ip-addresses prevents accessing, display a more useful message. (Also importfeed --fast potentially.) --- Annex/Url.hs | 8 ++------ CHANGELOG | 3 +++ Command/AddUrl.hs | 13 ++++++++---- Command/ImportFeed.hs | 13 +++++++----- Utility/Url.hs | 47 +++++++++++++++++++++++++++++-------------- 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/Annex/Url.hs b/Annex/Url.hs index 29fe6997b3..fc8c543b87 100644 --- a/Annex/Url.hs +++ b/Annex/Url.hs @@ -177,9 +177,5 @@ exists url uo = liftIO (U.exists url uo) >>= \case Right b -> return b Left err -> warning err >> return False -getUrlInfo :: U.URLString -> U.UrlOptions -> Annex U.UrlInfo -getUrlInfo url uo = liftIO (U.getUrlInfo url uo) >>= \case - Right i -> return i - Left err -> do - warning err - return $ U.UrlInfo False Nothing Nothing +getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo) +getUrlInfo url uo = liftIO (U.getUrlInfo url uo) diff --git a/CHANGELOG b/CHANGELOG index cc01259c93..9b9029768f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -34,6 +34,9 @@ git-annex (8.20200331) UNRELEASED; urgency=medium * sync: When some remotes to sync with are specified, and --fast is too, pick the lowest cost of the specified remotes, do not sync with a faster remote that was not specified. + * addurl: When run with --fast on an url that + annex.security.allowed-ip-addresses prevents accessing, display + a more useful message. -- Joey Hess Mon, 30 Mar 2020 15:58:34 -0400 diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index cd7d75cbd5..9097b7f94a 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -194,11 +194,16 @@ startWeb addunlockedmatcher o urlstring = go $ fromMaybe bad $ parseURI urlstrin where bad = fromMaybe (giveup $ "bad url " ++ urlstring) $ Url.parseURIRelaxed $ urlstring - go url = startingAddUrl urlstring o $ do + go url = startingAddUrl urlstring o $ + if relaxedOption (downloadOptions o) + then go' url Url.assumeUrlExists + else Url.withUrlOptions (Url.getUrlInfo urlstring) >>= \case + Right urlinfo -> go' url urlinfo + Left err -> do + warning err + next $ return False + go' url urlinfo = do pathmax <- liftIO $ fileNameLengthLimit "." - urlinfo <- if relaxedOption (downloadOptions o) - then pure Url.assumeUrlExists - else Url.withUrlOptions $ Url.getUrlInfo urlstring file <- adjustFile o <$> case fileOption (downloadOptions o) of Just f -> pure f Nothing -> case Url.urlSuggestedFile urlinfo of diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index cd14fa836b..6b670668cc 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -162,10 +162,6 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl r <- Remote.claimingUrl url if Remote.uuid r == webUUID || rawOption (downloadOptions opts) then do - urlinfo <- if relaxedOption (downloadOptions opts) - then pure Url.assumeUrlExists - else Url.withUrlOptions $ - Url.getUrlInfo url let dlopts = (downloadOptions opts) -- force using the filename -- chosen here @@ -173,7 +169,14 @@ performDownload addunlockedmatcher opts cache todownload = case location todownl -- don't use youtube-dl , rawOption = True } - maybeToList <$> addUrlFile addunlockedmatcher dlopts url urlinfo f + let go urlinfo = maybeToList <$> addUrlFile addunlockedmatcher dlopts url urlinfo f + if relaxedOption (downloadOptions opts) + then go Url.assumeUrlExists + else Url.withUrlOptions (Url.getUrlInfo url) >>= \case + Right urlinfo -> go urlinfo + Left err -> do + warning err + return [] else do res <- tryNonAsync $ maybe (error $ "unable to checkUrl of " ++ Remote.name r) diff --git a/Utility/Url.hs b/Utility/Url.hs index 7ef0f75ec6..196a2b1645 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -162,8 +162,8 @@ allowedScheme uo u = uscheme `S.member` allowedSchemes uo {- Checks that an url exists and could be successfully downloaded, - also checking that its size, if available, matches a specified size. - - - The Left error is returned if policy does not allow accessing the url - - or the url scheme is not supported. + - The Left error is returned if policy or the restricted http manager + - does not allow accessing the url or the url scheme is not supported. -} checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO (Either String Bool) checkBoth url expected_size uo = fmap go <$> check url expected_size uo @@ -195,8 +195,8 @@ assumeUrlExists = UrlInfo True Nothing Nothing {- Checks that an url exists and could be successfully downloaded, - also returning its size and suggested filename if available. - - - The Left error is returned if policy does not allow accessing the url - - or the url scheme is not supported. + - The Left error is returned if policy or the restricted http manages + - does not allow accessing the url or the url scheme is not supported. -} getUrlInfo :: URLString -> UrlOptions -> IO (Either String UrlInfo) getUrlInfo url uo = case parseURIRelaxed url of @@ -205,15 +205,8 @@ getUrlInfo url uo = case parseURIRelaxed url of where go :: URI -> IO (Either String UrlInfo) go u = case (urlDownloader uo, parseRequest (show u)) of - (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust - -- When http redirects to a protocol which - -- conduit does not support, it will throw - -- a StatusCodeException with found302 - -- and a Response with the redir Location. - (matchStatusCodeException (== found302)) - (Right <$> existsconduit req uo) - (followredir r) - `catchNonAsync` (const $ return $ Right dne) + (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> + existsconduit r req (DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) | isfileurl u -> Right <$> existsfile u | isftpurl u -> (Right <$> existscurlrestricted r u url ftpport) @@ -250,7 +243,23 @@ getUrlInfo url uo = case parseURIRelaxed url of extractfilename = contentDispositionFilename . B8.toString <=< lookup hContentDisposition . responseHeaders - existsconduit req uo' = do + existsconduit r req = + let go = catchcrossprotoredir r (existsconduit' req uo) + in catchJust matchconnectionrestricted go retconnectionrestricted + + matchconnectionrestricted he@(HttpExceptionRequest _ (InternalException ie)) = + case fromException ie of + Just (ConnectionRestricted why) -> Just he + _ -> Nothing + matchconnectionrestricted _ = Nothing + + retconnectionrestricted he@(HttpExceptionRequest _ (InternalException ie)) = + case fromException ie of + Just (ConnectionRestricted why) -> return (Left why) + _ -> throwM he + retconnectionrestricted he = throwM he + + existsconduit' req uo' = do let req' = headRequest (applyRequest uo req) debugM "url" (show req') join $ runResourceT $ do @@ -266,7 +275,7 @@ getUrlInfo url uo = case parseURIRelaxed url of then return $ getBasicAuth uo' (show (getUri req)) >>= \case Nothing -> return dne Just (ba, signalsuccess) -> do - ui <- existsconduit + ui <- existsconduit' (applyBasicAuth' ba req) (uo' { getBasicAuth = noBasicAuth }) signalsuccess (urlExists ui) @@ -301,6 +310,14 @@ getUrlInfo url uo = case parseURIRelaxed url of sz <- getFileSize' f stat found (Just sz) Nothing Nothing -> return dne + + -- When http server redirects to a protocol which conduit does not + -- support, it will throw a StatusCodeException with found302 + -- and a Response with the redir Location. + catchcrossprotoredir r a = + catchJust (matchStatusCodeException (== found302)) + (Right <$> a) + (followredir r) followredir r (HttpExceptionRequest _ (StatusCodeException resp _)) = case headMaybe $ map decodeBS $ getResponseHeader hLocation resp of