make --json-error-messages capture url download errors

Convert Utility.Url to return Either String so the error message can be
displated in the annex monad and so captured.

(When curl is used, its errors are still not caught.)
This commit is contained in:
Joey Hess 2019-11-12 13:33:41 -04:00
parent 99536e3a0b
commit 890330f0fe
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
16 changed files with 161 additions and 121 deletions

View file

@ -776,7 +776,7 @@ downloadUrl k p urls file =
-- download command is used.
meteredFile file (Just p) k $
Url.withUrlOptions $ \uo ->
liftIO $ anyM (\u -> Url.download p u file uo) urls
anyM (\u -> Url.download p u file uo) urls
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}

View file

@ -1,24 +1,39 @@
{- Url downloading, with git-annex user agent and configured http
- headers, security restrictions, etc.
-
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Url (
module U,
withUrlOptions,
getUrlOptions,
getUserAgent,
ipAddressesUnlimited,
checkBoth,
download,
exists,
getUrlInfo,
U.downloadQuiet,
U.URLString,
U.UrlOptions(..),
U.UrlInfo(..),
U.sinkResponseFile,
U.matchStatusCodeException,
U.downloadConduit,
U.downloadPartial,
U.parseURIRelaxed,
U.allowedScheme,
U.assumeUrlExists,
) where
import Annex.Common
import qualified Annex
import Utility.Url as U
import qualified Utility.Url as U
import Utility.IPAddress
import Utility.HttpManagerRestricted
import Utility.Metered
import qualified BuildInfo
import Network.Socket
@ -43,7 +58,7 @@ getUrlOptions = Annex.getState Annex.urloptions >>= \case
where
mk = do
(urldownloader, manager) <- checkallowedaddr
mkUrlOptions
U.mkUrlOptions
<$> (Just <$> getUserAgent)
<*> headers
<*> pure urldownloader
@ -108,3 +123,27 @@ ipAddressesUnlimited =
withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
withUrlOptions a = a =<< getUrlOptions
checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool
checkBoth url expected_size uo =
liftIO (U.checkBoth url expected_size uo) >>= \case
Right r -> return r
Left err -> warning err >> return False
download :: MeterUpdate -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
download meterupdate url file uo =
liftIO (U.download meterupdate url file uo) >>= \case
Right () -> return True
Left err -> warning err >> return False
exists :: U.URLString -> U.UrlOptions -> Annex Bool
exists url uo = liftIO (U.exists url uo) >>= \case
Right b -> return b
Left err -> warning err >> return False
getUrlInfo :: U.URLString -> U.UrlOptions -> Annex U.UrlInfo
getUrlInfo url uo = liftIO (U.getUrlInfo url uo) >>= \case
Right i -> return i
Left err -> do
warning err
return $ U.UrlInfo False Nothing Nothing

View file

@ -18,7 +18,6 @@ import Annex.Common
import qualified Annex
import Annex.Content
import Annex.Url
import Utility.Url (URLString)
import Utility.DiskFree
import Utility.HtmlDetect
import Utility.Process.Transcript

View file

@ -95,7 +95,9 @@ newAssistantUrl repo = do
- warp-tls listens to http, in order to show an error page, so this works.
-}
assistantListening :: URLString -> IO Bool
assistantListening url = catchBoolIO $ exists url' =<< defUrlOptions
assistantListening url = catchBoolIO $ do
uo <- defUrlOptions
(== Right True) <$> exists url' uo
where
url' = case parseURI url of
Nothing -> url

View file

@ -40,9 +40,10 @@ import Utility.Metered
import qualified Utility.Lsof as Lsof
import qualified BuildInfo
import qualified Utility.Url as Url
import qualified Annex.Url as Url
import qualified Annex.Url as Url hiding (download)
import Utility.Tuple
import Data.Either
import qualified Data.Map as M
{- Upgrade without interaction in the webapp. -}
@ -323,8 +324,8 @@ downloadDistributionInfo = do
liftIO $ withTmpDir "git-annex.tmp" $ \tmpdir -> do
let infof = tmpdir </> "info"
let sigf = infof ++ ".sig"
ifM (Url.download nullMeterUpdate distributionInfoUrl infof uo
<&&> Url.download nullMeterUpdate distributionInfoSigUrl sigf uo
ifM (isRight <$> Url.download nullMeterUpdate distributionInfoUrl infof uo
<&&> (isRight <$> Url.download nullMeterUpdate distributionInfoSigUrl sigf uo)
<&&> verifyDistributionSig gpgcmd sigf)
( parseInfoFile <$> readFileStrict infof
, return Nothing

View file

@ -192,7 +192,7 @@ escapeHeader = escapeURIString (\c -> isUnescapedInURI c && c /= ' ')
getRepoInfo :: RemoteConfig -> Widget
getRepoInfo c = do
uo <- liftAnnex Url.getUrlOptions
exists <- liftIO $ catchDefaultIO False $ Url.exists url uo
exists <- liftAnnex $ catchDefaultIO False $ Url.exists url uo
[whamlet|
<a href="#{url}">
Internet Archive item

View file

@ -1,6 +1,8 @@
git-annex (7.20191107) UNRELEASED; urgency=medium
* Added annex.allowsign option.
* Make --json-error-messages capture more errors,
particularly url download errors.
-- Joey Hess <id@joeyh.name> Mon, 11 Nov 2019 15:59:47 -0400

View file

@ -197,8 +197,7 @@ startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring
pathmax <- liftIO $ fileNameLengthLimit "."
urlinfo <- if relaxedOption (downloadOptions o)
then pure Url.assumeUrlExists
else Url.withUrlOptions $
liftIO . Url.getUrlInfo urlstring
else Url.withUrlOptions $ Url.getUrlInfo urlstring
file <- adjustFile o <$> case fileOption (downloadOptions o) of
Just f -> pure f
Nothing -> case Url.urlSuggestedFile urlinfo of

View file

@ -146,13 +146,12 @@ findDownloads u f = catMaybes $ map mk (feedItems f)
downloadFeed :: URLString -> Annex (Maybe String)
downloadFeed url
| Url.parseURIRelaxed url == Nothing = giveup "invalid feed url"
| otherwise = Url.withUrlOptions $ \uo ->
liftIO $ withTmpFile "feed" $ \f h -> do
hClose h
ifM (Url.download nullMeterUpdate url f uo)
( Just <$> readFileStrict f
, return Nothing
)
| otherwise = withTmpFile "feed" $ \f h -> do
liftIO $ hClose h
ifM (Url.withUrlOptions $ Url.download nullMeterUpdate url f)
( Just <$> liftIO (readFileStrict f)
, return Nothing
)
performDownload :: ImportFeedOptions -> Cache -> ToDownload -> Annex Bool
performDownload opts cache todownload = case location todownload of
@ -164,7 +163,7 @@ performDownload opts cache todownload = case location todownload of
urlinfo <- if relaxedOption (downloadOptions opts)
then pure Url.assumeUrlExists
else Url.withUrlOptions $
liftIO . Url.getUrlInfo url
Url.getUrlInfo url
let dlopts = (downloadOptions opts)
-- force using the filename
-- chosen here

View file

@ -206,7 +206,7 @@ downloadTorrentFile u = do
withTmpFileIn othertmp "torrent" $ \f h -> do
liftIO $ hClose h
ok <- Url.withUrlOptions $
liftIO . Url.download nullMeterUpdate u f
Url.download nullMeterUpdate u f
when ok $
liftIO $ renameFile f torrent
return ok

View file

@ -716,7 +716,7 @@ checkKeyUrl :: Git.Repo -> CheckPresent
checkKeyUrl r k = do
showChecking r
us <- getWebUrls k
anyM (\u -> withUrlOptions $ liftIO . checkBoth u (keySize k)) us
anyM (\u -> withUrlOptions $ checkBoth u (keySize k)) us
getWebUrls :: Key -> Annex [URLString]
getWebUrls key = filter supported <$> getUrls key

View file

@ -352,11 +352,10 @@ inAnnex' repo rmt (State connpool duc _ _) key
checkhttp = do
showChecking repo
gc <- Annex.getGitConfig
ifM (Url.withUrlOptions $ \uo -> liftIO $
anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls gc repo rmt key))
( return True
, giveup "not found"
)
ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls gc repo rmt key))
( return True
, giveup "not found"
)
checkremote =
let fallback = Ssh.inAnnex repo key
in P2PHelper.checkpresent (Ssh.runProto rmt connpool (cantCheck rmt) fallback) key

View file

@ -58,11 +58,10 @@ import Logs.Web
import Logs.MetaData
import Types.MetaData
import Utility.Metered
import qualified Annex.Url as Url
import Utility.DataUnits
import Annex.Content
import Annex.Url (getUrlOptions, withUrlOptions)
import Utility.Url (checkBoth, UrlOptions(..))
import qualified Annex.Url as Url
import Annex.Url (getUrlOptions, withUrlOptions, UrlOptions(..))
import Utility.Env
type BucketName = String
@ -348,7 +347,7 @@ checkKey hv r rs c info k = withS3Handle hv $ \case
Right us -> do
showChecking r
let check u = withUrlOptions $
liftIO . checkBoth u (keySize k)
Url.checkBoth u (keySize k)
anyM check us
checkKeyHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> Annex Bool
@ -397,7 +396,7 @@ retrieveExportS3 hv r info _k loc f p =
warning $ needS3Creds (uuid r)
return False
Just geturl -> Url.withUrlOptions $
liftIO . Url.download p (geturl exportloc) f
Url.download p (geturl exportloc) f
exportloc = bucketExportLocation info loc
removeExportS3 :: S3HandleVar -> Remote -> RemoteStateHandle -> S3Info -> Key -> ExportLocation -> Annex Bool
@ -417,8 +416,8 @@ checkPresentExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation
checkPresentExportS3 hv r info k loc = withS3Handle hv $ \case
Just h -> checkKeyHelper info h (Left (T.pack $ bucketExportLocation info loc))
Nothing -> case getPublicUrlMaker info of
Just geturl -> withUrlOptions $ liftIO .
checkBoth (geturl $ bucketExportLocation info loc) (keySize k)
Just geturl -> withUrlOptions $
Url.checkBoth (geturl $ bucketExportLocation info loc) (keySize k)
Nothing -> do
warning $ needS3Creds (uuid r)
giveup "No S3 credentials configured"

View file

@ -116,9 +116,8 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
showChecking u'
case downloader of
YoutubeDownloader -> youtubeDlCheck u'
_ -> do
Url.withUrlOptions $ liftIO . catchMsgIO .
Url.checkBoth u' (keySize key)
_ -> catchMsgIO $
Url.withUrlOptions $ Url.checkBoth u' (keySize key)
where
firsthit [] miss _ = return miss
firsthit (u:rest) _ a = do

View file

@ -138,22 +138,14 @@ curlParams uo ps = ps ++ uaparams ++ headerparams ++ addedparams ++ schemeparams
]
schemelist = map fromScheme $ S.toList $ allowedSchemes uo
checkPolicy :: UrlOptions -> URI -> a -> (String -> IO b) -> IO a -> IO a
checkPolicy uo u onerr displayerror a
checkPolicy :: UrlOptions -> URI -> IO (Either String a) -> IO (Either String a)
checkPolicy uo u a
| allowedScheme uo u = a
| otherwise = do
void $ displayerror $
"Configuration does not allow accessing " ++ show u
return onerr
| otherwise = return $ Left $
"Configuration does not allow accessing " ++ show u
unsupportedUrlScheme :: URI -> (String -> IO a) -> IO a
unsupportedUrlScheme u displayerror =
displayerror $ "Unsupported url scheme " ++ show u
warnError :: String -> IO ()
warnError msg = do
hPutStrLn stderr msg
hFlush stderr
unsupportedUrlScheme :: URI -> String
unsupportedUrlScheme u = "Unsupported url scheme " ++ show u
allowedScheme :: UrlOptions -> URI -> Bool
allowedScheme uo u = uscheme `S.member` allowedSchemes uo
@ -161,14 +153,18 @@ allowedScheme uo u = uscheme `S.member` allowedSchemes uo
uscheme = mkScheme $ takeWhile (/=':') (uriScheme u)
{- Checks that an url exists and could be successfully downloaded,
- also checking that its size, if available, matches a specified size. -}
checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO Bool
checkBoth url expected_size uo = do
v <- check url expected_size uo
return (fst v && snd v)
- also checking that its size, if available, matches a specified size.
-
- The Left error is returned if policy does not allow accessing the url
- or the url scheme is not supported.
-}
checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO (Either String Bool)
checkBoth url expected_size uo = fmap go <$> check url expected_size uo
where
go v = fst v && snd v
check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool)
check url expected_size uo = go <$> getUrlInfo url uo
check :: URLString -> Maybe Integer -> UrlOptions -> IO (Either String (Bool, Bool))
check url expected_size uo = fmap go <$> getUrlInfo url uo
where
go (UrlInfo False _ _) = (False, False)
go (UrlInfo True Nothing _) = (True, True)
@ -176,8 +172,8 @@ check url expected_size uo = go <$> getUrlInfo url uo
Just _ -> (True, expected_size == s)
Nothing -> (True, True)
exists :: URLString -> UrlOptions -> IO Bool
exists url uo = urlExists <$> getUrlInfo url uo
exists :: URLString -> UrlOptions -> IO (Either String Bool)
exists url uo = fmap urlExists <$> getUrlInfo url uo
data UrlInfo = UrlInfo
{ urlExists :: Bool
@ -190,32 +186,36 @@ assumeUrlExists :: UrlInfo
assumeUrlExists = UrlInfo True Nothing Nothing
{- Checks that an url exists and could be successfully downloaded,
- also returning its size and suggested filename if available. -}
getUrlInfo :: URLString -> UrlOptions -> IO UrlInfo
- also returning its size and suggested filename if available.
-
- The Left error is returned if policy does not allow accessing the url
- or the url scheme is not supported.
-}
getUrlInfo :: URLString -> UrlOptions -> IO (Either String UrlInfo)
getUrlInfo url uo = case parseURIRelaxed url of
Just u -> checkPolicy uo u dne warnError $
case (urlDownloader uo, parseUrlRequest (show u)) of
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust
-- When http redirects to a protocol which
-- conduit does not support, it will throw
-- a StatusCodeException with found302
-- and a Response with the redir Location.
(matchStatusCodeException (== found302))
(existsconduit req)
(followredir r)
`catchNonAsync` (const $ return dne)
(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing)
| isfileurl u -> existsfile u
| isftpurl u -> existscurlrestricted r u url ftpport
`catchNonAsync` (const $ return dne)
| otherwise -> do
unsupportedUrlScheme u warnError
return dne
(DownloadWithCurl _, _)
| isfileurl u -> existsfile u
| otherwise -> existscurl u (basecurlparams url)
Nothing -> return dne
where
Just u -> checkPolicy uo u (go u)
Nothing -> return (Right dne)
where
go :: URI -> IO (Either String UrlInfo)
go u = case (urlDownloader uo, parseUrlRequest (show u)) of
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust
-- When http redirects to a protocol which
-- conduit does not support, it will throw
-- a StatusCodeException with found302
-- and a Response with the redir Location.
(matchStatusCodeException (== found302))
(Right <$> existsconduit req)
(followredir r)
`catchNonAsync` (const $ return $ Right dne)
(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing)
| isfileurl u -> Right <$> existsfile u
| isftpurl u -> (Right <$> existscurlrestricted r u url ftpport)
`catchNonAsync` (const $ return $ Right dne)
| otherwise -> return $ Left $ unsupportedUrlScheme u
(DownloadWithCurl _, _)
| isfileurl u -> Right <$> existsfile u
| otherwise -> Right <$> existscurl u (basecurlparams url)
dne = UrlInfo False Nothing Nothing
found sz f = return $ UrlInfo True sz f
@ -291,11 +291,11 @@ getUrlInfo url uo = case parseURIRelaxed url of
-- http to file redirect would not be secure,
-- and http-conduit follows http to http.
Just u' | isftpurl u' ->
checkPolicy uo u' dne warnError $
checkPolicy uo u' $ Right <$>
existscurlrestricted r u' url' ftpport
_ -> return dne
Nothing -> return dne
followredir _ _ = return dne
_ -> return (Right dne)
Nothing -> return (Right dne)
followredir _ _ = return (Right dne)
-- Parse eg: attachment; filename="fname.ext"
-- per RFC 2616
@ -317,31 +317,32 @@ headRequest r = r
{- Download a perhaps large file, with auto-resume of incomplete downloads.
-
- Displays error message on stderr when download failed.
- When the download fails, returns an error message.
-}
download :: MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO Bool
download :: MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO (Either String ())
download = download' False
{- Avoids displaying any error message. -}
{- Avoids displaying any error message, including silencing curl errors. -}
downloadQuiet :: MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO Bool
downloadQuiet = download' True
downloadQuiet meterupdate url file uo = isRight
<$> download' True meterupdate url file uo
download' :: Bool -> MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO Bool
download' noerror meterupdate url file uo =
download' :: Bool -> MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO (Either String ())
download' nocurlerror meterupdate url file uo =
catchJust matchHttpException go showhttpexception
`catchNonAsync` (dlfailed . show)
where
go = case parseURIRelaxed url of
Just u -> checkPolicy uo u False dlfailed $
Just u -> checkPolicy uo u $
case (urlDownloader uo, parseUrlRequest (show u)) of
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust
(matchStatusCodeException (== found302))
(downloadConduit meterupdate req file uo >> return True)
(downloadConduit meterupdate req file uo >> return (Right ()))
(followredir r)
(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing)
| isfileurl u -> downloadfile u
| isftpurl u -> downloadcurlrestricted r u url ftpport
| otherwise -> unsupportedUrlScheme u dlfailed
| otherwise -> dlfailed $ unsupportedUrlScheme u
(DownloadWithCurl _, _)
| isfileurl u -> downloadfile u
| otherwise -> downloadcurl url basecurlparams
@ -354,27 +355,20 @@ download' noerror meterupdate url file uo =
ftpport = 21
showhttpexception he = do
let msg = case he of
HttpExceptionRequest _ (StatusCodeException r _) ->
B8.toString $ statusMessage $ responseStatus r
HttpExceptionRequest _ (InternalException ie) ->
case fromException ie of
Nothing -> show ie
Just (ConnectionRestricted why) -> why
HttpExceptionRequest _ other -> show other
_ -> show he
dlfailed msg
showhttpexception he = dlfailed $ case he of
HttpExceptionRequest _ (StatusCodeException r _) ->
B8.toString $ statusMessage $ responseStatus r
HttpExceptionRequest _ (InternalException ie) ->
case fromException ie of
Nothing -> show ie
Just (ConnectionRestricted why) -> why
HttpExceptionRequest _ other -> show other
_ -> show he
dlfailed msg
| noerror = return False
| otherwise = do
hPutStrLn stderr $ "download failed: " ++ msg
hFlush stderr
return False
dlfailed msg = return $ Left $ "download failed: " ++ msg
basecurlparams = curlParams uo
[ if noerror
[ if nocurlerror
then Param "-S"
else Param "-sS"
, Param "-f"
@ -387,7 +381,10 @@ download' noerror meterupdate url file uo =
-- if the url happens to be empty, so pre-create.
unlessM (doesFileExist file) $
writeFile file ""
boolSystem "curl" (curlparams ++ [Param "-o", File file, File rawurl])
ifM (boolSystem "curl" (curlparams ++ [Param "-o", File file, File rawurl]))
( return $ Right ()
, return $ Left "download failed"
)
downloadcurlrestricted r u rawurl defport =
downloadcurl rawurl =<< curlRestrictedParams r u defport basecurlparams
@ -396,7 +393,7 @@ download' noerror meterupdate url file uo =
let src = unEscapeString (uriPath u)
withMeteredFile src meterupdate $
L.writeFile file
return True
return $ Right ()
-- Conduit does not support ftp, so will throw an exception on a
-- redirect to a ftp url; fall back to curl.
@ -404,7 +401,7 @@ download' noerror meterupdate url file uo =
case headMaybe $ map decodeBS $ getResponseHeader hLocation resp of
Just url' -> case parseURIRelaxed url' of
Just u' | isftpurl u' ->
checkPolicy uo u' False dlfailed $
checkPolicy uo u' $
downloadcurlrestricted r u' url' ftpport
_ -> throwIO ex
Nothing -> throwIO ex

View file

@ -22,3 +22,8 @@ If user convenience was something to strive for here, it should technically be p
[[!meta author=yoh]]
[[!tag projects/datalad]]
> [[fixed|done]], and I also converted a number of other places
> where an error could leak through to stderr, although there are still
> some places where direct writes to stderr happen -- I'll probably never
> be able to guarantee --json-error-messages catches every possible stderr
> output. --[[Joey]]