From 4f42292b13dc5a6664eeb19b5c9d48991eaef292 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 1 Sep 2021 15:28:22 -0400 Subject: [PATCH] improve url download failure display * When downloading urls fail, explain which urls failed for which reasons. * web: Avoid displaying a warning when downloading one url failed but another url later succeeded. Some other uses of downloadUrl use urls that are effectively internal use, and should not all be displayed to the user on failure. Eg, Remote.Git tries different urls where content could be located depending on how the remote repo is set up. Exposing those urls to the user would lead to wild goose chases. So had to parameterize it to control whether it displays urls or not. A side effect of this change is that when there are some youtube urls and some regular urls, it will try regular urls first, even if the youtube urls are listed first. This seems like an improvement if anything, but in any case there's no defined order of urls that it's supposed to use. Sponsored-by: Dartmouth College's Datalad project --- Annex/Content.hs | 29 +++++++++----- CHANGELOG | 4 ++ Command/AddUrl.hs | 2 +- Remote/External.hs | 2 +- Remote/Git.hs | 2 +- Remote/S3.hs | 2 +- Remote/Web.hs | 40 ++++++++++--------- ...e_error_reporting_for_failed_attempts.mdwn | 12 ++++++ 8 files changed, 61 insertions(+), 32 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 8dd1dec0f2..da65143ab4 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -632,18 +632,20 @@ saveState nocommit = doSideAction $ do Annex.Branch.commit =<< Annex.Branch.commitMessage {- Downloads content from any of a list of urls, displaying a progress - - meter. -} -downloadUrl :: Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool -downloadUrl k p iv urls file uo = + - meter. + - + - Only displays error message if all the urls fail to download. + - When listfailedurls is set, lists each url and why it failed. + - Otherwise, only displays one error message, from one of the urls + - that failed. + -} +downloadUrl :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool +downloadUrl listfailedurls k p iv urls file uo = -- Poll the file to handle configurations where an external -- download command is used. - meteredFile file (Just p) k (go urls Nothing) + meteredFile file (Just p) k (go urls []) where - -- Display only one error message, if all the urls fail to - -- download. - go [] (Just err) = warning err >> return False - go [] Nothing = return False - go (u:us) _ = Url.download' p iv u file uo >>= \case + go (u:us) errs = Url.download' p iv u file uo >>= \case Right () -> return True Left err -> do -- If the incremental verifier was fed anything @@ -655,7 +657,14 @@ downloadUrl k p iv urls file uo = Just n | n > 0 -> unableIncremental iv' _ -> noop Nothing -> noop - go us (Just err) + go us ((u, err) : errs) + go [] [] = return False + go [] errs@((_, err):_) = do + if listfailedurls + then warning $ unlines $ flip map errs $ \(u, err') -> + u ++ " " ++ err' + else warning err + return False {- Copies a key's content, when present, to a temp file. - This is used to speed up some rsyncs. -} diff --git a/CHANGELOG b/CHANGELOG index 897b010072..7cb757be5f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -26,6 +26,10 @@ git-annex (8.20210804) UNRELEASED; urgency=medium a file's content, and fail with an informative message. * Fix support for readonly git remotes. (Reversion in version 8.20210621) + * When downloading urls fail, explain which urls failed for which + reasons. + * web: Avoid displaying a warning when downloading one url failed + but another url later succeeded. -- Joey Hess Tue, 03 Aug 2021 12:22:45 -0400 diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index eb99119ac4..f256aed2d8 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -314,7 +314,7 @@ downloadWeb addunlockedmatcher o url urlinfo file = go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file)) where urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing - downloader f p = Url.withUrlOptions $ downloadUrl urlkey p Nothing [url] f + downloader f p = Url.withUrlOptions $ downloadUrl False urlkey p Nothing [url] f go Nothing = return Nothing go (Just tmp) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtmlFile (fromRawFilePath tmp))) ( tryyoutubedl tmp diff --git a/Remote/External.hs b/Remote/External.hs index 102b6caeff..1137cd74f5 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -810,7 +810,7 @@ checkUrlM external url = retrieveUrl :: Retriever retrieveUrl = fileRetriever' $ \f k p iv -> do us <- getWebUrls k - unlessM (withUrlOptions $ downloadUrl k p iv us (fromRawFilePath f)) $ + unlessM (withUrlOptions $ downloadUrl True k p iv us (fromRawFilePath f)) $ giveup "failed to download content" checkKeyUrl :: CheckPresent diff --git a/Remote/Git.hs b/Remote/Git.hs index 8d97c59a28..81a6bc5fdf 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -543,7 +543,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met iv <- startVerifyKeyContentIncrementally vc key gc <- Annex.getGitConfig ok <- Url.withUrlOptionsPromptingCreds $ - Annex.Content.downloadUrl key meterupdate iv (keyUrls gc repo r key) dest + Annex.Content.downloadUrl False key meterupdate iv (keyUrls gc repo r key) dest unless ok $ giveup "failed to download content" snd <$> finishVerifyKeyContentIncrementally iv diff --git a/Remote/S3.hs b/Remote/S3.hs index 9e0fd01f4a..7a4a8541b2 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -414,7 +414,7 @@ retrieve hv r rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case Left failreason -> do warning failreason giveup "cannot download content" - Right us -> unlessM (withUrlOptions $ downloadUrl k p iv us (fromRawFilePath f)) $ + Right us -> unlessM (withUrlOptions $ downloadUrl False k p iv us (fromRawFilePath f)) $ giveup "failed to download content" retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex () diff --git a/Remote/Web.hs b/Remote/Web.hs index 938881e37a..94e9391e33 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -86,26 +86,30 @@ downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig downloadKey key _af dest p vc = go =<< getWebUrls key where go [] = giveup "no known url" - go urls = getM dl urls >>= \case + go urls = dl (partition (not . isyoutube) (map getDownloader urls)) >>= \case Just v -> return v - Nothing -> giveup "download failed" + Nothing -> giveup $ unwords + [ "downloading from all" + , show (length urls) + , "known url(s) failed" + ] - dl u = do - let (u', downloader) = getDownloader u - case downloader of - YoutubeDownloader -> - ifM (youtubeDlTo key u' dest p) - ( return (Just UnVerified) - , return Nothing - ) - _ -> do - iv <- startVerifyKeyContentIncrementally vc key - ifM (Url.withUrlOptions $ downloadUrl key p iv [u'] dest) - ( finishVerifyKeyContentIncrementally iv >>= \case - (True, v) -> return (Just v) - (False, _) -> return Nothing - , return Nothing - ) + dl ([], ytus) = flip getM (map fst ytus) $ \u -> + ifM (youtubeDlTo key u dest p) + ( return (Just UnVerified) + , return Nothing + ) + dl (us, ytus) = do + iv <- startVerifyKeyContentIncrementally vc key + ifM (Url.withUrlOptions $ downloadUrl True key p iv (map fst us) dest) + ( finishVerifyKeyContentIncrementally iv >>= \case + (True, v) -> return (Just v) + (False, _) -> dl ([], ytus) + , dl ([], ytus) + ) + + isyoutube (_, YoutubeDownloader) = True + isyoutube _ = False uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex () uploadKey _ _ _ = giveup "upload to web not supported" diff --git a/doc/todo/get__58___improve_error_reporting_for_failed_attempts.mdwn b/doc/todo/get__58___improve_error_reporting_for_failed_attempts.mdwn index 0a79c556f3..3b89164781 100644 --- a/doc/todo/get__58___improve_error_reporting_for_failed_attempts.mdwn +++ b/doc/todo/get__58___improve_error_reporting_for_failed_attempts.mdwn @@ -41,3 +41,15 @@ refs in DataLad issues: - from web remote: ["download failed: Not Found"](https://github.com/datalad/datalad/pull/5936) - from ["failed to retrieve content from remote"](https://github.com/datalad/datalad/issues/5750) + +> I think this is specific to downloading urls, although it can happen +> for a few remotes (web, external). There's really no reason to display +> a download failed message if it successfully downloads a later url. +> (After all, if it had tried the working url first, it would never display +> anything about the broken url.) +> +> When all urls fail, it makes sense to display each url and why it failed +> when using the web (or external) remote, so the user can decide what to +> do about each of the problems. +> +> [[done]] --[[Joey]]