improve url download failure display

* When downloading urls fail, explain which urls failed for which
  reasons.
* web: Avoid displaying a warning when downloading one url failed
  but another url later succeeded.

Some other uses of downloadUrl use urls that are effectively internal use,
and should not all be displayed to the user on failure. Eg, Remote.Git
tries different urls where content could be located depending on how the
remote repo is set up. Exposing those urls to the user would lead to wild
goose chases. So had to parameterize it to control whether it displays urls
or not.

A side effect of this change is that when there are some youtube urls
and some regular urls, it will try regular urls first, even if the
youtube urls are listed first. This seems like an improvement if
anything, but in any case there's no defined order of urls that it's
supposed to use.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2021-09-01 15:28:22 -04:00
parent 9cd1430fd6
commit 4f42292b13
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 61 additions and 32 deletions

View file

@ -632,18 +632,20 @@ saveState nocommit = doSideAction $ do
Annex.Branch.commit =<< Annex.Branch.commitMessage
{- Downloads content from any of a list of urls, displaying a progress
- meter. -}
downloadUrl :: Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool
downloadUrl k p iv urls file uo =
- meter.
-
- Only displays error message if all the urls fail to download.
- When listfailedurls is set, lists each url and why it failed.
- Otherwise, only displays one error message, from one of the urls
- that failed.
-}
downloadUrl :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool
downloadUrl listfailedurls k p iv urls file uo =
-- Poll the file to handle configurations where an external
-- download command is used.
meteredFile file (Just p) k (go urls Nothing)
meteredFile file (Just p) k (go urls [])
where
-- Display only one error message, if all the urls fail to
-- download.
go [] (Just err) = warning err >> return False
go [] Nothing = return False
go (u:us) _ = Url.download' p iv u file uo >>= \case
go (u:us) errs = Url.download' p iv u file uo >>= \case
Right () -> return True
Left err -> do
-- If the incremental verifier was fed anything
@ -655,7 +657,14 @@ downloadUrl k p iv urls file uo =
Just n | n > 0 -> unableIncremental iv'
_ -> noop
Nothing -> noop
go us (Just err)
go us ((u, err) : errs)
go [] [] = return False
go [] errs@((_, err):_) = do
if listfailedurls
then warning $ unlines $ flip map errs $ \(u, err') ->
u ++ " " ++ err'
else warning err
return False
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}

View file

@ -26,6 +26,10 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
a file's content, and fail with an informative message.
* Fix support for readonly git remotes.
(Reversion in version 8.20210621)
* When downloading urls fail, explain which urls failed for which
reasons.
* web: Avoid displaying a warning when downloading one url failed
but another url later succeeded.
-- Joey Hess <id@joeyh.name> Tue, 03 Aug 2021 12:22:45 -0400

View file

@ -314,7 +314,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file))
where
urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
downloader f p = Url.withUrlOptions $ downloadUrl urlkey p Nothing [url] f
downloader f p = Url.withUrlOptions $ downloadUrl False urlkey p Nothing [url] f
go Nothing = return Nothing
go (Just tmp) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtmlFile (fromRawFilePath tmp)))
( tryyoutubedl tmp

View file

@ -810,7 +810,7 @@ checkUrlM external url =
retrieveUrl :: Retriever
retrieveUrl = fileRetriever' $ \f k p iv -> do
us <- getWebUrls k
unlessM (withUrlOptions $ downloadUrl k p iv us (fromRawFilePath f)) $
unlessM (withUrlOptions $ downloadUrl True k p iv us (fromRawFilePath f)) $
giveup "failed to download content"
checkKeyUrl :: CheckPresent

View file

@ -543,7 +543,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met
iv <- startVerifyKeyContentIncrementally vc key
gc <- Annex.getGitConfig
ok <- Url.withUrlOptionsPromptingCreds $
Annex.Content.downloadUrl key meterupdate iv (keyUrls gc repo r key) dest
Annex.Content.downloadUrl False key meterupdate iv (keyUrls gc repo r key) dest
unless ok $
giveup "failed to download content"
snd <$> finishVerifyKeyContentIncrementally iv

View file

@ -414,7 +414,7 @@ retrieve hv r rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
Left failreason -> do
warning failreason
giveup "cannot download content"
Right us -> unlessM (withUrlOptions $ downloadUrl k p iv us (fromRawFilePath f)) $
Right us -> unlessM (withUrlOptions $ downloadUrl False k p iv us (fromRawFilePath f)) $
giveup "failed to download content"
retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()

View file

@ -86,27 +86,31 @@ downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig
downloadKey key _af dest p vc = go =<< getWebUrls key
where
go [] = giveup "no known url"
go urls = getM dl urls >>= \case
go urls = dl (partition (not . isyoutube) (map getDownloader urls)) >>= \case
Just v -> return v
Nothing -> giveup "download failed"
Nothing -> giveup $ unwords
[ "downloading from all"
, show (length urls)
, "known url(s) failed"
]
dl u = do
let (u', downloader) = getDownloader u
case downloader of
YoutubeDownloader ->
ifM (youtubeDlTo key u' dest p)
dl ([], ytus) = flip getM (map fst ytus) $ \u ->
ifM (youtubeDlTo key u dest p)
( return (Just UnVerified)
, return Nothing
)
_ -> do
dl (us, ytus) = do
iv <- startVerifyKeyContentIncrementally vc key
ifM (Url.withUrlOptions $ downloadUrl key p iv [u'] dest)
ifM (Url.withUrlOptions $ downloadUrl True key p iv (map fst us) dest)
( finishVerifyKeyContentIncrementally iv >>= \case
(True, v) -> return (Just v)
(False, _) -> return Nothing
, return Nothing
(False, _) -> dl ([], ytus)
, dl ([], ytus)
)
isyoutube (_, YoutubeDownloader) = True
isyoutube _ = False
uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex ()
uploadKey _ _ _ = giveup "upload to web not supported"

View file

@ -41,3 +41,15 @@ refs in DataLad issues:
- from web remote: ["download failed: Not Found"](https://github.com/datalad/datalad/pull/5936)
- from ["failed to retrieve content from remote"](https://github.com/datalad/datalad/issues/5750)
> I think this is specific to downloading urls, although it can happen
> for a few remotes (web, external). There's really no reason to display
> a download failed message if it successfully downloads a later url.
> (After all, if it had tried the working url first, it would never display
> anything about the broken url.)
>
> When all urls fail, it makes sense to display each url and why it failed
> when using the web (or external) remote, so the user can decide what to
> do about each of the problems.
>
> [[done]] --[[Joey]]