incremental verification for web special remote

Except when configuration makes curl be used. It did not seem worth
trying to tail the file while curl is downloading it.
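The mechanism is to hash the content as it streams to disk, rather
than re-reading the whole file once the download finishes. A minimal
standalone sketch of that pattern, using cryptonite's incremental
hashing API (none of these names are from this commit):

    import qualified Crypto.Hash as H  -- cryptonite
    import qualified Data.ByteString as B
    import System.IO (Handle)

    -- Hash each chunk as it is written, so the checksum is known the
    -- moment the last byte arrives, with no separate hashing pass.
    writeAndHash :: Handle -> [B.ByteString] -> IO (H.Digest H.SHA256)
    writeAndHash h = go H.hashInit
      where
        go ctx [] = return (H.hashFinalize ctx)
        go ctx (c:cs) = do
            B.hPut h c
            go (H.hashUpdate ctx c) cs

This is also why curl is the exception: when a separate process writes
the file, there is no chunk stream to feed the hash, short of tailing
the file as it grows.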

But when an interrupted download is resumed, it does not read the
whole existing file in order to hash it, for the same reason discussed
in commit 7eb3742e4b: that could take a long time with no progress
being displayed. There is also an open HTTP request that needs to be
consumed, and taking a long time to hash the file might cause it to
time out.
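A sketch of that decision as a guard (a hypothetical helper, not code
from this commit): if a partial file already exists, the download is a
resume, so the incremental verifier is dropped instead of re-hashing
the existing bytes.

    import System.Directory (doesFileExist)

    -- Resume check: with a partial file on disk, skip incremental
    -- verification rather than stall on hashing the existing prefix
    -- while an open HTTP request waits to be consumed.
    verifierForDownload :: FilePath -> Maybe v -> IO (Maybe v)
    verifierForDownload dest miv = do
        resuming <- doesFileExist dest
        return (if resuming then Nothing else miv)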

Also, in passing, implemented it for the git and external special
remotes when downloading from the web. Several others, like S3, are
within striking distance now as well.
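The calling convention the affected remotes use is small: start an
incremental verifier, thread it through the download, and finish it to
obtain the Verification. A condensed, self-contained sketch of that
pattern; the stubs stand in for git-annex's real definitions (see the
Annex.Verify import in the hunks below):

    data Verification = UnVerified | Verified deriving Show
    data IncrementalVerifier = IncrementalVerifier

    -- Stub: the real version sets up a hasher for the key, when one
    -- is available and verification is wanted.
    startVerify :: IO (Maybe IncrementalVerifier)
    startVerify = return (Just IncrementalVerifier)

    -- Stub: the real version reports whether the content verified.
    finishVerify :: Maybe IncrementalVerifier -> IO (Bool, Verification)
    finishVerify _ = return (True, Verified)

    -- Start a verifier, hand it to the downloader so bytes are hashed
    -- as they arrive, then finish it to learn the result.
    downloadVerified :: (Maybe IncrementalVerifier -> IO Bool) -> IO Verification
    downloadVerified download = do
        iv <- startVerify
        ok <- download iv
        if ok
            then snd <$> finishVerify iv
            else ioError (userError "failed to download content")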

Sponsored-by: Dartmouth College's DANDI project
Joey Hess 2021-08-18 14:49:01 -04:00
parent 88b63a43fa
commit d154e7022e
15 changed files with 101 additions and 67 deletions


@@ -215,7 +215,7 @@ downloadTorrentFile u = do
 			liftIO $ hClose h
 			resetAnnexFilePerm (toRawFilePath f)
 			ok <- Url.withUrlOptions $
-				Url.download nullMeterUpdate u f
+				Url.download nullMeterUpdate Nothing u f
 			when ok $
 				liftIO $ renameFile f (fromRawFilePath torrent)
 			return ok


@@ -808,9 +808,9 @@ checkUrlM external url =
 	mkmulti (u, s, f) = (u, s, f)
 
 retrieveUrl :: Retriever
-retrieveUrl = fileRetriever $ \f k p -> do
+retrieveUrl = fileRetriever' $ \f k p iv -> do
 	us <- getWebUrls k
-	unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
+	unlessM (withUrlOptions $ downloadUrl k p iv us (fromRawFilePath f)) $
 		giveup "failed to download content"
 
 checkKeyUrl :: CheckPresent


@@ -292,7 +292,7 @@ tryGitConfigRead autoinit r hasuuid
 		let url = Git.repoLocation r ++ "/config"
 		v <- withTmpFile "git-annex.tmp" $ \tmpfile h -> do
 			liftIO $ hClose h
-			Url.download' nullMeterUpdate url tmpfile uo >>= \case
+			Url.download' nullMeterUpdate Nothing url tmpfile uo >>= \case
 				Right () -> pipedconfig Git.Config.ConfigNullList
 					False url "git"
 					[ Param "config"
@@ -540,12 +540,13 @@ copyFromRemote' forcersync r st key file dest meterupdate vc = do
 copyFromRemote'' :: Git.Repo -> Bool -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
 copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest meterupdate vc
 	| Git.repoIsHttp repo = do
+		iv <- startVerifyKeyContentIncrementally vc key
 		gc <- Annex.getGitConfig
 		ok <- Url.withUrlOptionsPromptingCreds $
-			Annex.Content.downloadUrl key meterupdate (keyUrls gc repo r key) dest
+			Annex.Content.downloadUrl key meterupdate iv (keyUrls gc repo r key) dest
 		unless ok $
 			giveup "failed to download content"
-		return UnVerified
+		snd <$> finishVerifyKeyContentIncrementally iv
 	| not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do
 		u <- getUUID
 		hardlink <- wantHardLink


@@ -497,7 +497,7 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload
 		Nothing -> giveup "unable to parse git-lfs server download url"
 		Just req -> do
 			uo <- getUrlOptions
-			liftIO $ downloadConduit p req (fromRawFilePath dest) uo
+			liftIO $ downloadConduit p Nothing req (fromRawFilePath dest) uo
 
 -- Since git-lfs does not support removing content, nothing needs to be
 -- done to lock content in the remote, except for checking that the content


@@ -126,7 +126,7 @@ downloadAction :: FilePath -> MeterUpdate -> Key -> ((URLString -> Annex (Either
 downloadAction dest p key run =
 	Url.withUrlOptions $ \uo ->
 		meteredFile dest (Just p) key $
-			run (\url -> Url.download' p url dest uo)
+			run (\url -> Url.download' p Nothing url dest uo)
 				>>= either giveup (const (return ()))
 
 checkKey :: Maybe URLString -> LearnedLayout -> Key -> Annex Bool


@@ -413,7 +413,7 @@ retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
 		Left failreason -> do
 			warning failreason
 			giveup "cannot download content"
-		Right us -> unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
+		Right us -> unlessM (withUrlOptions $ downloadUrl k p Nothing us (fromRawFilePath f)) $
 			giveup "failed to download content"
 
 retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
@@ -426,7 +426,7 @@ retrieveHelper info h loc f p = retrieveHelper' h f p $
 retrieveHelper' :: S3Handle -> FilePath -> MeterUpdate -> S3.GetObject -> Annex ()
 retrieveHelper' h f p req = liftIO $ runResourceT $ do
 	S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle h req
-	Url.sinkResponseFile p zeroBytesProcessed f WriteMode rsp
+	Url.sinkResponseFile p Nothing zeroBytesProcessed f WriteMode rsp
 
 remove :: S3HandleVar -> Remote -> S3Info -> Remover
 remove hv r info k = withS3HandleOrFail (uuid r) hv $ \h -> do
@@ -501,7 +501,7 @@ retrieveExportS3 hv r info _k loc f p = do
 	Nothing -> case getPublicUrlMaker info of
 		Just geturl -> either giveup return =<<
 			Url.withUrlOptions
-				(Url.download' p (geturl exportloc) f)
+				(Url.download' p Nothing (geturl exportloc) f)
 		Nothing -> giveup $ needS3Creds (uuid r)
   where
 	exportloc = bucketExportLocation info loc


@@ -1,6 +1,6 @@
 {- Web remote.
  -
- - Copyright 2011 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2021 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -13,6 +13,7 @@ import Remote.Helper.ExportImport
 import qualified Git
 import qualified Git.Construct
 import Annex.Content
+import Annex.Verify
 import Config.Cost
 import Config
 import Logs.Web
@@ -82,19 +83,29 @@ gen r _ rc gc rs = do
 		}
 
 downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
-downloadKey key _af dest p _ = do
-	get =<< getWebUrls key
-	return UnVerified
+downloadKey key _af dest p vc = go =<< getWebUrls key
   where
-	get [] = giveup "no known url"
-	get urls = do
-		r <- untilTrue urls $ \u -> do
-			let (u', downloader) = getDownloader u
-			case downloader of
-				YoutubeDownloader -> youtubeDlTo key u' dest p
-				_ -> Url.withUrlOptions $ downloadUrl key p [u'] dest
-		unless r $
-			giveup "download failed"
+	go [] = giveup "no known url"
+	go urls = getM dl urls >>= \case
+		Just v -> return v
+		Nothing -> giveup "download failed"
+	dl u = do
+		let (u', downloader) = getDownloader u
+		case downloader of
+			YoutubeDownloader ->
+				ifM (youtubeDlTo key u' dest p)
+					( return (Just UnVerified)
+					, return Nothing
+					)
+			_ -> do
+				iv <- startVerifyKeyContentIncrementally vc key
+				ifM (Url.withUrlOptions $ downloadUrl key p iv [u'] dest)
+					( finishVerifyKeyContentIncrementally iv >>= \case
+						(True, v) -> return (Just v)
+						(False, _) -> return Nothing
+					, return Nothing
+					)
 
 uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex ()
 uploadKey _ _ _ = giveup "upload to web not supported"