diff --git a/Annex/Content.hs b/Annex/Content.hs
index fa8b35734f..8dd1dec0f2 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -645,7 +645,17 @@ downloadUrl k p iv urls file uo =
 	go [] Nothing = return False
 	go (u:us) _ = Url.download' p iv u file uo >>= \case
 		Right () -> return True
-		Left err -> go us (Just err)
+		Left err -> do
+			-- If the incremental verifier was fed anything
+			-- while the download that failed ran, it's unable
+			-- to be used for the other urls.
+			case iv of
+				Just iv' ->
+					liftIO $ positionIncremental iv' >>= \case
+						Just n | n > 0 -> unableIncremental iv'
+						_ -> noop
+				Nothing -> noop
+			go us (Just err)
 
 {- Copies a key's content, when present, to a temp file.
  - This is used to speed up some rsyncs. -}
diff --git a/Annex/Url.hs b/Annex/Url.hs
index 1171aa42d5..9a9eff24a5 100644
--- a/Annex/Url.hs
+++ b/Annex/Url.hs
@@ -34,6 +34,7 @@ module Annex.Url (
 import Annex.Common
 import qualified Annex
 import qualified Utility.Url as U
+import Utility.Hash (IncrementalVerifier)
 import Utility.IPAddress
 #ifdef WITH_HTTP_CLIENT_RESTRICTED
 import Network.HTTP.Client.Restricted
@@ -172,15 +173,15 @@ checkBoth url expected_size uo =
 		Right r -> return r
 		Left err -> warning err >> return False
 
-download :: MeterUpdate -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
-download meterupdate url file uo =
-	liftIO (U.download meterupdate url file uo) >>= \case
+download :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
+download meterupdate iv url file uo =
+	liftIO (U.download meterupdate iv url file uo) >>= \case
 		Right () -> return True
 		Left err -> warning err >> return False
 
-download' :: MeterUpdate -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ())
-download' meterupdate url file uo =
-	liftIO (U.download meterupdate url file uo)
+download' :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ())
+download' meterupdate iv url file uo =
+	liftIO (U.download meterupdate iv url file uo)
 
 exists :: U.URLString -> U.UrlOptions -> Annex Bool
 exists url uo = liftIO (U.exists url uo) >>= \case
diff --git a/Assistant/Upgrade.hs b/Assistant/Upgrade.hs
index 77ccaaf610..fbe3efa3bb 100644
--- a/Assistant/Upgrade.hs
+++ b/Assistant/Upgrade.hs
@@ -326,8 +326,8 @@ downloadDistributionInfo = do
 	liftIO $ withTmpDir "git-annex.tmp" $ \tmpdir -> do
 		let infof = tmpdir </> "info"
 		let sigf = infof ++ ".sig"
-		ifM (isRight <$> Url.download nullMeterUpdate distributionInfoUrl infof uo
-			<&&> (isRight <$> Url.download nullMeterUpdate distributionInfoSigUrl sigf uo)
+		ifM (isRight <$> Url.download nullMeterUpdate Nothing distributionInfoUrl infof uo
+			<&&> (isRight <$> Url.download nullMeterUpdate Nothing distributionInfoSigUrl sigf uo)
 			<&&> verifyDistributionSig gpgcmd sigf)
 			( parseInfoFile <$> readFileStrict infof
 			, return Nothing
diff --git a/CHANGELOG b/CHANGELOG
index 4c878844d6..7b6b512017 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,8 +10,8 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
     git-annex's own progress display.
   * Many special remotes now checksum content while it is being retrieved,
     instead of in a separate pass at the end. This is supported for most
-    special remotes on Linux (except for web, bittorrent, gitlfs, and S3),
-    and for a few on other OSs (directory, webdav, bup, ddar, gcrypt,
+    special remotes on Linux (except for bittorrent, gitlfs, and S3),
+    and for a few on other OSs (directory, web, webdav, bup, ddar, gcrypt,
     glacier).
     Special remotes using chunking or encryption also support it.
     But exporttree/importtree special remotes do not.
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index e12d44e2b5..70cc3239f9 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -314,7 +314,7 @@ downloadWeb addunlockedmatcher o url urlinfo file =
 	go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just file))
   where
 	urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
-	downloader f p = Url.withUrlOptions $ downloadUrl urlkey p [url] f
+	downloader f p = Url.withUrlOptions $ downloadUrl urlkey p Nothing [url] f
 	go Nothing = return Nothing
 	go (Just tmp) = ifM (pure (not (rawOption o)) <&&> liftIO (isHtmlFile (fromRawFilePath tmp)))
 		( tryyoutubedl tmp
diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs
index f13ca405e3..9fbaf34ffd 100644
--- a/Command/ImportFeed.hs
+++ b/Command/ImportFeed.hs
@@ -173,7 +173,7 @@ downloadFeed url
 	| Url.parseURIRelaxed url == Nothing = giveup "invalid feed url"
 	| otherwise = withTmpFile "feed" $ \f h -> do
 		liftIO $ hClose h
-		ifM (Url.withUrlOptions $ Url.download nullMeterUpdate url f)
+		ifM (Url.withUrlOptions $ Url.download nullMeterUpdate Nothing url f)
 			( Just <$> liftIO (readFileStrict f)
 			, return Nothing
 			)
diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs
index 818676eb6d..3ce18f584c 100644
--- a/Remote/BitTorrent.hs
+++ b/Remote/BitTorrent.hs
@@ -215,7 +215,7 @@ downloadTorrentFile u = do
 			liftIO $ hClose h
 			resetAnnexFilePerm (toRawFilePath f)
 			ok <- Url.withUrlOptions $
-				Url.download nullMeterUpdate u f
+				Url.download nullMeterUpdate Nothing u f
 			when ok $
 				liftIO $ renameFile f (fromRawFilePath torrent)
 			return ok
diff --git a/Remote/External.hs b/Remote/External.hs
index 156b97d5dd..102b6caeff 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -808,9 +808,9 @@ checkUrlM external url =
 	mkmulti (u, s, f) = (u, s, f)
 
 retrieveUrl :: Retriever
-retrieveUrl = fileRetriever $ \f k p -> do
+retrieveUrl = fileRetriever' $ \f k p iv -> do
 	us <- getWebUrls k
-	unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
+	unlessM (withUrlOptions $ downloadUrl k p iv us (fromRawFilePath f)) $
 		giveup "failed to download content"
 
 checkKeyUrl :: CheckPresent
diff --git a/Remote/Git.hs b/Remote/Git.hs
index 08f7e90431..8d97c59a28 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -292,7 +292,7 @@ tryGitConfigRead autoinit r hasuuid
 		let url = Git.repoLocation r ++ "/config"
 		v <- withTmpFile "git-annex.tmp" $ \tmpfile h -> do
 			liftIO $ hClose h
-			Url.download' nullMeterUpdate url tmpfile uo >>= \case
+			Url.download' nullMeterUpdate Nothing url tmpfile uo >>= \case
 				Right () -> pipedconfig Git.Config.ConfigNullList
 					False url "git"
 					[ Param "config"
@@ -540,12 +540,13 @@ copyFromRemote' forcersync r st key file dest meterupdate vc = do
 copyFromRemote'' :: Git.Repo -> Bool -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
 copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest meterupdate vc
 	| Git.repoIsHttp repo = do
+		iv <- startVerifyKeyContentIncrementally vc key
 		gc <- Annex.getGitConfig
 		ok <- Url.withUrlOptionsPromptingCreds $
-			Annex.Content.downloadUrl key meterupdate (keyUrls gc repo r key) dest
+			Annex.Content.downloadUrl key meterupdate iv (keyUrls gc repo r key) dest
 		unless ok $
 			giveup "failed to download content"
-		return UnVerified
+		snd <$> finishVerifyKeyContentIncrementally iv
 	| not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do
 		u <- getUUID
 		hardlink <- wantHardLink
diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs
index f3e3d37e49..38dce48392 100644
--- a/Remote/GitLFS.hs
+++ b/Remote/GitLFS.hs
@@ -497,7 +497,7 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload
 		Nothing -> giveup "unable to parse git-lfs server download url"
 		Just req -> do
 			uo <- getUrlOptions
-			liftIO $ downloadConduit p req (fromRawFilePath dest) uo
+			liftIO $ downloadConduit p Nothing req (fromRawFilePath dest) uo
 
 -- Since git-lfs does not support removing content, nothing needs to be
 -- done to lock content in the remote, except for checking that the content
diff --git a/Remote/HttpAlso.hs b/Remote/HttpAlso.hs
index 522100dfc9..79cc0771c8 100644
--- a/Remote/HttpAlso.hs
+++ b/Remote/HttpAlso.hs
@@ -126,7 +126,7 @@ downloadAction :: FilePath -> MeterUpdate -> Key -> ((URLString -> Annex (Either
 downloadAction dest p key run =
 	Url.withUrlOptions $ \uo ->
 		meteredFile dest (Just p) key $
-			run (\url -> Url.download' p url dest uo)
+			run (\url -> Url.download' p Nothing url dest uo)
 				>>= either giveup (const (return ()))
 
 checkKey :: Maybe URLString -> LearnedLayout -> Key -> Annex Bool
diff --git a/Remote/S3.hs b/Remote/S3.hs
index f2897e658d..b173bd36fd 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -413,7 +413,7 @@ retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
 		Left failreason -> do
 			warning failreason
 			giveup "cannot download content"
-		Right us -> unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
+		Right us -> unlessM (withUrlOptions $ downloadUrl k p Nothing us (fromRawFilePath f)) $
 			giveup "failed to download content"
 
 retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
@@ -426,7 +426,7 @@ retrieveHelper info h loc f p = retrieveHelper' h f p $
 retrieveHelper' :: S3Handle -> FilePath -> MeterUpdate -> S3.GetObject -> Annex ()
 retrieveHelper' h f p req = liftIO $ runResourceT $ do
 	S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle h req
-	Url.sinkResponseFile p zeroBytesProcessed f WriteMode rsp
+	Url.sinkResponseFile p Nothing zeroBytesProcessed f WriteMode rsp
 
 remove :: S3HandleVar -> Remote -> S3Info -> Remover
 remove hv r info k = withS3HandleOrFail (uuid r) hv $ \h -> do
@@ -501,7 +501,7 @@ retrieveExportS3 hv r info _k loc f p = do
 		Nothing -> case getPublicUrlMaker info of
 			Just geturl -> either giveup return =<<
 				Url.withUrlOptions
-					(Url.download' p (geturl exportloc) f)
+					(Url.download' p Nothing (geturl exportloc) f)
 			Nothing -> giveup $ needS3Creds (uuid r)
   where
 	exportloc = bucketExportLocation info loc
diff --git a/Remote/Web.hs b/Remote/Web.hs
index 049ed61120..938881e37a 100644
--- a/Remote/Web.hs
+++ b/Remote/Web.hs
@@ -1,6 +1,6 @@
 {- Web remote.
  -
- - Copyright 2011 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2021 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
 -}
@@ -13,6 +13,7 @@ import Remote.Helper.ExportImport
 import qualified Git
 import qualified Git.Construct
 import Annex.Content
+import Annex.Verify
 import Config.Cost
 import Config
 import Logs.Web
@@ -82,19 +83,29 @@ gen r _ rc gc rs = do
 	}
 
 downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
-downloadKey key _af dest p _ = do
-	get =<< getWebUrls key
-	return UnVerified
+downloadKey key _af dest p vc = go =<< getWebUrls key
   where
-	get [] = giveup "no known url"
-	get urls = do
-		r <- untilTrue urls $ \u -> do
-			let (u', downloader) = getDownloader u
-			case downloader of
-				YoutubeDownloader -> youtubeDlTo key u' dest p
-				_ -> Url.withUrlOptions $ downloadUrl key p [u'] dest
-		unless r $
-			giveup "download failed"
+	go [] = giveup "no known url"
+	go urls = getM dl urls >>= \case
+		Just v -> return v
+		Nothing -> giveup "download failed"
+
+	dl u = do
+		let (u', downloader) = getDownloader u
+		case downloader of
+			YoutubeDownloader ->
+				ifM (youtubeDlTo key u' dest p)
+					( return (Just UnVerified)
+					, return Nothing
+					)
+			_ -> do
+				iv <- startVerifyKeyContentIncrementally vc key
+				ifM (Url.withUrlOptions $ downloadUrl key p iv [u'] dest)
+					( finishVerifyKeyContentIncrementally iv >>= \case
+						(True, v) -> return (Just v)
+						(False, _) -> return Nothing
+					, return Nothing
+					)
 
 uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex ()
 uploadKey _ _ _ = giveup "upload to web not supported"
diff --git a/Utility/Url.hs b/Utility/Url.hs
index 4f3a4125c2..3cf268f8bf 100644
--- a/Utility/Url.hs
+++ b/Utility/Url.hs
@@ -1,6 +1,6 @@
 {- Url downloading.
  -
- - Copyright 2011-2020 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2021 Joey Hess <id@joeyh.name>
  -
  - License: BSD-2-clause
 -}
@@ -53,6 +53,7 @@ import Utility.HttpManagerRestricted
 #endif
 import Utility.IPAddress
 import qualified Utility.RawFilePath as R
+import Utility.Hash (IncrementalVerifier(..))
 
 import Network.URI
 import Network.HTTP.Types
@@ -363,11 +364,11 @@ headRequest r = r
  -
 - When the download fails, returns an error message.
 -}
-download :: MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO (Either String ())
+download :: MeterUpdate -> Maybe IncrementalVerifier -> URLString -> FilePath -> UrlOptions -> IO (Either String ())
 download = download' False
 
-download' :: Bool -> MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO (Either String ())
-download' nocurlerror meterupdate url file uo =
+download' :: Bool -> MeterUpdate -> Maybe IncrementalVerifier -> URLString -> FilePath -> UrlOptions -> IO (Either String ())
+download' nocurlerror meterupdate iv url file uo =
 	catchJust matchHttpException go showhttpexception
 		`catchNonAsync` (dlfailed . show)
   where
@@ -376,7 +377,7 @@ download' nocurlerror meterupdate url file uo =
 		case (urlDownloader uo, parseRequest (show u)) of
 			(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) ->
 				catchJust (matchStatusCodeException (== found302))
-					(downloadConduit meterupdate req file uo >> return (Right ()))
+					(downloadConduit meterupdate iv req file uo >> return (Right ()))
 					(followredir r)
 			(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing)
 				| isfileurl u -> downloadfile u
@@ -404,7 +405,9 @@ download' nocurlerror meterupdate url file uo =
 			HttpExceptionRequest _ other -> show other
 		_ -> show he
 
-	dlfailed msg = return $ Left $ "download failed: " ++ msg
+	dlfailed msg = do
+		noverification
+		return $ Left $ "download failed: " ++ msg
 
 	basecurlparams = curlParams uo
 		[ if nocurlerror
@@ -416,6 +419,7 @@ download' nocurlerror meterupdate url file uo =
 		]
 
 	downloadcurl rawurl curlparams = do
+		noverification
 		-- curl does not create destination file
 		-- if the url happens to be empty, so pre-create.
 		unlessM (doesFileExist file) $
@@ -429,6 +433,7 @@ download' nocurlerror meterupdate url file uo =
 			downloadcurl rawurl =<< curlRestrictedParams r u defport basecurlparams
 
 	downloadfile u = do
+		noverification
 		let src = unEscapeString (uriPath u)
 		withMeteredFile src meterupdate $
 			L.writeFile file
@@ -446,6 +451,8 @@ download' nocurlerror meterupdate url file uo =
 				Nothing -> throwIO ex
 	followredir _ ex = throwIO ex
 
+	noverification = maybe noop unableIncremental iv
+
 {- Download a perhaps large file using conduit, with auto-resume
  - of incomplete downloads.
  -
@@ -456,8 +463,8 @@ download' nocurlerror meterupdate url file uo =
  - thrown for reasons other than http status codes will still be thrown
  - as usual.)
 -}
-downloadConduit :: MeterUpdate -> Request -> FilePath -> UrlOptions -> IO ()
-downloadConduit meterupdate req file uo =
+downloadConduit :: MeterUpdate -> Maybe IncrementalVerifier -> Request -> FilePath -> UrlOptions -> IO ()
+downloadConduit meterupdate iv req file uo =
 	catchMaybeIO (getFileSize (toRawFilePath file)) >>= \case
 		Just sz | sz > 0 -> resumedownload sz
 		_ -> join $ runResourceT $ do
@@ -504,7 +511,9 @@ downloadConduit meterupdate req file uo =
 					store zeroBytesProcessed WriteMode resp
 					return (return ())
 				else if alreadydownloaded sz resp
-					then return (return ())
+					then do
+						liftIO noverification
+						return (return ())
 					else do
 						rf <- extractFromResourceT (respfailure resp)
 						if responseStatus resp == unauthorized401
@@ -529,13 +538,13 @@ downloadConduit meterupdate req file uo =
 			Nothing -> True
 
 	store initialp mode resp =
-		sinkResponseFile meterupdate initialp file mode resp
+		sinkResponseFile meterupdate iv initialp file mode resp
 
 	respfailure = B8.toString . statusMessage . responseStatus
 
 	retryauthed (ba, signalsuccess) = do
 		r <- tryNonAsync $ downloadConduit
-			meterupdate
+			meterupdate iv
 			(applyBasicAuth' ba req)
 			file
 			(uo { getBasicAuth = noBasicAuth })
@@ -545,32 +554,44 @@ downloadConduit meterupdate req file uo =
 		() <- signalsuccess False
 		throwM e
 
-{- Sinks a Response's body to a file. The file can either be opened in
- - WriteMode or AppendMode. Updates the meter as data is received.
+	noverification = maybe noop unableIncremental iv
+
+{- Sinks a Response's body to a file. The file can either be appended to
+ - (AppendMode), or written from the start of the response (WriteMode).
+ - Updates the meter and incremental verifier as data is received,
+ - when not appending.
  -
 - Note that the responseStatus is not checked by this function.
 -}
 sinkResponseFile
 	:: MonadResource m
 	=> MeterUpdate
+	-> Maybe IncrementalVerifier
 	-> BytesProcessed
 	-> FilePath
 	-> IOMode
 	-> Response (ConduitM () B8.ByteString m ())
 	-> m ()
-sinkResponseFile meterupdate initialp file mode resp = do
+sinkResponseFile meterupdate iv initialp file mode resp = do
+	ui <- case (iv, mode) of
+		(Just iv', AppendMode) -> do
+			liftIO $ unableIncremental iv'
+			return (const noop)
+		(Just iv', _) -> return (updateIncremental iv')
+		(Nothing, _) -> return (const noop)
 	(fr, fh) <- allocate (openBinaryFile file mode) hClose
-	runConduit $ responseBody resp .| go initialp fh
+	runConduit $ responseBody resp .| go ui initialp fh
 	release fr
   where
-	go sofar fh = await >>= \case
+	go ui sofar fh = await >>= \case
 		Nothing -> return ()
 		Just bs -> do
 			let sofar' = addBytesProcessed sofar (B.length bs)
 			liftIO $ do
 				void $ meterupdate sofar'
+				() <- ui bs
 				B.hPut fh bs
-			go sofar' fh
+			go ui sofar' fh
 
 {- Downloads at least the specified number of bytes from an url. -}
 downloadPartial :: URLString -> UrlOptions -> Int -> IO (Maybe L.ByteString)
diff --git a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment
index e5bc881d88..22c4433fa7 100644
--- a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment
+++ b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment
@@ -5,24 +5,14 @@ content="""
 
 The concurrency problem is fixed now.
 
-Directory and webdav now also do incremental hashing.
+Directory and webdav and web now also do incremental hashing.
 
-There seems to have been a reversion in annex.verify handling;
-I'm seeing directory do incremental hashing even when annex.verify is
-false. Noticed while benchmarking it to see how much incremental hashing
-sped it up. Seems that in Remote.Helper.Special, it uses
-RemoteVerify baser, but when shouldVerify checks that value, it
-sees that Types.Remote.isExportSupported is true. Despite the remote
-not actually being an export remote. Because adjustExportImport gets
-run after that point, I think.. (update: this is fixed)
-
-As well as the web special remote, these do not do incremental hashing
+These do not do incremental hashing
 still: gitlfs, S3, httpalso. Problem is, these open the file for write.
 That prevents tailVerify re-opening it for read, because the haskell
 RTS actually does not allow opening a file for read that it has
 open for write. The new `fileRetriever'` can be used instead to fix these,
-but will take some more work. Also, the git remote, when accessing a
-repository over http does not do incremental hashing.
+but will take some more work. Also, retrieval from export/import special remotes does not do incremental hashing (except for versioned ones, which sometimes use retrieveKeyFile).
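
---

Editor's note: for readers tracing the verifier plumbing above, here is a minimal, self-contained Haskell sketch of the pattern this patch threads through the download path. The field names mirror those the diff uses (`updateIncremental`, `positionIncremental`, `unableIncremental`), but the definitions are illustrative stand-ins, not git-annex's actual `Utility.Hash` implementation; `mkCountingVerifier` and `tryUrls` are hypothetical helpers invented for demonstration.

```haskell
{-# LANGUAGE LambdaCase #-}
module Main where

import Data.IORef
import qualified Data.ByteString as B

-- Illustrative stand-in for Utility.Hash's IncrementalVerifier.
data IncrementalVerifier = IncrementalVerifier
	{ updateIncremental :: B.ByteString -> IO ()
	  -- ^ feed the verifier a chunk as it is written to disk
	, positionIncremental :: IO (Maybe Integer)
	  -- ^ bytes fed so far; Nothing once the verifier is unusable
	, unableIncremental :: IO ()
	  -- ^ mark the verifier unusable (eg, on resume or url failover)
	}

-- A verifier that only counts bytes, standing in for a real hasher.
mkCountingVerifier :: IO IncrementalVerifier
mkCountingVerifier = do
	r <- newIORef (Just 0)
	return IncrementalVerifier
		{ updateIncremental = \bs ->
			modifyIORef' r (fmap (+ fromIntegral (B.length bs)))
		, positionIncremental = readIORef r
		, unableIncremental = writeIORef r Nothing
		}

-- Simulates the failover loop in downloadUrl above: once a failed
-- download has fed the verifier any data, the verifier cannot be
-- reused for the remaining urls, because its hash state now includes
-- bytes from the bad stream.
tryUrls :: Maybe IncrementalVerifier -> (String -> IO Bool) -> [String] -> IO Bool
tryUrls _ _ [] = return False
tryUrls iv dl (u:us) = dl u >>= \case
	True -> return True
	False -> do
		case iv of
			Just iv' -> positionIncremental iv' >>= \case
				Just n | n > 0 -> unableIncremental iv'
				_ -> return ()
			Nothing -> return ()
		tryUrls iv dl us

main :: IO ()
main = do
	iv <- mkCountingVerifier
	updateIncremental iv (B.pack [1, 2, 3])
	_ <- tryUrls (Just iv) (const (return False)) ["url1", "url2"]
	positionIncremental iv >>= print  -- Nothing: verifier was disabled
```

The same reasoning explains why `sinkResponseFile` disables the verifier in `AppendMode`: a resumed download only feeds the verifier the tail of the file, so the hash state would never cover the already-present prefix.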