diff --git a/Backend/Hash.hs b/Backend/Hash.hs index f55fa68994..ec478890fb 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -124,7 +124,7 @@ checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do exists <- liftIO $ R.doesPathExist file case (exists, fast) of (True, False) -> do - showAction "checksum" + showAction descChecksum sameCheckSum key <$> hashFile hash file nullMeterUpdate _ -> return True @@ -293,8 +293,12 @@ mkIncrementalVerifier ctx key = do return $ sameCheckSum key (show digest) Nothing -> return False , failIncremental = writeIORef v Nothing + , descVerify = descChecksum } +descChecksum :: String +descChecksum = "checksum" + {- A varient of the SHA256E backend, for testing that needs special keys - that cannot collide with legitimate keys in the repository. - diff --git a/CHANGELOG b/CHANGELOG index 60e29e662f..0b06cf8235 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,9 +8,10 @@ git-annex (8.20210804) UNRELEASED; urgency=medium * add: When adding a dotfile, avoid treating its name as an extension. * rsync special remote: Stop displaying rsync progress, and use git-annex's own progress display. - * Several special remotes verify content while it is being retrieved, - avoiding a separate checksum pass. They are: S3, bup, ddar, - and gcrypt (with a local repository). + * Special remotes now checksum content while it is being retrieved, + instead of in a separate pass at the end. This is supported for all + special remotes on Linux (except for web and bittorrent), and for a + few on other OSs (S3, bup, ddar, gcrypt). -- Joey Hess Tue, 03 Aug 2021 12:22:45 -0400 diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index cf02ca1285..42a94f8d02 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -98,7 +98,7 @@ store repotop chunksize finalizer k b p = storeHelper repotop finalizer k $ \des - :/ This is legacy code.. -} retrieve :: (RawFilePath -> Key -> [RawFilePath]) -> RawFilePath -> Retriever -retrieve locations d basek p c = withOtherTmp $ \tmpdir -> do +retrieve locations d basek p miv c = withOtherTmp $ \tmpdir -> do showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow." let tmp = tmpdir P. keyFile basek <> ".directorylegacy.tmp" let tmp' = fromRawFilePath tmp @@ -110,7 +110,7 @@ retrieve locations d basek p c = withOtherTmp $ \tmpdir -> do b <- liftIO $ L.readFile tmp' liftIO $ removeWhenExistsWith R.removeLink tmp sink b - byteRetriever go basek p c + byteRetriever go basek p miv c checkKey :: RawFilePath -> (RawFilePath -> Key -> [RawFilePath]) -> Key -> Annex Bool checkKey d locations k = liftIO $ diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 75002cb190..f2b3efe98c 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -407,9 +407,9 @@ store' repo r rsyncopts accessmethod storersync = fileStorer $ Remote.Rsync.store rsyncopts retrieve :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever -retrieve r rsyncopts accessmethod k p sink = do +retrieve r rsyncopts accessmethod k p miv sink = do repo <- getRepo r - retrieve' repo r rsyncopts accessmethod k p sink + retrieve' repo r rsyncopts accessmethod k p miv sink retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever retrieve' repo r rsyncopts accessmethod diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 56271df5ac..a810557c83 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -267,8 +267,7 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc -- Optimisation: Try the unchunked key first, to avoid -- looking in the git-annex branch for chunk counts -- that are likely not there. - iv <- startVerifyKeyContentIncrementally vc basek - tryNonAsync (getunchunked iv) >>= \case + tryNonAsync getunchunked >>= \case Right r -> finalize r Left e -> go (Just e) =<< chunkKeysOnly u chunkconfig basek @@ -287,22 +286,20 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc firstavail (Just e) _ [] = throwM e firstavail pe currsize ([]:ls) = firstavail pe currsize ls firstavail _ currsize ((k:ks):ls) - | k == basek = do - iv <- startVerifyKeyContentIncrementally vc basek - getunchunked iv - `catchNonAsync` (\e -> firstavail (Just e) currsize ls) + | k == basek = getunchunked + `catchNonAsync` (\e -> firstavail (Just e) currsize ls) | otherwise = do let offset = resumeOffset currsize k let p = maybe basep (offsetMeterUpdate basep . toBytesProcessed) offset v <- tryNonAsync $ - retriever (encryptor k) p $ \content -> + retriever (encryptor k) p Nothing $ \content -> bracket (maybe opennew openresume offset) (liftIO . hClose . fst) $ \(h, iv) -> do - iv' <- retrieved iv (Just h) p content + retrieved iv (Just h) p content let sz = toBytesProcessed $ fromMaybe 0 $ fromKey keyChunkSize k - getrest p h iv' sz sz ks + getrest p h iv sz sz ks case v of Left e | null ls -> throwM e @@ -313,12 +310,21 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc getrest p h iv sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed liftIO $ p' zeroBytesProcessed - iv' <- retriever (encryptor k) p' $ + retriever (encryptor k) p' Nothing $ retrieved iv (Just h) p' - getrest p h iv' sz (addBytesProcessed bytesprocessed sz) ks + getrest p h iv sz (addBytesProcessed bytesprocessed sz) ks - getunchunked iv = retriever (encryptor basek) basep $ - retrieved iv Nothing basep + getunchunked = do + iv <- startVerifyKeyContentIncrementally vc basek + case enc of + Just _ -> retriever (encryptor basek) basep Nothing $ + retrieved iv Nothing basep + -- Not chunked and not encrypted, so ask the + -- retriever to incrementally verify when it + -- retrieves to a file. + Nothing -> retriever (encryptor basek) basep iv $ + retrieved iv Nothing basep + return iv opennew = do iv <- startVerifyKeyContentIncrementally vc basek @@ -365,13 +371,11 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc - will be provided, and instead the content will be written to the - dest file. - + - The IncrementalVerifier is updated as the file content is read. + - - Note that when neither chunking nor encryption is used, and the remote - provides FileContent, that file only needs to be renamed - into place. (And it may even already be in the right place..) - - - - The IncrementalVerifier is updated as the file content is read. - - If it was not able to be updated, due to the file not needing to be read, - - Nothing will be returned. -} writeRetrievedContent :: LensGpgEncParams encc @@ -382,32 +386,25 @@ writeRetrievedContent -> Maybe MeterUpdate -> ContentSource -> Maybe IncrementalVerifier - -> Annex (Maybe IncrementalVerifier) + -> Annex () writeRetrievedContent dest enc encc mh mp content miv = case (enc, mh, content) of (Nothing, Nothing, FileContent f) - | f == dest -> return Nothing - | otherwise -> do - liftIO $ moveFile f dest - return Nothing + | f == dest -> noop + | otherwise -> liftIO $ moveFile f dest (Just (cipher, _), _, ByteContent b) -> do cmd <- gpgCmd <$> Annex.getGitConfig decrypt cmd encc cipher (feedBytes b) $ readBytes write - return miv (Just (cipher, _), _, FileContent f) -> do cmd <- gpgCmd <$> Annex.getGitConfig withBytes content $ \b -> decrypt cmd encc cipher (feedBytes b) $ readBytes write liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f) - return miv (Nothing, _, FileContent f) -> do withBytes content write liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f) - return miv - (Nothing, _, ByteContent b) -> do - write b - return miv + (Nothing, _, ByteContent b) -> write b where write b = case mh of Just h -> liftIO $ write' b h diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index e5d991b25f..37ea42caf4 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -1,6 +1,6 @@ {- helpers for special remotes - - - Copyright 2011-2020 Joey Hess + - Copyright 2011-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -39,6 +39,7 @@ import Annex.Common import Annex.SpecialRemote.Config import Types.StoreRetrieve import Types.Remote +import Annex.Verify import Annex.UUID import Config import Config.Cost @@ -54,6 +55,8 @@ import Git.Types import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import qualified Data.Map as M +import Control.Concurrent.STM +import Control.Concurrent.Async {- Special remotes don't have a configured url, so Git.Repo does not - automatically generate remotes for them. This looks for a different @@ -101,19 +104,33 @@ fileStorer a k (ByteContent b) m = withTmp k $ \f -> do byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex ()) -> Storer byteStorer a k c m = withBytes c $ \b -> a k b m --- A Retriever that writes the content of a Key to a provided file. --- It is responsible for updating the progress meter as it retrieves data. -fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever -fileRetriever a k m callback = do - f <- prepTmp k - a (fromRawFilePath f) k m - pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath) - -- A Retriever that generates a lazy ByteString containing the Key's -- content, and passes it to a callback action which will fully consume it -- before returning. -byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> (ContentSource -> Annex a) -> Annex a -byteRetriever a k _m callback = a k (callback . ByteContent) +byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a +byteRetriever a k _m _miv callback = a k (callback . ByteContent) + +-- A Retriever that writes the content of a Key to a provided file. +-- The action is responsible for updating the progress meter as it +-- retrieves data. The incremental verifier is updated in the background as +-- the action writes to the file. +fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever +fileRetriever a k m miv callback = do + f <- prepTmp k + let retrieve = a (fromRawFilePath f) k m + case miv of + Nothing -> retrieve + Just iv -> do + finished <- liftIO newEmptyTMVarIO + t <- liftIO $ async $ tailVerify iv f finished + retrieve + liftIO $ atomically $ putTMVar finished () + liftIO (wait t) >>= \case + Nothing -> noop + Just deferredverify -> do + showAction (descVerify iv) + liftIO deferredverify + pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath) {- The base Remote that is provided to specialRemote needs to have - storeKey, retrieveKeyFile, removeKey, and checkPresent methods, diff --git a/Types/Backend.hs b/Types/Backend.hs index 3d91fd5679..b8b9e71df9 100644 --- a/Types/Backend.hs +++ b/Types/Backend.hs @@ -52,4 +52,6 @@ data IncrementalVerifier = IncrementalVerifier -- if the hash verified. , failIncremental :: IO () -- ^ Call if the incremental verification needs to fail. + , descVerify :: String + -- ^ A description of what is done to verify the content. } diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index 14453a3d82..8c0f61d930 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -1,6 +1,6 @@ {- Types for Storer and Retriever actions for remotes. - - - Copyright 2014 Joey Hess + - Copyright 2014-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -11,6 +11,7 @@ module Types.StoreRetrieve where import Annex.Common import Utility.Metered +import Types.Backend (IncrementalVerifier) import qualified Data.ByteString.Lazy as L @@ -29,8 +30,17 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex () -- Action that retrieves a Key's content from a remote, passing it to a -- callback, which will fully consume the content before returning. +-- -- Throws exception if key is not present, or remote is not accessible. -type Retriever = forall a. Key -> MeterUpdate -> (ContentSource -> Annex a) -> Annex a +-- +-- When it retrieves FileContent, it is responsible for updating the +-- MeterUpdate. And when the IncrementalVerifier is passed to it, +-- and it retrieves FileContent, it should feed the content to the +-- verifier before running the callback. +-- This should not be done when it retrieves ByteContent. +type Retriever = forall a. + Key -> MeterUpdate -> Maybe IncrementalVerifier + -> (ContentSource -> Annex a) -> Annex a -- Action that removes a Key's content from a remote. -- Succeeds if key is already not present.