handle multiple VURL checksums in one pass

git-annex fsck and some other commands that verify the content of a key
were using the non-incremental verification interface. But for VURL
urls, that interface is inefficient because when there are multiple
equivalent keys, it has to separately read and checksum the file for each
key in turn until one matches. It's more efficient for those to use the
incremental interface, since the file can be read a single time.

There's no real downside to using the incremental interface when available.

Note that a further speedup could be had for VURL if it were able to
calculate the checksum a single time and then compare it with the
equivalent keys' checksums, when the equivalent keys use the same type
of checksum.

Sponsored-by: k0ld on Patreon
This commit is contained in:
Joey Hess 2024-03-01 14:41:10 -04:00
parent 310a49a76d
commit 9c988ee607
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 40 additions and 24 deletions

View file

@ -12,6 +12,7 @@ module Annex.Verify (
shouldVerify, shouldVerify,
verifyKeyContentPostRetrieval, verifyKeyContentPostRetrieval,
verifyKeyContent, verifyKeyContent,
verifyKeyContent',
Verification(..), Verification(..),
unVerified, unVerified,
warnUnverifiableInsecure, warnUnverifiableInsecure,
@ -97,16 +98,32 @@ verifyKeyContentPostRetrieval rsp v verification k f = case (rsp, verification)
resumeVerifyKeyContent k f iv resumeVerifyKeyContent k f iv
_ -> verifyKeyContent k f _ -> verifyKeyContent k f
-- When possible, does an incremental verification, because that can be
-- faster. E.g., the VURL backend may need to try multiple checksums, and
-- only with an incremental verification does it avoid reading the file
-- more than once.
verifyKeyContent :: Key -> RawFilePath -> Annex Bool verifyKeyContent :: Key -> RawFilePath -> Annex Bool
verifyKeyContent k f = verifyKeySize k f <&&> verifyKeyContent' k f verifyKeyContent k f = verifyKeySize k f <&&> verifyKeyContent' k f
-- Does not verify size.
verifyKeyContent' :: Key -> RawFilePath -> Annex Bool verifyKeyContent' :: Key -> RawFilePath -> Annex Bool
verifyKeyContent' k f = verifyKeyContent' k f =
Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case
Nothing -> return True Nothing -> return True
Just b -> case Types.Backend.verifyKeyContent b of Just b -> case (Types.Backend.verifyKeyContentIncrementally b, Types.Backend.verifyKeyContent b) of
Nothing -> return True (Nothing, Nothing) -> return True
Just verifier -> verifier k f (Just mkiv, mverifier) -> do
iv <- mkiv k
showAction (UnquotedString (descIncrementalVerifier iv))
res <- liftIO $ catchDefaultIO Nothing $
withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
feedIncrementalVerifier h iv
finalizeIncrementalVerifier iv
case res of
Just res' -> return res'
Nothing -> case mverifier of
Nothing -> return True
Just verifier -> verifier k f
(Nothing, Just verifier) -> verifier k f
resumeVerifyKeyContent :: Key -> RawFilePath -> IncrementalVerifier -> Annex Bool resumeVerifyKeyContent :: Key -> RawFilePath -> IncrementalVerifier -> Annex Bool
resumeVerifyKeyContent k f iv = liftIO (positionIncrementalVerifier iv) >>= \case resumeVerifyKeyContent k f iv = liftIO (positionIncrementalVerifier iv) >>= \case
@ -132,17 +149,18 @@ resumeVerifyKeyContent k f iv = liftIO (positionIncrementalVerifier iv) >>= \cas
liftIO $ catchDefaultIO (Just False) $ liftIO $ catchDefaultIO (Just False) $
withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
hSeek h AbsoluteSeek endpos hSeek h AbsoluteSeek endpos
feedincremental h feedIncrementalVerifier h iv
finalizeIncrementalVerifier iv finalizeIncrementalVerifier iv
feedincremental h = do feedIncrementalVerifier :: Handle -> IncrementalVerifier -> IO ()
b <- S.hGetSome h chunk feedIncrementalVerifier h iv = do
if S.null b b <- S.hGetSome h chunk
then return () if S.null b
else do then return ()
updateIncrementalVerifier iv b else do
feedincremental h updateIncrementalVerifier iv b
feedIncrementalVerifier h iv
where
chunk = 65536 chunk = 65536
verifyKeySize :: Key -> RawFilePath -> Annex Bool verifyKeySize :: Key -> RawFilePath -> Annex Bool

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2010-2022 Joey Hess <id@joeyh.name> - Copyright 2010-2023 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -16,6 +16,7 @@ import qualified Remote
import qualified Types.Backend import qualified Types.Backend
import qualified Backend import qualified Backend
import Annex.Content import Annex.Content
import Annex.Verify
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
import Annex.Version import Annex.Version
import Annex.Content.Presence import Annex.Content.Presence
@ -524,17 +525,14 @@ checkBackendRemote backend key remote ai localcopy =
checkBackendOr (badContentRemote remote localcopy) backend key localcopy ai checkBackendOr (badContentRemote remote localcopy) backend key localcopy ai
checkBackendOr :: (Key -> Annex String) -> Backend -> Key -> RawFilePath -> ActionItem -> Annex Bool checkBackendOr :: (Key -> Annex String) -> Backend -> Key -> RawFilePath -> ActionItem -> Annex Bool
checkBackendOr bad backend key file ai = checkBackendOr bad backend key file ai = do
case Types.Backend.verifyKeyContent backend of ok <- verifyKeyContent' key file
Just verifier -> do unless ok $ do
ok <- verifier key file msg <- bad key
unless ok $ do warning $ actionItemDesc ai
msg <- bad key <> ": Bad file content; "
warning $ actionItemDesc ai <> UnquotedString msg
<> ": Bad file content; " return ok
<> UnquotedString msg
return ok
Nothing -> return True
{- Check, if there are InodeCaches recorded for a key, that one of them {- Check, if there are InodeCaches recorded for a key, that one of them
- matches the object file. There are situations where the InodeCache - matches the object file. There are situations where the InodeCache