From ed684f651e0e6728b87f4aa0b6b62ad00f012b40 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 9 Feb 2021 15:00:51 -0400 Subject: [PATCH 1/4] add incremental hashing interface to Backend As yet unused. Backend.External could perhaps implement it too, although that would involve sending chunks of data to it via a pipe or something, so likely to be slow. --- Backend/External.hs | 2 + Backend/Hash.hs | 145 ++++++++++++++++++++++++++------------------ Backend/URL.hs | 1 + Backend/WORM.hs | 1 + Types/Backend.hs | 21 +++++-- Utility/Hash.hs | 99 +++++++++++++++++++++++++++++- 6 files changed, 204 insertions(+), 65 deletions(-) diff --git a/Backend/External.hs b/Backend/External.hs index 335dee24f2..7ad8303f8c 100644 --- a/Backend/External.hs +++ b/Backend/External.hs @@ -67,6 +67,7 @@ makeBackend' ebname@(ExternalBackendName bname) hasext (Right p) = do -- bump if progress handling is later added. nullMeterUpdate else Nothing + , verifyKeyContentIncrementally = Nothing , canUpgradeKey = Nothing , fastMigrate = Nothing , isStableKey = const isstable @@ -80,6 +81,7 @@ unavailBackend (ExternalBackendName bname) hasext = { backendVariety = ExternalKey bname hasext , genKey = Nothing , verifyKeyContent = Nothing + , verifyKeyContentIncrementally = Nothing , canUpgradeKey = Nothing , fastMigrate = Nothing , isStableKey = const False diff --git a/Backend/Hash.hs b/Backend/Hash.hs index b01ca47e4d..12ad921aaf 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -1,6 +1,6 @@ {- git-annex hashing backends - - - Copyright 2011-2020 Joey Hess + - Copyright 2011-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -28,6 +28,7 @@ import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L import Control.DeepSeq import Control.Exception (evaluate) +import Control.Concurrent.MVar data Hash = MD5Hash @@ -75,6 +76,7 @@ genBackend hash = Backend { backendVariety = hashKeyVariety hash (HasExt False) , genKey = Just (keyValue hash) , verifyKeyContent = Just $ checkKeyChecksum hash + , verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash , canUpgradeKey = Just needsUpgrade , fastMigrate = Just trivialMigrate , isStableKey = const True @@ -116,8 +118,6 @@ keyValueE hash source meterupdate = keyValue hash source meterupdate >>= addE source (const $ hashKeyVariety hash (HasExt True)) -{- A key's checksum is checked during fsck when it's content is present - - except for in fast mode. -} checkKeyChecksum :: Hash -> Key -> RawFilePath -> Annex Bool checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do fast <- Annex.getState Annex.fast @@ -125,22 +125,29 @@ checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do case (exists, fast) of (True, False) -> do showAction "checksum" - check <$> hashFile hash file nullMeterUpdate + sameCheckSum key + <$> hashFile hash file nullMeterUpdate _ -> return True where - expected = decodeBS (keyHash key) - check s - | s == expected = True - {- A bug caused checksums to be prefixed with \ in some - - cases; still accept these as legal now that the bug has been - - fixed. -} - | '\\' : s == expected = True - | otherwise = False - hwfault e = do warning $ "hardware fault: " ++ show e return False +sameCheckSum :: Key -> String -> Bool +sameCheckSum key s + | s == expected = True + {- A bug caused checksums to be prefixed with \ in some + - cases; still accept these as legal now that the bug + - has been fixed. -} + | '\\' : s == expected = True + | otherwise = False + where + expected = decodeBS (keyHash key) + +checkKeyChecksumIncremental :: Hash -> Key -> Annex IncrementalVerifier +checkKeyChecksumIncremental hash key = liftIO $ + (\h -> snd h key) (hasher hash) + keyHash :: Key -> S.ByteString keyHash = fst . splitKeyNameExtension @@ -195,79 +202,97 @@ trivialMigrate' oldkey newbackend afile maxextlen hashFile :: Hash -> RawFilePath -> MeterUpdate -> Annex String hashFile hash file meterupdate = liftIO $ withMeteredFile (fromRawFilePath file) meterupdate $ \b -> do - let h = hasher b + let h = (fst $ hasher hash) b -- Force full evaluation of hash so whole file is read -- before returning. evaluate (rnf h) return h - where - hasher = case hash of - MD5Hash -> md5Hasher - SHA1Hash -> sha1Hasher - SHA2Hash hashsize -> sha2Hasher hashsize - SHA3Hash hashsize -> sha3Hasher hashsize - SkeinHash hashsize -> skeinHasher hashsize - Blake2bHash hashsize -> blake2bHasher hashsize - Blake2bpHash hashsize -> blake2bpHasher hashsize - Blake2sHash hashsize -> blake2sHasher hashsize - Blake2spHash hashsize -> blake2spHasher hashsize -sha2Hasher :: HashSize -> (L.ByteString -> String) +type Hasher = (L.ByteString -> String, Key -> IO IncrementalVerifier) + +hasher :: Hash -> Hasher +hasher MD5Hash = md5Hasher +hasher SHA1Hash = sha1Hasher +hasher (SHA2Hash hashsize) = sha2Hasher hashsize +hasher (SHA3Hash hashsize) = sha3Hasher hashsize +hasher (SkeinHash hashsize) = skeinHasher hashsize +hasher (Blake2bHash hashsize) = blake2bHasher hashsize +hasher (Blake2bpHash hashsize) = blake2bpHasher hashsize +hasher (Blake2sHash hashsize) = blake2sHasher hashsize +hasher (Blake2spHash hashsize) = blake2spHasher hashsize + +mkHasher :: HashAlgorithm h => (L.ByteString -> Digest h) -> Context h -> Hasher +mkHasher h c = (show . h, mkIncrementalVerifier c) + +sha2Hasher :: HashSize -> Hasher sha2Hasher (HashSize hashsize) - | hashsize == 256 = use sha2_256 - | hashsize == 224 = use sha2_224 - | hashsize == 384 = use sha2_384 - | hashsize == 512 = use sha2_512 - | otherwise = error $ "unsupported SHA size " ++ show hashsize - where - use hasher = show . hasher + | hashsize == 256 = mkHasher sha2_256 sha2_256_context + | hashsize == 224 = mkHasher sha2_224 sha2_224_context + | hashsize == 384 = mkHasher sha2_384 sha2_384_context + | hashsize == 512 = mkHasher sha2_512 sha2_512_context + | otherwise = error $ "unsupported SHA2 size " ++ show hashsize -sha3Hasher :: HashSize -> (L.ByteString -> String) +sha3Hasher :: HashSize -> Hasher sha3Hasher (HashSize hashsize) - | hashsize == 256 = show . sha3_256 - | hashsize == 224 = show . sha3_224 - | hashsize == 384 = show . sha3_384 - | hashsize == 512 = show . sha3_512 + | hashsize == 256 = mkHasher sha3_256 sha3_256_context + | hashsize == 224 = mkHasher sha3_224 sha3_224_context + | hashsize == 384 = mkHasher sha3_384 sha3_384_context + | hashsize == 512 = mkHasher sha3_512 sha3_512_context | otherwise = error $ "unsupported SHA3 size " ++ show hashsize -skeinHasher :: HashSize -> (L.ByteString -> String) +skeinHasher :: HashSize -> Hasher skeinHasher (HashSize hashsize) - | hashsize == 256 = show . skein256 - | hashsize == 512 = show . skein512 + | hashsize == 256 = mkHasher skein256 skein256_context + | hashsize == 512 = mkHasher skein512 skein512_context | otherwise = error $ "unsupported SKEIN size " ++ show hashsize -blake2bHasher :: HashSize -> (L.ByteString -> String) +blake2bHasher :: HashSize -> Hasher blake2bHasher (HashSize hashsize) - | hashsize == 256 = show . blake2b_256 - | hashsize == 512 = show . blake2b_512 - | hashsize == 160 = show . blake2b_160 - | hashsize == 224 = show . blake2b_224 - | hashsize == 384 = show . blake2b_384 + | hashsize == 256 = mkHasher blake2b_256 blake2b_256_context + | hashsize == 512 = mkHasher blake2b_512 blake2b_512_context + | hashsize == 160 = mkHasher blake2b_160 blake2b_160_context + | hashsize == 224 = mkHasher blake2b_224 blake2b_224_context + | hashsize == 384 = mkHasher blake2b_384 blake2b_384_context | otherwise = error $ "unsupported BLAKE2B size " ++ show hashsize -blake2bpHasher :: HashSize -> (L.ByteString -> String) +blake2bpHasher :: HashSize -> Hasher blake2bpHasher (HashSize hashsize) - | hashsize == 512 = show . blake2bp_512 + | hashsize == 512 = mkHasher blake2bp_512 blake2bp_512_context | otherwise = error $ "unsupported BLAKE2BP size " ++ show hashsize -blake2sHasher :: HashSize -> (L.ByteString -> String) +blake2sHasher :: HashSize -> Hasher blake2sHasher (HashSize hashsize) - | hashsize == 256 = show . blake2s_256 - | hashsize == 160 = show . blake2s_160 - | hashsize == 224 = show . blake2s_224 + | hashsize == 256 = mkHasher blake2s_256 blake2s_256_context + | hashsize == 160 = mkHasher blake2s_160 blake2s_160_context + | hashsize == 224 = mkHasher blake2s_224 blake2s_224_context | otherwise = error $ "unsupported BLAKE2S size " ++ show hashsize -blake2spHasher :: HashSize -> (L.ByteString -> String) +blake2spHasher :: HashSize -> Hasher blake2spHasher (HashSize hashsize) - | hashsize == 256 = show . blake2sp_256 - | hashsize == 224 = show . blake2sp_224 + | hashsize == 256 = mkHasher blake2sp_256 blake2sp_256_context + | hashsize == 224 = mkHasher blake2sp_224 blake2sp_224_context | otherwise = error $ "unsupported BLAKE2SP size " ++ show hashsize -sha1Hasher :: L.ByteString -> String -sha1Hasher = show . sha1 +sha1Hasher :: Hasher +sha1Hasher = mkHasher sha1 sha1_context -md5Hasher :: L.ByteString -> String -md5Hasher = show . md5 +md5Hasher :: Hasher +md5Hasher = mkHasher md5 md5_context + +mkIncrementalVerifier :: HashAlgorithm h => Context h -> Key -> IO IncrementalVerifier +mkIncrementalVerifier ctx key = do + v <- newMVar ctx + return $ IncrementalVerifier + { updateIncremental = \b -> do + ctx' <- takeMVar v + let ctx'' = hashUpdate ctx' b + evaluate $ rnf ctx'' + putMVar v ctx'' + , finalizeIncremental = do + ctx' <- takeMVar v + let digest = hashFinalize ctx' + return $ sameCheckSum key (show digest) + } {- A varient of the SHA256E backend, for testing that needs special keys - that cannot collide with legitimate keys in the repository. diff --git a/Backend/URL.hs b/Backend/URL.hs index 5c8235760a..0468cbbe36 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -23,6 +23,7 @@ backend = Backend { backendVariety = URLKey , genKey = Nothing , verifyKeyContent = Nothing + , verifyKeyContentIncrementally = Nothing , canUpgradeKey = Nothing , fastMigrate = Nothing -- The content of an url can change at any time, so URL keys are diff --git a/Backend/WORM.hs b/Backend/WORM.hs index 77eb1c9c72..af116a8077 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -26,6 +26,7 @@ backend = Backend { backendVariety = WORMKey , genKey = Just keyValue , verifyKeyContent = Nothing + , verifyKeyContentIncrementally = Nothing , canUpgradeKey = Just needsUpgrade , fastMigrate = Just removeProblemChars , isStableKey = const True diff --git a/Types/Backend.hs b/Types/Backend.hs index a2bf9d130c..5b5b0b6f99 100644 --- a/Types/Backend.hs +++ b/Types/Backend.hs @@ -2,7 +2,7 @@ - - Most things should not need this, using Types instead - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -11,16 +11,21 @@ module Types.Backend where import Types.Key import Types.KeySource - import Utility.Metered import Utility.FileSystemEncoding +import Data.ByteString (ByteString) + data BackendA a = Backend { backendVariety :: KeyVariety , genKey :: Maybe (KeySource -> MeterUpdate -> a Key) - -- Verifies the content of a key using a hash. This does not need - -- to be cryptographically secure. + -- Verifies the content of a key, stored in a file, using a hash. + -- This does not need to be cryptographically secure. , verifyKeyContent :: Maybe (Key -> RawFilePath -> a Bool) + -- Incrementally verifies the content of a key, using the same + -- hash as verifyKeyContent, but with the content provided + -- incrementally a peice at a time, until finalized. + , verifyKeyContentIncrementally :: Maybe (Key -> a IncrementalVerifier) -- Checks if a key can be upgraded to a better form. , canUpgradeKey :: Maybe (Key -> Bool) -- Checks if there is a fast way to migrate a key to a different @@ -38,3 +43,11 @@ instance Show (BackendA a) where instance Eq (BackendA a) where a == b = backendVariety a == backendVariety b + +data IncrementalVerifier = IncrementalVerifier + { updateIncremental :: ByteString -> IO () + -- ^ Called repeatedly on each peice of the content. + , finalizeIncremental :: IO Bool + -- ^ Called once the full content has been sent, returns true + -- if the hash verified. + } diff --git a/Utility/Hash.hs b/Utility/Hash.hs index 732009a8d1..e843e740ea 100644 --- a/Utility/Hash.hs +++ b/Utility/Hash.hs @@ -2,29 +2,57 @@ module Utility.Hash ( sha1, + sha1_context, sha2_224, + sha2_224_context, sha2_256, + sha2_256_context, sha2_384, + sha2_384_context, sha2_512, + sha2_512_context, sha3_224, + sha3_224_context, sha3_256, + sha3_256_context, sha3_384, + sha3_384_context, sha3_512, + sha3_512_context, skein256, + skein256_context, skein512, + skein512_context, blake2s_160, + blake2s_160_context, blake2s_224, + blake2s_224_context, blake2s_256, + blake2s_256_context, blake2sp_224, + blake2sp_224_context, blake2sp_256, + blake2sp_256_context, blake2b_160, + blake2b_160_context, blake2b_224, + blake2b_224_context, blake2b_256, + blake2b_256_context, blake2b_384, + blake2b_384_context, blake2b_512, + blake2b_512_context, blake2bp_512, + blake2bp_512_context, md5, + md5_context, md5s, + hashUpdate, + hashFinalize, + Digest, + HashAlgorithm, + Context, props_hashes_stable, Mac(..), calcMac, @@ -35,78 +63,147 @@ import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import qualified Data.Text as T import qualified Data.Text.Encoding as T -import "cryptonite" Crypto.MAC.HMAC +import "cryptonite" Crypto.MAC.HMAC hiding (Context) import "cryptonite" Crypto.Hash sha1 :: L.ByteString -> Digest SHA1 sha1 = hashlazy +sha1_context :: Context SHA1 +sha1_context = hashInit + sha2_224 :: L.ByteString -> Digest SHA224 sha2_224 = hashlazy +sha2_224_context :: Context SHA224 +sha2_224_context = hashInit + sha2_256 :: L.ByteString -> Digest SHA256 sha2_256 = hashlazy +sha2_256_context :: Context SHA256 +sha2_256_context = hashInit + sha2_384 :: L.ByteString -> Digest SHA384 sha2_384 = hashlazy +sha2_384_context :: Context SHA384 +sha2_384_context = hashInit + sha2_512 :: L.ByteString -> Digest SHA512 sha2_512 = hashlazy +sha2_512_context :: Context SHA512 +sha2_512_context = hashInit + sha3_224 :: L.ByteString -> Digest SHA3_224 sha3_224 = hashlazy +sha3_224_context :: Context SHA3_224 +sha3_224_context = hashInit + sha3_256 :: L.ByteString -> Digest SHA3_256 sha3_256 = hashlazy +sha3_256_context :: Context SHA3_256 +sha3_256_context = hashInit + sha3_384 :: L.ByteString -> Digest SHA3_384 sha3_384 = hashlazy +sha3_384_context :: Context SHA3_384 +sha3_384_context = hashInit + sha3_512 :: L.ByteString -> Digest SHA3_512 sha3_512 = hashlazy +sha3_512_context :: Context SHA3_512 +sha3_512_context = hashInit + skein256 :: L.ByteString -> Digest Skein256_256 skein256 = hashlazy +skein256_context :: Context Skein256_256 +skein256_context = hashInit + skein512 :: L.ByteString -> Digest Skein512_512 skein512 = hashlazy +skein512_context :: Context Skein512_512 +skein512_context = hashInit + blake2s_160 :: L.ByteString -> Digest Blake2s_160 blake2s_160 = hashlazy +blake2s_160_context :: Context Blake2s_160 +blake2s_160_context = hashInit + blake2s_224 :: L.ByteString -> Digest Blake2s_224 blake2s_224 = hashlazy +blake2s_224_context :: Context Blake2s_224 +blake2s_224_context = hashInit + blake2s_256 :: L.ByteString -> Digest Blake2s_256 blake2s_256 = hashlazy +blake2s_256_context :: Context Blake2s_256 +blake2s_256_context = hashInit + blake2sp_224 :: L.ByteString -> Digest Blake2sp_224 blake2sp_224 = hashlazy +blake2sp_224_context :: Context Blake2sp_224 +blake2sp_224_context = hashInit + blake2sp_256 :: L.ByteString -> Digest Blake2sp_256 blake2sp_256 = hashlazy +blake2sp_256_context :: Context Blake2sp_256 +blake2sp_256_context = hashInit + blake2b_160 :: L.ByteString -> Digest Blake2b_160 blake2b_160 = hashlazy +blake2b_160_context :: Context Blake2b_160 +blake2b_160_context = hashInit + blake2b_224 :: L.ByteString -> Digest Blake2b_224 blake2b_224 = hashlazy +blake2b_224_context :: Context Blake2b_224 +blake2b_224_context = hashInit + blake2b_256 :: L.ByteString -> Digest Blake2b_256 blake2b_256 = hashlazy +blake2b_256_context :: Context Blake2b_256 +blake2b_256_context = hashInit + blake2b_384 :: L.ByteString -> Digest Blake2b_384 blake2b_384 = hashlazy +blake2b_384_context :: Context Blake2b_384 +blake2b_384_context = hashInit + blake2b_512 :: L.ByteString -> Digest Blake2b_512 blake2b_512 = hashlazy +blake2b_512_context :: Context Blake2b_512 +blake2b_512_context = hashInit + blake2bp_512 :: L.ByteString -> Digest Blake2bp_512 blake2bp_512 = hashlazy +blake2bp_512_context :: Context Blake2bp_512 +blake2bp_512_context = hashInit + md5 :: L.ByteString -> Digest MD5 md5 = hashlazy +md5_context :: Context MD5 +md5_context = hashInit + md5s :: S.ByteString -> Digest MD5 md5s = hash From 62e152f2107a10a7a5c84eea01521a79b6c6da47 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 9 Feb 2021 17:03:27 -0400 Subject: [PATCH 2/4] incremental checksum on download from ssh or p2p Checksum as content is received from a remote git-annex repository, rather than doing it in a second pass. Not tested at all yet, but I imagine it will work! Not implemented for any special remotes, and also not implemented for copies from local remotes. It may be that, for local remotes, it will suffice to use rsync, rely on its checksumming, and simply return Verified. (It would still make a checksumming pass when cp is used for COW, I guess.) --- Annex/Content/Presence.hs | 15 +--------- Annex/Verify.hs | 25 ++++++++++++++++ Backend.hs | 15 +++++++++- Backend/Hash.hs | 3 +- CHANGELOG | 2 ++ P2P/Annex.hs | 53 ++++++++++++++++++++++++++------- P2P/IO.hs | 2 +- P2P/Protocol.hs | 11 +++---- Remote/Bup.hs | 2 +- Remote/Git.hs | 1 + Remote/Glacier.hs | 3 +- Remote/Helper/Chunked/Legacy.hs | 3 +- Remote/Helper/P2P.hs | 10 ++++--- Remote/Helper/Special.hs | 2 +- Remote/P2P.hs | 3 +- Utility/Metered.hs | 17 ++++++----- git-annex.cabal | 3 +- 17 files changed, 118 insertions(+), 52 deletions(-) create mode 100644 Annex/Verify.hs diff --git a/Annex/Content/Presence.hs b/Annex/Content/Presence.hs index 05ff5715e1..6d9d158649 100644 --- a/Annex/Content/Presence.hs +++ b/Annex/Content/Presence.hs @@ -26,10 +26,10 @@ module Annex.Content.Presence ( import Annex.Common import qualified Annex +import Annex.Verify import Annex.LockPool import Annex.WorkerPool import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..)) -import qualified Types.Remote import qualified Types.Backend import qualified Backend import qualified Database.Keys @@ -231,16 +231,3 @@ warnUnverifiableInsecure k = warning $ unwords ] where kv = decodeBS (formatKeyVariety (fromKey keyVariety k)) - -data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify - -shouldVerify :: VerifyConfig -> Annex Bool -shouldVerify AlwaysVerify = return True -shouldVerify NoVerify = return False -shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig -shouldVerify (RemoteVerify r) = - (shouldVerify DefaultVerify - <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))) - -- Export remotes are not key/value stores, so always verify - -- content from them even when verification is disabled. - <||> Types.Remote.isExportSupported r diff --git a/Annex/Verify.hs b/Annex/Verify.hs new file mode 100644 index 0000000000..6d1a6ab37f --- /dev/null +++ b/Annex/Verify.hs @@ -0,0 +1,25 @@ +{- verification + - + - Copyright 2010-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Verify where + +import Annex.Common +import qualified Annex +import qualified Types.Remote + +data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify + +shouldVerify :: VerifyConfig -> Annex Bool +shouldVerify AlwaysVerify = return True +shouldVerify NoVerify = return False +shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig +shouldVerify (RemoteVerify r) = + (shouldVerify DefaultVerify + <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))) + -- Export remotes are not key/value stores, so always verify + -- content from them even when verification is disabled. + <||> Types.Remote.isExportSupported r diff --git a/Backend.hs b/Backend.hs index 5769c840cc..76ba12313a 100644 --- a/Backend.hs +++ b/Backend.hs @@ -1,6 +1,6 @@ {- git-annex key/value backends - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -17,11 +17,13 @@ module Backend ( isStableKey, isCryptographicallySecure, isVerifiable, + startVerifyKeyContentIncrementally, ) where import Annex.Common import qualified Annex import Annex.CheckAttr +import Annex.Verify import Types.Key import Types.KeySource import qualified Types.Backend as B @@ -127,3 +129,14 @@ isCryptographicallySecure k = maybe False (`B.isCryptographicallySecure` k) isVerifiable :: Key -> Annex Bool isVerifiable k = maybe False (isJust . B.verifyKeyContent) <$> maybeLookupBackendVariety (fromKey keyVariety k) + +startVerifyKeyContentIncrementally :: VerifyConfig -> Key -> Annex (Maybe B.IncrementalVerifier) +startVerifyKeyContentIncrementally verifyconfig k = + ifM (shouldVerify verifyconfig) + ( maybeLookupBackendVariety (fromKey keyVariety k) >>= \case + Just b -> case B.verifyKeyContentIncrementally b of + Just v -> Just <$> v k + Nothing -> return Nothing + Nothing -> return Nothing + , return Nothing + ) diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 12ad921aaf..0e723dea5d 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -145,8 +145,7 @@ sameCheckSum key s expected = decodeBS (keyHash key) checkKeyChecksumIncremental :: Hash -> Key -> Annex IncrementalVerifier -checkKeyChecksumIncremental hash key = liftIO $ - (\h -> snd h key) (hasher hash) +checkKeyChecksumIncremental hash key = liftIO $ (snd $ hasher hash) key keyHash :: Key -> S.ByteString keyHash = fst . splitKeyNameExtension diff --git a/CHANGELOG b/CHANGELOG index c8ed840b86..b889050c88 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -24,6 +24,8 @@ git-annex (8.20210128) UNRELEASED; urgency=medium * Include libkqueue.h file needed to build the assistant on BSDs. * Tahoe: Avoid verifying hash after download, since tahoe does sufficient verification itself. + * Checksum as content is received from a remote git-annex repository, + rather than doing it in a second pass. -- Joey Hess Thu, 28 Jan 2021 12:34:32 -0400 diff --git a/P2P/Annex.hs b/P2P/Annex.hs index b933575c41..c28c3eba71 100644 --- a/P2P/Annex.hs +++ b/P2P/Annex.hs @@ -1,6 +1,6 @@ {- P2P protocol, Annex implementation - - - Copyright 2016-2018 Joey Hess + - Copyright 2016-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -23,9 +23,11 @@ import P2P.IO import Logs.Location import Types.NumCopies import Utility.Metered +import Types.Backend (IncrementalVerifier(..)) import Control.Monad.Free import Control.Concurrent.STM +import qualified Data.ByteString as S -- Full interpreter for Proto, that can receive and send objects. runFullProto :: RunState -> P2PConnection -> Proto a -> Annex (Either ProtoFailure a) @@ -77,7 +79,7 @@ runLocal runst runner a = case a of let runtransfer ti = Right <$> transfer download' k af Nothing (\p -> logStatusAfter k $ getViaTmp rsp DefaultVerify k af $ \tmp -> - storefile (fromRawFilePath tmp) o l getb validitycheck p ti) + storefile (fromRawFilePath tmp) o l getb Nothing validitycheck p ti) let fallback = return $ Left $ ProtoFailureMessage "transfer already in progress, or unable to take transfer lock" checktransfer runtransfer fallback @@ -85,10 +87,10 @@ runLocal runst runner a = case a of Left e -> return $ Left $ ProtoFailureException e Right (Left e) -> return $ Left e Right (Right ok) -> runner (next ok) - StoreContentTo dest o l getb validitycheck next -> do + StoreContentTo dest incrementalverifier o l getb validitycheck next -> do v <- tryNonAsync $ do let runtransfer ti = Right - <$> storefile dest o l getb validitycheck nullMeterUpdate ti + <$> storefile dest o l getb incrementalverifier validitycheck nullMeterUpdate ti let fallback = return $ Left $ ProtoFailureMessage "transfer failed" checktransfer runtransfer fallback @@ -153,16 +155,41 @@ runLocal runst runner a = case a of -- Transfer logs are updated higher in the stack when -- a client. Client _ -> ta nullMeterUpdate + + resumefromoffset o incrementalverifier p h + | o /= 0 = do + p' <- case incrementalverifier of + Just iv -> do + go iv o + return p + _ -> return $ offsetMeterUpdate p (toBytesProcessed o) + -- Make sure the handle is seeked to the offset. + -- (Reading the file probably left it there + -- when that was done, but let's be sure.) + hSeek h AbsoluteSeek o + return p' + | otherwise = return p + where + go iv n + | n == 0 = return () + | n > fromIntegral defaultChunkSize = do + updateIncremental iv =<< S.hGet h defaultChunkSize + go iv (n - fromIntegral defaultChunkSize) + | otherwise = + updateIncremental iv =<< S.hGet h (fromIntegral n) - storefile dest (Offset o) (Len l) getb validitycheck p ti = do - let p' = offsetMeterUpdate p (toBytesProcessed o) + storefile dest (Offset o) (Len l) getb incrementalverifier validitycheck p ti = do v <- runner getb case v of Right b -> do liftIO $ withBinaryFile dest ReadWriteMode $ \h -> do - when (o /= 0) $ - hSeek h AbsoluteSeek o - meteredWrite p' h b + p' <- resumefromoffset o incrementalverifier p h + let writechunk = case incrementalverifier of + Nothing -> \c -> S.hPut h c + Just iv -> \c -> do + S.hPut h c + updateIncremental iv c + meteredWrite p' writechunk b indicatetransferred ti rightsize <- do @@ -170,8 +197,12 @@ runLocal runst runner a = case a of return (toInteger sz == l + o) runner validitycheck >>= \case - Right (Just Valid) -> - return (rightsize, UnVerified) + Right (Just Valid) -> case incrementalverifier of + Just iv -> ifM (liftIO (finalizeIncremental iv) <&&> pure rightsize) + ( return (True, Verified) + , return (False, UnVerified) + ) + Nothing -> return (rightsize, UnVerified) Right (Just Invalid) | l == 0 -> -- Special case, for when -- content was not diff --git a/P2P/IO.hs b/P2P/IO.hs index 9a71ba89f0..d089f1eb00 100644 --- a/P2P/IO.hs +++ b/P2P/IO.hs @@ -259,7 +259,7 @@ debugMessage conn prefix m = do -- connection. False is returned to indicate this problem. sendExactly :: Len -> L.ByteString -> Handle -> MeterUpdate -> IO Bool sendExactly (Len n) b h p = do - sent <- meteredWrite' p h (L.take (fromIntegral n) b) + sent <- meteredWrite' p (B.hPut h) (L.take (fromIntegral n) b) return (fromBytesProcessed sent == n) receiveExactly :: Len -> Handle -> MeterUpdate -> IO L.ByteString diff --git a/P2P/Protocol.hs b/P2P/Protocol.hs index 13d5ea8c3f..568bccc5cf 100644 --- a/P2P/Protocol.hs +++ b/P2P/Protocol.hs @@ -2,7 +2,7 @@ - - See doc/design/p2p_protocol.mdwn - - - Copyright 2016-2020 Joey Hess + - Copyright 2016-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -18,6 +18,7 @@ import Types (Annex) import Types.Key import Types.UUID import Types.Remote (Verification(..), unVerified) +import Types.Backend (IncrementalVerifier(..)) import Utility.AuthToken import Utility.Applicative import Utility.PartialPrelude @@ -266,7 +267,7 @@ data LocalF c -- Note: The ByteString may not contain the entire remaining content -- of the key. Only once the temp file size == Len has the whole -- content been transferred. - | StoreContentTo FilePath Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c) + | StoreContentTo FilePath (Maybe IncrementalVerifier) Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c) -- ^ Like StoreContent, but stores the content to a temp file. | SetPresent Key UUID c | CheckContentPresent Key (Bool -> c) @@ -351,13 +352,13 @@ remove key = do net $ sendMessage (REMOVE key) checkSuccess -get :: FilePath -> Key -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification) -get dest key af m p = +get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification) +get dest key iv af m p = receiveContent (Just m) p sizer storer $ \offset -> GET offset (ProtoAssociatedFile af) key where sizer = fileSize dest - storer = storeContentTo dest + storer = storeContentTo dest iv put :: Key -> AssociatedFile -> MeterUpdate -> Proto Bool put key af p = do diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 2da6642d26..9b14480d54 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -167,7 +167,7 @@ store r buprepo = byteStorer $ \k b p -> do } else cmd feeder = \h -> do - meteredWrite p h b + meteredWrite p (S.hPut h) b hClose h in withCreateProcess cmd' (go feeder cmd') where diff --git a/Remote/Git.hs b/Remote/Git.hs index 1bc602a612..9deb67508b 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -557,6 +557,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met then return v else giveup "failed to retrieve content from remote" else P2PHelper.retrieve + (Annex.Content.RemoteVerify r) (\p -> Ssh.runProto r connpool (return (False, UnVerified)) (fallback p)) key file dest meterupdate | otherwise = giveup "copying from non-ssh, non-http remote not supported" diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 5b6f1ce93b..ee1377ae68 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -9,6 +9,7 @@ module Remote.Glacier (remote, jobList, checkSaneGlacierCommand) where import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import Annex.Common @@ -168,7 +169,7 @@ store' r k b p = go =<< glacierEnv c gc u { std_in = CreatePipe } in liftIO $ withCreateProcess cmd (go' cmd) go' cmd (Just hin) _ _ pid = do - meteredWrite p hin b + meteredWrite p (S.hPut hin) b hClose hin forceSuccessProcess cmd pid go' _ _ _ _ _ = error "internal" diff --git a/Remote/Helper/Chunked/Legacy.hs b/Remote/Helper/Chunked/Legacy.hs index b236b8cb18..b3454ab2dc 100644 --- a/Remote/Helper/Chunked/Legacy.hs +++ b/Remote/Helper/Chunked/Legacy.hs @@ -11,6 +11,7 @@ import Annex.Common import Remote.Helper.Chunked import Utility.Metered +import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L {- This is an extension that's added to the usual file (or whatever) @@ -117,4 +118,4 @@ meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteStrin meteredWriteFileChunks meterupdate dest chunks feeder = withBinaryFile dest WriteMode $ \h -> forM_ chunks $ - meteredWrite meterupdate h <=< feeder + meteredWrite meterupdate (S.hPut h) <=< feeder diff --git a/Remote/Helper/P2P.hs b/Remote/Helper/P2P.hs index 844c4b9b82..7e2d13f2e2 100644 --- a/Remote/Helper/P2P.hs +++ b/Remote/Helper/P2P.hs @@ -1,6 +1,6 @@ {- Helpers for remotes using the git-annex P2P protocol. - - - Copyright 2016-2020 Joey Hess + - Copyright 2016-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -17,6 +17,7 @@ import Annex.Content import Messages.Progress import Utility.Metered import Types.NumCopies +import Backend import Control.Concurrent @@ -39,10 +40,11 @@ store runner k af p = do Just False -> giveup "transfer failed" Nothing -> remoteUnavail -retrieve :: (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Verification -retrieve runner k af dest p = +retrieve :: VerifyConfig -> (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Verification +retrieve verifyconfig runner k af dest p = do + iv <- startVerifyKeyContentIncrementally verifyconfig k metered (Just p) k $ \m p' -> - runner p' (P2P.get dest k af m p') >>= \case + runner p' (P2P.get dest k iv af m p') >>= \case Just (True, v) -> return v Just (False, _) -> giveup "transfer failed" Nothing -> remoteUnavail diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index ce1ebe22a0..646ce4fdb5 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -295,7 +295,7 @@ sink dest enc c mh mp content = case (enc, mh, content) of Just h -> liftIO $ b `streamto` h Nothing -> liftIO $ bracket opendest hClose (b `streamto`) streamto b h = case mp of - Just p -> meteredWrite p h b + Just p -> meteredWrite p (S.hPut h) b Nothing -> L.hPut h b opendest = openBinaryFile dest WriteMode diff --git a/Remote/P2P.hs b/Remote/P2P.hs index 21cf5b42e1..cc39ea9e0c 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -13,6 +13,7 @@ module Remote.P2P ( import Annex.Common import qualified Annex import qualified P2P.Protocol as P2P +import qualified Annex.Content import P2P.Address import P2P.Annex import P2P.IO @@ -56,7 +57,7 @@ chainGen addr r u rc gc rs = do , cost = cst , name = Git.repoDescribe r , storeKey = store (const protorunner) - , retrieveKeyFile = retrieve (const protorunner) + , retrieveKeyFile = retrieve (Annex.Content.RemoteVerify this) (const protorunner) , retrieveKeyFileCheap = Nothing , retrievalSecurityPolicy = RetrievalAllKeysSecure , removeKey = remove protorunner diff --git a/Utility/Metered.hs b/Utility/Metered.hs index 1715f0bf77..4683c13d6d 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -1,6 +1,6 @@ {- Metered IO and actions - - - Copyright 2012-2020 Joey Hess + - Copyright 2012-2021 Joey Hess - - License: BSD-2-clause -} @@ -118,23 +118,24 @@ withMeteredFile :: FilePath -> MeterUpdate -> (L.ByteString -> IO a) -> IO a withMeteredFile f meterupdate a = withBinaryFile f ReadMode $ \h -> hGetContentsMetered h meterupdate >>= a -{- Writes a ByteString to a Handle, updating a meter as it's written. -} -meteredWrite :: MeterUpdate -> Handle -> L.ByteString -> IO () -meteredWrite meterupdate h = void . meteredWrite' meterupdate h +{- Calls the action repeatedly with chunks from the lazy ByteString. + - Updates the meter after each chunk is processed. -} +meteredWrite :: MeterUpdate -> (S.ByteString -> IO ()) -> L.ByteString -> IO () +meteredWrite meterupdate a = void . meteredWrite' meterupdate a -meteredWrite' :: MeterUpdate -> Handle -> L.ByteString -> IO BytesProcessed -meteredWrite' meterupdate h = go zeroBytesProcessed . L.toChunks +meteredWrite' :: MeterUpdate -> (S.ByteString -> IO ()) -> L.ByteString -> IO BytesProcessed +meteredWrite' meterupdate a = go zeroBytesProcessed . L.toChunks where go sofar [] = return sofar go sofar (c:cs) = do - S.hPut h c + a c let !sofar' = addBytesProcessed sofar $ S.length c meterupdate sofar' go sofar' cs meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO () meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h -> - meteredWrite meterupdate h b + meteredWrite meterupdate (S.hPut h) b {- Applies an offset to a MeterUpdate. This can be useful when - performing a sequence of actions, such as multiple meteredWriteFiles, diff --git a/git-annex.cabal b/git-annex.cabal index 187aa31761..86f3a7829a 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -671,9 +671,10 @@ Executable git-annex Annex.UpdateInstead Annex.UUID Annex.Url + Annex.VariantFile Annex.VectorClock Annex.VectorClock.Utility - Annex.VariantFile + Annex.Verify Annex.Version Annex.View Annex.View.ViewedFile From 94f6210b685cba115f45c2e580d8047c8cba23bc Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 9 Feb 2021 22:15:33 -0400 Subject: [PATCH 3/4] deal with possibility of short read by S.hGet It may read less than requested, and may yield an empty string if the file was somehow shorter than expected. --- P2P/Annex.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/P2P/Annex.hs b/P2P/Annex.hs index c28c3eba71..e039f67a4e 100644 --- a/P2P/Annex.hs +++ b/P2P/Annex.hs @@ -172,11 +172,14 @@ runLocal runst runner a = case a of where go iv n | n == 0 = return () - | n > fromIntegral defaultChunkSize = do - updateIncremental iv =<< S.hGet h defaultChunkSize - go iv (n - fromIntegral defaultChunkSize) - | otherwise = - updateIncremental iv =<< S.hGet h (fromIntegral n) + | otherwise = do + let c = if n > fromIntegral defaultChunkSize + then defaultChunkSize + else fromIntegral n + b <- S.hGet h c + updateIncremental iv b + unless (b == S.empty) $ + go iv (n - fromIntegral (S.length b)) storefile dest (Offset o) (Len l) getb incrementalverifier validitycheck p ti = do v <- runner getb From 4b63e932f3161447943535b371cdadf9f235222f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 10 Feb 2021 12:41:05 -0400 Subject: [PATCH 4/4] incremental checksum on upload to ssh or p2p --- CHANGELOG | 4 ++-- P2P/Annex.hs | 8 +++++--- P2P/Protocol.hs | 25 +++++++++++++------------ 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b889050c88..c7328abf21 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -24,8 +24,8 @@ git-annex (8.20210128) UNRELEASED; urgency=medium * Include libkqueue.h file needed to build the assistant on BSDs. * Tahoe: Avoid verifying hash after download, since tahoe does sufficient verification itself. - * Checksum as content is received from a remote git-annex repository, - rather than doing it in a second pass. + * Checksum as content is received from a remote git-annex repository + over ssh/p2p protocols, rather than doing it in a second pass. -- Joey Hess Thu, 28 Jan 2021 12:34:32 -0400 diff --git a/P2P/Annex.hs b/P2P/Annex.hs index e039f67a4e..f0da379903 100644 --- a/P2P/Annex.hs +++ b/P2P/Annex.hs @@ -24,6 +24,7 @@ import Logs.Location import Types.NumCopies import Utility.Metered import Types.Backend (IncrementalVerifier(..)) +import Backend import Control.Monad.Free import Control.Concurrent.STM @@ -76,10 +77,11 @@ runLocal runst runner a = case a of -- Remote.P2P and Remote.Git. let rsp = RetrievalAllKeysSecure v <- tryNonAsync $ do + iv <- startVerifyKeyContentIncrementally DefaultVerify k let runtransfer ti = Right <$> transfer download' k af Nothing (\p -> logStatusAfter k $ getViaTmp rsp DefaultVerify k af $ \tmp -> - storefile (fromRawFilePath tmp) o l getb Nothing validitycheck p ti) + storefile (fromRawFilePath tmp) o l getb iv validitycheck p ti) let fallback = return $ Left $ ProtoFailureMessage "transfer already in progress, or unable to take transfer lock" checktransfer runtransfer fallback @@ -87,10 +89,10 @@ runLocal runst runner a = case a of Left e -> return $ Left $ ProtoFailureException e Right (Left e) -> return $ Left e Right (Right ok) -> runner (next ok) - StoreContentTo dest incrementalverifier o l getb validitycheck next -> do + StoreContentTo dest iv o l getb validitycheck next -> do v <- tryNonAsync $ do let runtransfer ti = Right - <$> storefile dest o l getb incrementalverifier validitycheck nullMeterUpdate ti + <$> storefile dest o l getb iv validitycheck nullMeterUpdate ti let fallback = return $ Left $ ProtoFailureMessage "transfer failed" checktransfer runtransfer fallback diff --git a/P2P/Protocol.hs b/P2P/Protocol.hs index 568bccc5cf..353167406d 100644 --- a/P2P/Protocol.hs +++ b/P2P/Protocol.hs @@ -17,8 +17,9 @@ import qualified Utility.SimpleProtocol as Proto import Types (Annex) import Types.Key import Types.UUID -import Types.Remote (Verification(..), unVerified) +import Types.Remote (Verification(..)) import Types.Backend (IncrementalVerifier(..)) +import Types.Transfer import Utility.AuthToken import Utility.Applicative import Utility.PartialPrelude @@ -504,10 +505,9 @@ serveAuthed servermode myuuid = void $ serverLoop handler then net $ sendMessage ALREADY_HAVE else do let sizer = tmpContentSize key - let storer = \o l b v -> unVerified $ - storeContent key af o l b v - (ok, _v) <- receiveContent Nothing nullMeterUpdate sizer storer PUT_FROM - when ok $ + let storer = storeContent key af + v <- receiveContent Nothing nullMeterUpdate sizer storer PUT_FROM + when (observeBool v) $ local $ setPresent key myuuid return ServerContinue @@ -533,12 +533,13 @@ sendContent key af offset@(Offset n) p = go =<< local (contentSize key) checkSuccess receiveContent - :: Maybe Meter + :: Observable t + => Maybe Meter -> MeterUpdate -> Local Len - -> (Offset -> Len -> Proto L.ByteString -> Proto (Maybe Validity) -> Local (Bool, Verification)) + -> (Offset -> Len -> Proto L.ByteString -> Proto (Maybe Validity) -> Local t) -> (Offset -> Message) - -> Proto (Bool, Verification) + -> Proto t receiveContent mm p sizer storer mkmsg = do Len n <- local sizer let p' = offsetMeterUpdate p (toBytesProcessed n) @@ -558,14 +559,14 @@ receiveContent mm p sizer storer mkmsg = do net $ sendMessage (ERROR "expected VALID or INVALID") return Nothing else return Nothing - (ok, v) <- local $ storer offset len + v <- local $ storer offset len (net (receiveBytes len p')) validitycheck - sendSuccess ok - return (ok, v) + sendSuccess (observeBool v) + return v _ -> do net $ sendMessage (ERROR "expected DATA") - return (False, UnVerified) + return observeFailure checkSuccess :: Proto Bool checkSuccess = do