diff --git a/Annex/Content/Presence.hs b/Annex/Content/Presence.hs index 05ff5715e1..6d9d158649 100644 --- a/Annex/Content/Presence.hs +++ b/Annex/Content/Presence.hs @@ -26,10 +26,10 @@ module Annex.Content.Presence ( import Annex.Common import qualified Annex +import Annex.Verify import Annex.LockPool import Annex.WorkerPool import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..)) -import qualified Types.Remote import qualified Types.Backend import qualified Backend import qualified Database.Keys @@ -231,16 +231,3 @@ warnUnverifiableInsecure k = warning $ unwords ] where kv = decodeBS (formatKeyVariety (fromKey keyVariety k)) - -data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify - -shouldVerify :: VerifyConfig -> Annex Bool -shouldVerify AlwaysVerify = return True -shouldVerify NoVerify = return False -shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig -shouldVerify (RemoteVerify r) = - (shouldVerify DefaultVerify - <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))) - -- Export remotes are not key/value stores, so always verify - -- content from them even when verification is disabled. - <||> Types.Remote.isExportSupported r diff --git a/Annex/Verify.hs b/Annex/Verify.hs new file mode 100644 index 0000000000..6d1a6ab37f --- /dev/null +++ b/Annex/Verify.hs @@ -0,0 +1,25 @@ +{- verification + - + - Copyright 2010-2021 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Verify where + +import Annex.Common +import qualified Annex +import qualified Types.Remote + +data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify + +shouldVerify :: VerifyConfig -> Annex Bool +shouldVerify AlwaysVerify = return True +shouldVerify NoVerify = return False +shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig +shouldVerify (RemoteVerify r) = + (shouldVerify DefaultVerify + <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))) + -- Export remotes are not key/value stores, so always verify + -- content from them even when verification is disabled. + <||> Types.Remote.isExportSupported r diff --git a/Backend.hs b/Backend.hs index 5769c840cc..76ba12313a 100644 --- a/Backend.hs +++ b/Backend.hs @@ -1,6 +1,6 @@ {- git-annex key/value backends - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -17,11 +17,13 @@ module Backend ( isStableKey, isCryptographicallySecure, isVerifiable, + startVerifyKeyContentIncrementally, ) where import Annex.Common import qualified Annex import Annex.CheckAttr +import Annex.Verify import Types.Key import Types.KeySource import qualified Types.Backend as B @@ -127,3 +129,14 @@ isCryptographicallySecure k = maybe False (`B.isCryptographicallySecure` k) isVerifiable :: Key -> Annex Bool isVerifiable k = maybe False (isJust . B.verifyKeyContent) <$> maybeLookupBackendVariety (fromKey keyVariety k) + +startVerifyKeyContentIncrementally :: VerifyConfig -> Key -> Annex (Maybe B.IncrementalVerifier) +startVerifyKeyContentIncrementally verifyconfig k = + ifM (shouldVerify verifyconfig) + ( maybeLookupBackendVariety (fromKey keyVariety k) >>= \case + Just b -> case B.verifyKeyContentIncrementally b of + Just v -> Just <$> v k + Nothing -> return Nothing + Nothing -> return Nothing + , return Nothing + ) diff --git a/Backend/External.hs b/Backend/External.hs index 335dee24f2..7ad8303f8c 100644 --- a/Backend/External.hs +++ b/Backend/External.hs @@ -67,6 +67,7 @@ makeBackend' ebname@(ExternalBackendName bname) hasext (Right p) = do -- bump if progress handling is later added. nullMeterUpdate else Nothing + , verifyKeyContentIncrementally = Nothing , canUpgradeKey = Nothing , fastMigrate = Nothing , isStableKey = const isstable @@ -80,6 +81,7 @@ unavailBackend (ExternalBackendName bname) hasext = { backendVariety = ExternalKey bname hasext , genKey = Nothing , verifyKeyContent = Nothing + , verifyKeyContentIncrementally = Nothing , canUpgradeKey = Nothing , fastMigrate = Nothing , isStableKey = const False diff --git a/Backend/Hash.hs b/Backend/Hash.hs index b01ca47e4d..0e723dea5d 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -1,6 +1,6 @@ {- git-annex hashing backends - - - Copyright 2011-2020 Joey Hess + - Copyright 2011-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -28,6 +28,7 @@ import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L import Control.DeepSeq import Control.Exception (evaluate) +import Control.Concurrent.MVar data Hash = MD5Hash @@ -75,6 +76,7 @@ genBackend hash = Backend { backendVariety = hashKeyVariety hash (HasExt False) , genKey = Just (keyValue hash) , verifyKeyContent = Just $ checkKeyChecksum hash + , verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash , canUpgradeKey = Just needsUpgrade , fastMigrate = Just trivialMigrate , isStableKey = const True @@ -116,8 +118,6 @@ keyValueE hash source meterupdate = keyValue hash source meterupdate >>= addE source (const $ hashKeyVariety hash (HasExt True)) -{- A key's checksum is checked during fsck when it's content is present - - except for in fast mode. -} checkKeyChecksum :: Hash -> Key -> RawFilePath -> Annex Bool checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do fast <- Annex.getState Annex.fast @@ -125,22 +125,28 @@ checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do case (exists, fast) of (True, False) -> do showAction "checksum" - check <$> hashFile hash file nullMeterUpdate + sameCheckSum key + <$> hashFile hash file nullMeterUpdate _ -> return True where - expected = decodeBS (keyHash key) - check s - | s == expected = True - {- A bug caused checksums to be prefixed with \ in some - - cases; still accept these as legal now that the bug has been - - fixed. -} - | '\\' : s == expected = True - | otherwise = False - hwfault e = do warning $ "hardware fault: " ++ show e return False +sameCheckSum :: Key -> String -> Bool +sameCheckSum key s + | s == expected = True + {- A bug caused checksums to be prefixed with \ in some + - cases; still accept these as legal now that the bug + - has been fixed. -} + | '\\' : s == expected = True + | otherwise = False + where + expected = decodeBS (keyHash key) + +checkKeyChecksumIncremental :: Hash -> Key -> Annex IncrementalVerifier +checkKeyChecksumIncremental hash key = liftIO $ (snd $ hasher hash) key + keyHash :: Key -> S.ByteString keyHash = fst . splitKeyNameExtension @@ -195,79 +201,97 @@ trivialMigrate' oldkey newbackend afile maxextlen hashFile :: Hash -> RawFilePath -> MeterUpdate -> Annex String hashFile hash file meterupdate = liftIO $ withMeteredFile (fromRawFilePath file) meterupdate $ \b -> do - let h = hasher b + let h = (fst $ hasher hash) b -- Force full evaluation of hash so whole file is read -- before returning. evaluate (rnf h) return h - where - hasher = case hash of - MD5Hash -> md5Hasher - SHA1Hash -> sha1Hasher - SHA2Hash hashsize -> sha2Hasher hashsize - SHA3Hash hashsize -> sha3Hasher hashsize - SkeinHash hashsize -> skeinHasher hashsize - Blake2bHash hashsize -> blake2bHasher hashsize - Blake2bpHash hashsize -> blake2bpHasher hashsize - Blake2sHash hashsize -> blake2sHasher hashsize - Blake2spHash hashsize -> blake2spHasher hashsize -sha2Hasher :: HashSize -> (L.ByteString -> String) +type Hasher = (L.ByteString -> String, Key -> IO IncrementalVerifier) + +hasher :: Hash -> Hasher +hasher MD5Hash = md5Hasher +hasher SHA1Hash = sha1Hasher +hasher (SHA2Hash hashsize) = sha2Hasher hashsize +hasher (SHA3Hash hashsize) = sha3Hasher hashsize +hasher (SkeinHash hashsize) = skeinHasher hashsize +hasher (Blake2bHash hashsize) = blake2bHasher hashsize +hasher (Blake2bpHash hashsize) = blake2bpHasher hashsize +hasher (Blake2sHash hashsize) = blake2sHasher hashsize +hasher (Blake2spHash hashsize) = blake2spHasher hashsize + +mkHasher :: HashAlgorithm h => (L.ByteString -> Digest h) -> Context h -> Hasher +mkHasher h c = (show . h, mkIncrementalVerifier c) + +sha2Hasher :: HashSize -> Hasher sha2Hasher (HashSize hashsize) - | hashsize == 256 = use sha2_256 - | hashsize == 224 = use sha2_224 - | hashsize == 384 = use sha2_384 - | hashsize == 512 = use sha2_512 - | otherwise = error $ "unsupported SHA size " ++ show hashsize - where - use hasher = show . hasher + | hashsize == 256 = mkHasher sha2_256 sha2_256_context + | hashsize == 224 = mkHasher sha2_224 sha2_224_context + | hashsize == 384 = mkHasher sha2_384 sha2_384_context + | hashsize == 512 = mkHasher sha2_512 sha2_512_context + | otherwise = error $ "unsupported SHA2 size " ++ show hashsize -sha3Hasher :: HashSize -> (L.ByteString -> String) +sha3Hasher :: HashSize -> Hasher sha3Hasher (HashSize hashsize) - | hashsize == 256 = show . sha3_256 - | hashsize == 224 = show . sha3_224 - | hashsize == 384 = show . sha3_384 - | hashsize == 512 = show . sha3_512 + | hashsize == 256 = mkHasher sha3_256 sha3_256_context + | hashsize == 224 = mkHasher sha3_224 sha3_224_context + | hashsize == 384 = mkHasher sha3_384 sha3_384_context + | hashsize == 512 = mkHasher sha3_512 sha3_512_context | otherwise = error $ "unsupported SHA3 size " ++ show hashsize -skeinHasher :: HashSize -> (L.ByteString -> String) +skeinHasher :: HashSize -> Hasher skeinHasher (HashSize hashsize) - | hashsize == 256 = show . skein256 - | hashsize == 512 = show . skein512 + | hashsize == 256 = mkHasher skein256 skein256_context + | hashsize == 512 = mkHasher skein512 skein512_context | otherwise = error $ "unsupported SKEIN size " ++ show hashsize -blake2bHasher :: HashSize -> (L.ByteString -> String) +blake2bHasher :: HashSize -> Hasher blake2bHasher (HashSize hashsize) - | hashsize == 256 = show . blake2b_256 - | hashsize == 512 = show . blake2b_512 - | hashsize == 160 = show . blake2b_160 - | hashsize == 224 = show . blake2b_224 - | hashsize == 384 = show . blake2b_384 + | hashsize == 256 = mkHasher blake2b_256 blake2b_256_context + | hashsize == 512 = mkHasher blake2b_512 blake2b_512_context + | hashsize == 160 = mkHasher blake2b_160 blake2b_160_context + | hashsize == 224 = mkHasher blake2b_224 blake2b_224_context + | hashsize == 384 = mkHasher blake2b_384 blake2b_384_context | otherwise = error $ "unsupported BLAKE2B size " ++ show hashsize -blake2bpHasher :: HashSize -> (L.ByteString -> String) +blake2bpHasher :: HashSize -> Hasher blake2bpHasher (HashSize hashsize) - | hashsize == 512 = show . blake2bp_512 + | hashsize == 512 = mkHasher blake2bp_512 blake2bp_512_context | otherwise = error $ "unsupported BLAKE2BP size " ++ show hashsize -blake2sHasher :: HashSize -> (L.ByteString -> String) +blake2sHasher :: HashSize -> Hasher blake2sHasher (HashSize hashsize) - | hashsize == 256 = show . blake2s_256 - | hashsize == 160 = show . blake2s_160 - | hashsize == 224 = show . blake2s_224 + | hashsize == 256 = mkHasher blake2s_256 blake2s_256_context + | hashsize == 160 = mkHasher blake2s_160 blake2s_160_context + | hashsize == 224 = mkHasher blake2s_224 blake2s_224_context | otherwise = error $ "unsupported BLAKE2S size " ++ show hashsize -blake2spHasher :: HashSize -> (L.ByteString -> String) +blake2spHasher :: HashSize -> Hasher blake2spHasher (HashSize hashsize) - | hashsize == 256 = show . blake2sp_256 - | hashsize == 224 = show . blake2sp_224 + | hashsize == 256 = mkHasher blake2sp_256 blake2sp_256_context + | hashsize == 224 = mkHasher blake2sp_224 blake2sp_224_context | otherwise = error $ "unsupported BLAKE2SP size " ++ show hashsize -sha1Hasher :: L.ByteString -> String -sha1Hasher = show . sha1 +sha1Hasher :: Hasher +sha1Hasher = mkHasher sha1 sha1_context -md5Hasher :: L.ByteString -> String -md5Hasher = show . md5 +md5Hasher :: Hasher +md5Hasher = mkHasher md5 md5_context + +mkIncrementalVerifier :: HashAlgorithm h => Context h -> Key -> IO IncrementalVerifier +mkIncrementalVerifier ctx key = do + v <- newMVar ctx + return $ IncrementalVerifier + { updateIncremental = \b -> do + ctx' <- takeMVar v + let ctx'' = hashUpdate ctx' b + evaluate $ rnf ctx'' + putMVar v ctx'' + , finalizeIncremental = do + ctx' <- takeMVar v + let digest = hashFinalize ctx' + return $ sameCheckSum key (show digest) + } {- A varient of the SHA256E backend, for testing that needs special keys - that cannot collide with legitimate keys in the repository. diff --git a/Backend/URL.hs b/Backend/URL.hs index 5c8235760a..0468cbbe36 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -23,6 +23,7 @@ backend = Backend { backendVariety = URLKey , genKey = Nothing , verifyKeyContent = Nothing + , verifyKeyContentIncrementally = Nothing , canUpgradeKey = Nothing , fastMigrate = Nothing -- The content of an url can change at any time, so URL keys are diff --git a/Backend/WORM.hs b/Backend/WORM.hs index 77eb1c9c72..af116a8077 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -26,6 +26,7 @@ backend = Backend { backendVariety = WORMKey , genKey = Just keyValue , verifyKeyContent = Nothing + , verifyKeyContentIncrementally = Nothing , canUpgradeKey = Just needsUpgrade , fastMigrate = Just removeProblemChars , isStableKey = const True diff --git a/CHANGELOG b/CHANGELOG index c8ed840b86..c7328abf21 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -24,6 +24,8 @@ git-annex (8.20210128) UNRELEASED; urgency=medium * Include libkqueue.h file needed to build the assistant on BSDs. * Tahoe: Avoid verifying hash after download, since tahoe does sufficient verification itself. + * Checksum as content is received from a remote git-annex repository + over ssh/p2p protocols, rather than doing it in a second pass. -- Joey Hess Thu, 28 Jan 2021 12:34:32 -0400 diff --git a/P2P/Annex.hs b/P2P/Annex.hs index b933575c41..f0da379903 100644 --- a/P2P/Annex.hs +++ b/P2P/Annex.hs @@ -1,6 +1,6 @@ {- P2P protocol, Annex implementation - - - Copyright 2016-2018 Joey Hess + - Copyright 2016-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -23,9 +23,12 @@ import P2P.IO import Logs.Location import Types.NumCopies import Utility.Metered +import Types.Backend (IncrementalVerifier(..)) +import Backend import Control.Monad.Free import Control.Concurrent.STM +import qualified Data.ByteString as S -- Full interpreter for Proto, that can receive and send objects. runFullProto :: RunState -> P2PConnection -> Proto a -> Annex (Either ProtoFailure a) @@ -74,10 +77,11 @@ runLocal runst runner a = case a of -- Remote.P2P and Remote.Git. let rsp = RetrievalAllKeysSecure v <- tryNonAsync $ do + iv <- startVerifyKeyContentIncrementally DefaultVerify k let runtransfer ti = Right <$> transfer download' k af Nothing (\p -> logStatusAfter k $ getViaTmp rsp DefaultVerify k af $ \tmp -> - storefile (fromRawFilePath tmp) o l getb validitycheck p ti) + storefile (fromRawFilePath tmp) o l getb iv validitycheck p ti) let fallback = return $ Left $ ProtoFailureMessage "transfer already in progress, or unable to take transfer lock" checktransfer runtransfer fallback @@ -85,10 +89,10 @@ runLocal runst runner a = case a of Left e -> return $ Left $ ProtoFailureException e Right (Left e) -> return $ Left e Right (Right ok) -> runner (next ok) - StoreContentTo dest o l getb validitycheck next -> do + StoreContentTo dest iv o l getb validitycheck next -> do v <- tryNonAsync $ do let runtransfer ti = Right - <$> storefile dest o l getb validitycheck nullMeterUpdate ti + <$> storefile dest o l getb iv validitycheck nullMeterUpdate ti let fallback = return $ Left $ ProtoFailureMessage "transfer failed" checktransfer runtransfer fallback @@ -153,16 +157,44 @@ runLocal runst runner a = case a of -- Transfer logs are updated higher in the stack when -- a client. Client _ -> ta nullMeterUpdate + + resumefromoffset o incrementalverifier p h + | o /= 0 = do + p' <- case incrementalverifier of + Just iv -> do + go iv o + return p + _ -> return $ offsetMeterUpdate p (toBytesProcessed o) + -- Make sure the handle is seeked to the offset. + -- (Reading the file probably left it there + -- when that was done, but let's be sure.) + hSeek h AbsoluteSeek o + return p' + | otherwise = return p + where + go iv n + | n == 0 = return () + | otherwise = do + let c = if n > fromIntegral defaultChunkSize + then defaultChunkSize + else fromIntegral n + b <- S.hGet h c + updateIncremental iv b + unless (b == S.empty) $ + go iv (n - fromIntegral (S.length b)) - storefile dest (Offset o) (Len l) getb validitycheck p ti = do - let p' = offsetMeterUpdate p (toBytesProcessed o) + storefile dest (Offset o) (Len l) getb incrementalverifier validitycheck p ti = do v <- runner getb case v of Right b -> do liftIO $ withBinaryFile dest ReadWriteMode $ \h -> do - when (o /= 0) $ - hSeek h AbsoluteSeek o - meteredWrite p' h b + p' <- resumefromoffset o incrementalverifier p h + let writechunk = case incrementalverifier of + Nothing -> \c -> S.hPut h c + Just iv -> \c -> do + S.hPut h c + updateIncremental iv c + meteredWrite p' writechunk b indicatetransferred ti rightsize <- do @@ -170,8 +202,12 @@ runLocal runst runner a = case a of return (toInteger sz == l + o) runner validitycheck >>= \case - Right (Just Valid) -> - return (rightsize, UnVerified) + Right (Just Valid) -> case incrementalverifier of + Just iv -> ifM (liftIO (finalizeIncremental iv) <&&> pure rightsize) + ( return (True, Verified) + , return (False, UnVerified) + ) + Nothing -> return (rightsize, UnVerified) Right (Just Invalid) | l == 0 -> -- Special case, for when -- content was not diff --git a/P2P/IO.hs b/P2P/IO.hs index 9a71ba89f0..d089f1eb00 100644 --- a/P2P/IO.hs +++ b/P2P/IO.hs @@ -259,7 +259,7 @@ debugMessage conn prefix m = do -- connection. False is returned to indicate this problem. sendExactly :: Len -> L.ByteString -> Handle -> MeterUpdate -> IO Bool sendExactly (Len n) b h p = do - sent <- meteredWrite' p h (L.take (fromIntegral n) b) + sent <- meteredWrite' p (B.hPut h) (L.take (fromIntegral n) b) return (fromBytesProcessed sent == n) receiveExactly :: Len -> Handle -> MeterUpdate -> IO L.ByteString diff --git a/P2P/Protocol.hs b/P2P/Protocol.hs index 13d5ea8c3f..353167406d 100644 --- a/P2P/Protocol.hs +++ b/P2P/Protocol.hs @@ -2,7 +2,7 @@ - - See doc/design/p2p_protocol.mdwn - - - Copyright 2016-2020 Joey Hess + - Copyright 2016-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -17,7 +17,9 @@ import qualified Utility.SimpleProtocol as Proto import Types (Annex) import Types.Key import Types.UUID -import Types.Remote (Verification(..), unVerified) +import Types.Remote (Verification(..)) +import Types.Backend (IncrementalVerifier(..)) +import Types.Transfer import Utility.AuthToken import Utility.Applicative import Utility.PartialPrelude @@ -266,7 +268,7 @@ data LocalF c -- Note: The ByteString may not contain the entire remaining content -- of the key. Only once the temp file size == Len has the whole -- content been transferred. - | StoreContentTo FilePath Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c) + | StoreContentTo FilePath (Maybe IncrementalVerifier) Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c) -- ^ Like StoreContent, but stores the content to a temp file. | SetPresent Key UUID c | CheckContentPresent Key (Bool -> c) @@ -351,13 +353,13 @@ remove key = do net $ sendMessage (REMOVE key) checkSuccess -get :: FilePath -> Key -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification) -get dest key af m p = +get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification) +get dest key iv af m p = receiveContent (Just m) p sizer storer $ \offset -> GET offset (ProtoAssociatedFile af) key where sizer = fileSize dest - storer = storeContentTo dest + storer = storeContentTo dest iv put :: Key -> AssociatedFile -> MeterUpdate -> Proto Bool put key af p = do @@ -503,10 +505,9 @@ serveAuthed servermode myuuid = void $ serverLoop handler then net $ sendMessage ALREADY_HAVE else do let sizer = tmpContentSize key - let storer = \o l b v -> unVerified $ - storeContent key af o l b v - (ok, _v) <- receiveContent Nothing nullMeterUpdate sizer storer PUT_FROM - when ok $ + let storer = storeContent key af + v <- receiveContent Nothing nullMeterUpdate sizer storer PUT_FROM + when (observeBool v) $ local $ setPresent key myuuid return ServerContinue @@ -532,12 +533,13 @@ sendContent key af offset@(Offset n) p = go =<< local (contentSize key) checkSuccess receiveContent - :: Maybe Meter + :: Observable t + => Maybe Meter -> MeterUpdate -> Local Len - -> (Offset -> Len -> Proto L.ByteString -> Proto (Maybe Validity) -> Local (Bool, Verification)) + -> (Offset -> Len -> Proto L.ByteString -> Proto (Maybe Validity) -> Local t) -> (Offset -> Message) - -> Proto (Bool, Verification) + -> Proto t receiveContent mm p sizer storer mkmsg = do Len n <- local sizer let p' = offsetMeterUpdate p (toBytesProcessed n) @@ -557,14 +559,14 @@ receiveContent mm p sizer storer mkmsg = do net $ sendMessage (ERROR "expected VALID or INVALID") return Nothing else return Nothing - (ok, v) <- local $ storer offset len + v <- local $ storer offset len (net (receiveBytes len p')) validitycheck - sendSuccess ok - return (ok, v) + sendSuccess (observeBool v) + return v _ -> do net $ sendMessage (ERROR "expected DATA") - return (False, UnVerified) + return observeFailure checkSuccess :: Proto Bool checkSuccess = do diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 2da6642d26..9b14480d54 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -167,7 +167,7 @@ store r buprepo = byteStorer $ \k b p -> do } else cmd feeder = \h -> do - meteredWrite p h b + meteredWrite p (S.hPut h) b hClose h in withCreateProcess cmd' (go feeder cmd') where diff --git a/Remote/Git.hs b/Remote/Git.hs index 1bc602a612..9deb67508b 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -557,6 +557,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met then return v else giveup "failed to retrieve content from remote" else P2PHelper.retrieve + (Annex.Content.RemoteVerify r) (\p -> Ssh.runProto r connpool (return (False, UnVerified)) (fallback p)) key file dest meterupdate | otherwise = giveup "copying from non-ssh, non-http remote not supported" diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 5b6f1ce93b..ee1377ae68 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -9,6 +9,7 @@ module Remote.Glacier (remote, jobList, checkSaneGlacierCommand) where import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import Annex.Common @@ -168,7 +169,7 @@ store' r k b p = go =<< glacierEnv c gc u { std_in = CreatePipe } in liftIO $ withCreateProcess cmd (go' cmd) go' cmd (Just hin) _ _ pid = do - meteredWrite p hin b + meteredWrite p (S.hPut hin) b hClose hin forceSuccessProcess cmd pid go' _ _ _ _ _ = error "internal" diff --git a/Remote/Helper/Chunked/Legacy.hs b/Remote/Helper/Chunked/Legacy.hs index b236b8cb18..b3454ab2dc 100644 --- a/Remote/Helper/Chunked/Legacy.hs +++ b/Remote/Helper/Chunked/Legacy.hs @@ -11,6 +11,7 @@ import Annex.Common import Remote.Helper.Chunked import Utility.Metered +import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L {- This is an extension that's added to the usual file (or whatever) @@ -117,4 +118,4 @@ meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteStrin meteredWriteFileChunks meterupdate dest chunks feeder = withBinaryFile dest WriteMode $ \h -> forM_ chunks $ - meteredWrite meterupdate h <=< feeder + meteredWrite meterupdate (S.hPut h) <=< feeder diff --git a/Remote/Helper/P2P.hs b/Remote/Helper/P2P.hs index 844c4b9b82..7e2d13f2e2 100644 --- a/Remote/Helper/P2P.hs +++ b/Remote/Helper/P2P.hs @@ -1,6 +1,6 @@ {- Helpers for remotes using the git-annex P2P protocol. - - - Copyright 2016-2020 Joey Hess + - Copyright 2016-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -17,6 +17,7 @@ import Annex.Content import Messages.Progress import Utility.Metered import Types.NumCopies +import Backend import Control.Concurrent @@ -39,10 +40,11 @@ store runner k af p = do Just False -> giveup "transfer failed" Nothing -> remoteUnavail -retrieve :: (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Verification -retrieve runner k af dest p = +retrieve :: VerifyConfig -> (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Verification +retrieve verifyconfig runner k af dest p = do + iv <- startVerifyKeyContentIncrementally verifyconfig k metered (Just p) k $ \m p' -> - runner p' (P2P.get dest k af m p') >>= \case + runner p' (P2P.get dest k iv af m p') >>= \case Just (True, v) -> return v Just (False, _) -> giveup "transfer failed" Nothing -> remoteUnavail diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index ce1ebe22a0..646ce4fdb5 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -295,7 +295,7 @@ sink dest enc c mh mp content = case (enc, mh, content) of Just h -> liftIO $ b `streamto` h Nothing -> liftIO $ bracket opendest hClose (b `streamto`) streamto b h = case mp of - Just p -> meteredWrite p h b + Just p -> meteredWrite p (S.hPut h) b Nothing -> L.hPut h b opendest = openBinaryFile dest WriteMode diff --git a/Remote/P2P.hs b/Remote/P2P.hs index 21cf5b42e1..cc39ea9e0c 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -13,6 +13,7 @@ module Remote.P2P ( import Annex.Common import qualified Annex import qualified P2P.Protocol as P2P +import qualified Annex.Content import P2P.Address import P2P.Annex import P2P.IO @@ -56,7 +57,7 @@ chainGen addr r u rc gc rs = do , cost = cst , name = Git.repoDescribe r , storeKey = store (const protorunner) - , retrieveKeyFile = retrieve (const protorunner) + , retrieveKeyFile = retrieve (Annex.Content.RemoteVerify this) (const protorunner) , retrieveKeyFileCheap = Nothing , retrievalSecurityPolicy = RetrievalAllKeysSecure , removeKey = remove protorunner diff --git a/Types/Backend.hs b/Types/Backend.hs index a2bf9d130c..5b5b0b6f99 100644 --- a/Types/Backend.hs +++ b/Types/Backend.hs @@ -2,7 +2,7 @@ - - Most things should not need this, using Types instead - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -11,16 +11,21 @@ module Types.Backend where import Types.Key import Types.KeySource - import Utility.Metered import Utility.FileSystemEncoding +import Data.ByteString (ByteString) + data BackendA a = Backend { backendVariety :: KeyVariety , genKey :: Maybe (KeySource -> MeterUpdate -> a Key) - -- Verifies the content of a key using a hash. This does not need - -- to be cryptographically secure. + -- Verifies the content of a key, stored in a file, using a hash. + -- This does not need to be cryptographically secure. , verifyKeyContent :: Maybe (Key -> RawFilePath -> a Bool) + -- Incrementally verifies the content of a key, using the same + -- hash as verifyKeyContent, but with the content provided + -- incrementally a peice at a time, until finalized. + , verifyKeyContentIncrementally :: Maybe (Key -> a IncrementalVerifier) -- Checks if a key can be upgraded to a better form. , canUpgradeKey :: Maybe (Key -> Bool) -- Checks if there is a fast way to migrate a key to a different @@ -38,3 +43,11 @@ instance Show (BackendA a) where instance Eq (BackendA a) where a == b = backendVariety a == backendVariety b + +data IncrementalVerifier = IncrementalVerifier + { updateIncremental :: ByteString -> IO () + -- ^ Called repeatedly on each peice of the content. + , finalizeIncremental :: IO Bool + -- ^ Called once the full content has been sent, returns true + -- if the hash verified. + } diff --git a/Utility/Hash.hs b/Utility/Hash.hs index 732009a8d1..e843e740ea 100644 --- a/Utility/Hash.hs +++ b/Utility/Hash.hs @@ -2,29 +2,57 @@ module Utility.Hash ( sha1, + sha1_context, sha2_224, + sha2_224_context, sha2_256, + sha2_256_context, sha2_384, + sha2_384_context, sha2_512, + sha2_512_context, sha3_224, + sha3_224_context, sha3_256, + sha3_256_context, sha3_384, + sha3_384_context, sha3_512, + sha3_512_context, skein256, + skein256_context, skein512, + skein512_context, blake2s_160, + blake2s_160_context, blake2s_224, + blake2s_224_context, blake2s_256, + blake2s_256_context, blake2sp_224, + blake2sp_224_context, blake2sp_256, + blake2sp_256_context, blake2b_160, + blake2b_160_context, blake2b_224, + blake2b_224_context, blake2b_256, + blake2b_256_context, blake2b_384, + blake2b_384_context, blake2b_512, + blake2b_512_context, blake2bp_512, + blake2bp_512_context, md5, + md5_context, md5s, + hashUpdate, + hashFinalize, + Digest, + HashAlgorithm, + Context, props_hashes_stable, Mac(..), calcMac, @@ -35,78 +63,147 @@ import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import qualified Data.Text as T import qualified Data.Text.Encoding as T -import "cryptonite" Crypto.MAC.HMAC +import "cryptonite" Crypto.MAC.HMAC hiding (Context) import "cryptonite" Crypto.Hash sha1 :: L.ByteString -> Digest SHA1 sha1 = hashlazy +sha1_context :: Context SHA1 +sha1_context = hashInit + sha2_224 :: L.ByteString -> Digest SHA224 sha2_224 = hashlazy +sha2_224_context :: Context SHA224 +sha2_224_context = hashInit + sha2_256 :: L.ByteString -> Digest SHA256 sha2_256 = hashlazy +sha2_256_context :: Context SHA256 +sha2_256_context = hashInit + sha2_384 :: L.ByteString -> Digest SHA384 sha2_384 = hashlazy +sha2_384_context :: Context SHA384 +sha2_384_context = hashInit + sha2_512 :: L.ByteString -> Digest SHA512 sha2_512 = hashlazy +sha2_512_context :: Context SHA512 +sha2_512_context = hashInit + sha3_224 :: L.ByteString -> Digest SHA3_224 sha3_224 = hashlazy +sha3_224_context :: Context SHA3_224 +sha3_224_context = hashInit + sha3_256 :: L.ByteString -> Digest SHA3_256 sha3_256 = hashlazy +sha3_256_context :: Context SHA3_256 +sha3_256_context = hashInit + sha3_384 :: L.ByteString -> Digest SHA3_384 sha3_384 = hashlazy +sha3_384_context :: Context SHA3_384 +sha3_384_context = hashInit + sha3_512 :: L.ByteString -> Digest SHA3_512 sha3_512 = hashlazy +sha3_512_context :: Context SHA3_512 +sha3_512_context = hashInit + skein256 :: L.ByteString -> Digest Skein256_256 skein256 = hashlazy +skein256_context :: Context Skein256_256 +skein256_context = hashInit + skein512 :: L.ByteString -> Digest Skein512_512 skein512 = hashlazy +skein512_context :: Context Skein512_512 +skein512_context = hashInit + blake2s_160 :: L.ByteString -> Digest Blake2s_160 blake2s_160 = hashlazy +blake2s_160_context :: Context Blake2s_160 +blake2s_160_context = hashInit + blake2s_224 :: L.ByteString -> Digest Blake2s_224 blake2s_224 = hashlazy +blake2s_224_context :: Context Blake2s_224 +blake2s_224_context = hashInit + blake2s_256 :: L.ByteString -> Digest Blake2s_256 blake2s_256 = hashlazy +blake2s_256_context :: Context Blake2s_256 +blake2s_256_context = hashInit + blake2sp_224 :: L.ByteString -> Digest Blake2sp_224 blake2sp_224 = hashlazy +blake2sp_224_context :: Context Blake2sp_224 +blake2sp_224_context = hashInit + blake2sp_256 :: L.ByteString -> Digest Blake2sp_256 blake2sp_256 = hashlazy +blake2sp_256_context :: Context Blake2sp_256 +blake2sp_256_context = hashInit + blake2b_160 :: L.ByteString -> Digest Blake2b_160 blake2b_160 = hashlazy +blake2b_160_context :: Context Blake2b_160 +blake2b_160_context = hashInit + blake2b_224 :: L.ByteString -> Digest Blake2b_224 blake2b_224 = hashlazy +blake2b_224_context :: Context Blake2b_224 +blake2b_224_context = hashInit + blake2b_256 :: L.ByteString -> Digest Blake2b_256 blake2b_256 = hashlazy +blake2b_256_context :: Context Blake2b_256 +blake2b_256_context = hashInit + blake2b_384 :: L.ByteString -> Digest Blake2b_384 blake2b_384 = hashlazy +blake2b_384_context :: Context Blake2b_384 +blake2b_384_context = hashInit + blake2b_512 :: L.ByteString -> Digest Blake2b_512 blake2b_512 = hashlazy +blake2b_512_context :: Context Blake2b_512 +blake2b_512_context = hashInit + blake2bp_512 :: L.ByteString -> Digest Blake2bp_512 blake2bp_512 = hashlazy +blake2bp_512_context :: Context Blake2bp_512 +blake2bp_512_context = hashInit + md5 :: L.ByteString -> Digest MD5 md5 = hashlazy +md5_context :: Context MD5 +md5_context = hashInit + md5s :: S.ByteString -> Digest MD5 md5s = hash diff --git a/Utility/Metered.hs b/Utility/Metered.hs index 1715f0bf77..4683c13d6d 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -1,6 +1,6 @@ {- Metered IO and actions - - - Copyright 2012-2020 Joey Hess + - Copyright 2012-2021 Joey Hess - - License: BSD-2-clause -} @@ -118,23 +118,24 @@ withMeteredFile :: FilePath -> MeterUpdate -> (L.ByteString -> IO a) -> IO a withMeteredFile f meterupdate a = withBinaryFile f ReadMode $ \h -> hGetContentsMetered h meterupdate >>= a -{- Writes a ByteString to a Handle, updating a meter as it's written. -} -meteredWrite :: MeterUpdate -> Handle -> L.ByteString -> IO () -meteredWrite meterupdate h = void . meteredWrite' meterupdate h +{- Calls the action repeatedly with chunks from the lazy ByteString. + - Updates the meter after each chunk is processed. -} +meteredWrite :: MeterUpdate -> (S.ByteString -> IO ()) -> L.ByteString -> IO () +meteredWrite meterupdate a = void . meteredWrite' meterupdate a -meteredWrite' :: MeterUpdate -> Handle -> L.ByteString -> IO BytesProcessed -meteredWrite' meterupdate h = go zeroBytesProcessed . L.toChunks +meteredWrite' :: MeterUpdate -> (S.ByteString -> IO ()) -> L.ByteString -> IO BytesProcessed +meteredWrite' meterupdate a = go zeroBytesProcessed . L.toChunks where go sofar [] = return sofar go sofar (c:cs) = do - S.hPut h c + a c let !sofar' = addBytesProcessed sofar $ S.length c meterupdate sofar' go sofar' cs meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO () meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h -> - meteredWrite meterupdate h b + meteredWrite meterupdate (S.hPut h) b {- Applies an offset to a MeterUpdate. This can be useful when - performing a sequence of actions, such as multiple meteredWriteFiles, diff --git a/git-annex.cabal b/git-annex.cabal index 187aa31761..86f3a7829a 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -671,9 +671,10 @@ Executable git-annex Annex.UpdateInstead Annex.UUID Annex.Url + Annex.VariantFile Annex.VectorClock Annex.VectorClock.Utility - Annex.VariantFile + Annex.Verify Annex.Version Annex.View Annex.View.ViewedFile