incremental checksum on download from ssh or p2p

Checksum as content is received from a remote git-annex repository, rather
than doing it in a second pass.

Not tested at all yet, but I imagine it will work!

Not implemented for any special remotes, and also not implemented for
copies from local remotes. It may be that, for local remotes, it will
suffice to use rsync, rely on its checksumming, and simply return Verified.
(It would still make a checksumming pass when cp is used for COW, I guess.)
This commit is contained in:
Joey Hess 2021-02-09 17:03:27 -04:00
parent ed684f651e
commit 62e152f210
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
17 changed files with 118 additions and 52 deletions

View file

@ -26,10 +26,10 @@ module Annex.Content.Presence (
import Annex.Common
import qualified Annex
import Annex.Verify
import Annex.LockPool
import Annex.WorkerPool
import Types.Remote (unVerified, Verification(..), RetrievalSecurityPolicy(..))
import qualified Types.Remote
import qualified Types.Backend
import qualified Backend
import qualified Database.Keys
@ -231,16 +231,3 @@ warnUnverifiableInsecure k = warning $ unwords
]
where
kv = decodeBS (formatKeyVariety (fromKey keyVariety k))
data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify
shouldVerify :: VerifyConfig -> Annex Bool
shouldVerify AlwaysVerify = return True
shouldVerify NoVerify = return False
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
shouldVerify (RemoteVerify r) =
(shouldVerify DefaultVerify
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
-- Export remotes are not key/value stores, so always verify
-- content from them even when verification is disabled.
<||> Types.Remote.isExportSupported r

25
Annex/Verify.hs Normal file
View file

@ -0,0 +1,25 @@
{- verification
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Verify where
import Annex.Common
import qualified Annex
import qualified Types.Remote
data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify
shouldVerify :: VerifyConfig -> Annex Bool
shouldVerify AlwaysVerify = return True
shouldVerify NoVerify = return False
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
shouldVerify (RemoteVerify r) =
(shouldVerify DefaultVerify
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
-- Export remotes are not key/value stores, so always verify
-- content from them even when verification is disabled.
<||> Types.Remote.isExportSupported r

View file

@ -1,6 +1,6 @@
{- git-annex key/value backends
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -17,11 +17,13 @@ module Backend (
isStableKey,
isCryptographicallySecure,
isVerifiable,
startVerifyKeyContentIncrementally,
) where
import Annex.Common
import qualified Annex
import Annex.CheckAttr
import Annex.Verify
import Types.Key
import Types.KeySource
import qualified Types.Backend as B
@ -127,3 +129,14 @@ isCryptographicallySecure k = maybe False (`B.isCryptographicallySecure` k)
isVerifiable :: Key -> Annex Bool
isVerifiable k = maybe False (isJust . B.verifyKeyContent)
<$> maybeLookupBackendVariety (fromKey keyVariety k)
startVerifyKeyContentIncrementally :: VerifyConfig -> Key -> Annex (Maybe B.IncrementalVerifier)
startVerifyKeyContentIncrementally verifyconfig k =
ifM (shouldVerify verifyconfig)
( maybeLookupBackendVariety (fromKey keyVariety k) >>= \case
Just b -> case B.verifyKeyContentIncrementally b of
Just v -> Just <$> v k
Nothing -> return Nothing
Nothing -> return Nothing
, return Nothing
)

View file

@ -145,8 +145,7 @@ sameCheckSum key s
expected = decodeBS (keyHash key)
checkKeyChecksumIncremental :: Hash -> Key -> Annex IncrementalVerifier
checkKeyChecksumIncremental hash key = liftIO $
(\h -> snd h key) (hasher hash)
checkKeyChecksumIncremental hash key = liftIO $ (snd $ hasher hash) key
keyHash :: Key -> S.ByteString
keyHash = fst . splitKeyNameExtension

View file

@ -24,6 +24,8 @@ git-annex (8.20210128) UNRELEASED; urgency=medium
* Include libkqueue.h file needed to build the assistant on BSDs.
* Tahoe: Avoid verifying hash after download, since tahoe does sufficient
verification itself.
* Checksum as content is received from a remote git-annex repository,
rather than doing it in a second pass.
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2021 12:34:32 -0400

View file

@ -1,6 +1,6 @@
{- P2P protocol, Annex implementation
-
- Copyright 2016-2018 Joey Hess <id@joeyh.name>
- Copyright 2016-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -23,9 +23,11 @@ import P2P.IO
import Logs.Location
import Types.NumCopies
import Utility.Metered
import Types.Backend (IncrementalVerifier(..))
import Control.Monad.Free
import Control.Concurrent.STM
import qualified Data.ByteString as S
-- Full interpreter for Proto, that can receive and send objects.
runFullProto :: RunState -> P2PConnection -> Proto a -> Annex (Either ProtoFailure a)
@ -77,7 +79,7 @@ runLocal runst runner a = case a of
let runtransfer ti =
Right <$> transfer download' k af Nothing (\p ->
logStatusAfter k $ getViaTmp rsp DefaultVerify k af $ \tmp ->
storefile (fromRawFilePath tmp) o l getb validitycheck p ti)
storefile (fromRawFilePath tmp) o l getb Nothing validitycheck p ti)
let fallback = return $ Left $
ProtoFailureMessage "transfer already in progress, or unable to take transfer lock"
checktransfer runtransfer fallback
@ -85,10 +87,10 @@ runLocal runst runner a = case a of
Left e -> return $ Left $ ProtoFailureException e
Right (Left e) -> return $ Left e
Right (Right ok) -> runner (next ok)
StoreContentTo dest o l getb validitycheck next -> do
StoreContentTo dest incrementalverifier o l getb validitycheck next -> do
v <- tryNonAsync $ do
let runtransfer ti = Right
<$> storefile dest o l getb validitycheck nullMeterUpdate ti
<$> storefile dest o l getb incrementalverifier validitycheck nullMeterUpdate ti
let fallback = return $ Left $
ProtoFailureMessage "transfer failed"
checktransfer runtransfer fallback
@ -153,16 +155,41 @@ runLocal runst runner a = case a of
-- Transfer logs are updated higher in the stack when
-- a client.
Client _ -> ta nullMeterUpdate
resumefromoffset o incrementalverifier p h
| o /= 0 = do
p' <- case incrementalverifier of
Just iv -> do
go iv o
return p
_ -> return $ offsetMeterUpdate p (toBytesProcessed o)
-- Make sure the handle is seeked to the offset.
-- (Reading the file probably left it there
-- when that was done, but let's be sure.)
hSeek h AbsoluteSeek o
return p'
| otherwise = return p
where
go iv n
| n == 0 = return ()
| n > fromIntegral defaultChunkSize = do
updateIncremental iv =<< S.hGet h defaultChunkSize
go iv (n - fromIntegral defaultChunkSize)
| otherwise =
updateIncremental iv =<< S.hGet h (fromIntegral n)
storefile dest (Offset o) (Len l) getb validitycheck p ti = do
let p' = offsetMeterUpdate p (toBytesProcessed o)
storefile dest (Offset o) (Len l) getb incrementalverifier validitycheck p ti = do
v <- runner getb
case v of
Right b -> do
liftIO $ withBinaryFile dest ReadWriteMode $ \h -> do
when (o /= 0) $
hSeek h AbsoluteSeek o
meteredWrite p' h b
p' <- resumefromoffset o incrementalverifier p h
let writechunk = case incrementalverifier of
Nothing -> \c -> S.hPut h c
Just iv -> \c -> do
S.hPut h c
updateIncremental iv c
meteredWrite p' writechunk b
indicatetransferred ti
rightsize <- do
@ -170,8 +197,12 @@ runLocal runst runner a = case a of
return (toInteger sz == l + o)
runner validitycheck >>= \case
Right (Just Valid) ->
return (rightsize, UnVerified)
Right (Just Valid) -> case incrementalverifier of
Just iv -> ifM (liftIO (finalizeIncremental iv) <&&> pure rightsize)
( return (True, Verified)
, return (False, UnVerified)
)
Nothing -> return (rightsize, UnVerified)
Right (Just Invalid) | l == 0 ->
-- Special case, for when
-- content was not

View file

@ -259,7 +259,7 @@ debugMessage conn prefix m = do
-- connection. False is returned to indicate this problem.
sendExactly :: Len -> L.ByteString -> Handle -> MeterUpdate -> IO Bool
sendExactly (Len n) b h p = do
sent <- meteredWrite' p h (L.take (fromIntegral n) b)
sent <- meteredWrite' p (B.hPut h) (L.take (fromIntegral n) b)
return (fromBytesProcessed sent == n)
receiveExactly :: Len -> Handle -> MeterUpdate -> IO L.ByteString

View file

@ -2,7 +2,7 @@
-
- See doc/design/p2p_protocol.mdwn
-
- Copyright 2016-2020 Joey Hess <id@joeyh.name>
- Copyright 2016-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -18,6 +18,7 @@ import Types (Annex)
import Types.Key
import Types.UUID
import Types.Remote (Verification(..), unVerified)
import Types.Backend (IncrementalVerifier(..))
import Utility.AuthToken
import Utility.Applicative
import Utility.PartialPrelude
@ -266,7 +267,7 @@ data LocalF c
-- Note: The ByteString may not contain the entire remaining content
-- of the key. Only once the temp file size == Len has the whole
-- content been transferred.
| StoreContentTo FilePath Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c)
| StoreContentTo FilePath (Maybe IncrementalVerifier) Offset Len (Proto L.ByteString) (Proto (Maybe Validity)) ((Bool, Verification) -> c)
-- ^ Like StoreContent, but stores the content to a temp file.
| SetPresent Key UUID c
| CheckContentPresent Key (Bool -> c)
@ -351,13 +352,13 @@ remove key = do
net $ sendMessage (REMOVE key)
checkSuccess
get :: FilePath -> Key -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification)
get dest key af m p =
get :: FilePath -> Key -> Maybe IncrementalVerifier -> AssociatedFile -> Meter -> MeterUpdate -> Proto (Bool, Verification)
get dest key iv af m p =
receiveContent (Just m) p sizer storer $ \offset ->
GET offset (ProtoAssociatedFile af) key
where
sizer = fileSize dest
storer = storeContentTo dest
storer = storeContentTo dest iv
put :: Key -> AssociatedFile -> MeterUpdate -> Proto Bool
put key af p = do

View file

@ -167,7 +167,7 @@ store r buprepo = byteStorer $ \k b p -> do
}
else cmd
feeder = \h -> do
meteredWrite p h b
meteredWrite p (S.hPut h) b
hClose h
in withCreateProcess cmd' (go feeder cmd')
where

View file

@ -557,6 +557,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met
then return v
else giveup "failed to retrieve content from remote"
else P2PHelper.retrieve
(Annex.Content.RemoteVerify r)
(\p -> Ssh.runProto r connpool (return (False, UnVerified)) (fallback p))
key file dest meterupdate
| otherwise = giveup "copying from non-ssh, non-http remote not supported"

View file

@ -9,6 +9,7 @@ module Remote.Glacier (remote, jobList, checkSaneGlacierCommand) where
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Annex.Common
@ -168,7 +169,7 @@ store' r k b p = go =<< glacierEnv c gc u
{ std_in = CreatePipe }
in liftIO $ withCreateProcess cmd (go' cmd)
go' cmd (Just hin) _ _ pid = do
meteredWrite p hin b
meteredWrite p (S.hPut hin) b
hClose hin
forceSuccessProcess cmd pid
go' _ _ _ _ _ = error "internal"

View file

@ -11,6 +11,7 @@ import Annex.Common
import Remote.Helper.Chunked
import Utility.Metered
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
{- This is an extension that's added to the usual file (or whatever)
@ -117,4 +118,4 @@ meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteStrin
meteredWriteFileChunks meterupdate dest chunks feeder =
withBinaryFile dest WriteMode $ \h ->
forM_ chunks $
meteredWrite meterupdate h <=< feeder
meteredWrite meterupdate (S.hPut h) <=< feeder

View file

@ -1,6 +1,6 @@
{- Helpers for remotes using the git-annex P2P protocol.
-
- Copyright 2016-2020 Joey Hess <id@joeyh.name>
- Copyright 2016-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -17,6 +17,7 @@ import Annex.Content
import Messages.Progress
import Utility.Metered
import Types.NumCopies
import Backend
import Control.Concurrent
@ -39,10 +40,11 @@ store runner k af p = do
Just False -> giveup "transfer failed"
Nothing -> remoteUnavail
retrieve :: (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Verification
retrieve runner k af dest p =
retrieve :: VerifyConfig -> (MeterUpdate -> ProtoRunner (Bool, Verification)) -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Verification
retrieve verifyconfig runner k af dest p = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
metered (Just p) k $ \m p' ->
runner p' (P2P.get dest k af m p') >>= \case
runner p' (P2P.get dest k iv af m p') >>= \case
Just (True, v) -> return v
Just (False, _) -> giveup "transfer failed"
Nothing -> remoteUnavail

View file

@ -295,7 +295,7 @@ sink dest enc c mh mp content = case (enc, mh, content) of
Just h -> liftIO $ b `streamto` h
Nothing -> liftIO $ bracket opendest hClose (b `streamto`)
streamto b h = case mp of
Just p -> meteredWrite p h b
Just p -> meteredWrite p (S.hPut h) b
Nothing -> L.hPut h b
opendest = openBinaryFile dest WriteMode

View file

@ -13,6 +13,7 @@ module Remote.P2P (
import Annex.Common
import qualified Annex
import qualified P2P.Protocol as P2P
import qualified Annex.Content
import P2P.Address
import P2P.Annex
import P2P.IO
@ -56,7 +57,7 @@ chainGen addr r u rc gc rs = do
, cost = cst
, name = Git.repoDescribe r
, storeKey = store (const protorunner)
, retrieveKeyFile = retrieve (const protorunner)
, retrieveKeyFile = retrieve (Annex.Content.RemoteVerify this) (const protorunner)
, retrieveKeyFileCheap = Nothing
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = remove protorunner

View file

@ -1,6 +1,6 @@
{- Metered IO and actions
-
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
- Copyright 2012-2021 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
@ -118,23 +118,24 @@ withMeteredFile :: FilePath -> MeterUpdate -> (L.ByteString -> IO a) -> IO a
withMeteredFile f meterupdate a = withBinaryFile f ReadMode $ \h ->
hGetContentsMetered h meterupdate >>= a
{- Writes a ByteString to a Handle, updating a meter as it's written. -}
meteredWrite :: MeterUpdate -> Handle -> L.ByteString -> IO ()
meteredWrite meterupdate h = void . meteredWrite' meterupdate h
{- Calls the action repeatedly with chunks from the lazy ByteString.
- Updates the meter after each chunk is processed. -}
meteredWrite :: MeterUpdate -> (S.ByteString -> IO ()) -> L.ByteString -> IO ()
meteredWrite meterupdate a = void . meteredWrite' meterupdate a
meteredWrite' :: MeterUpdate -> Handle -> L.ByteString -> IO BytesProcessed
meteredWrite' meterupdate h = go zeroBytesProcessed . L.toChunks
meteredWrite' :: MeterUpdate -> (S.ByteString -> IO ()) -> L.ByteString -> IO BytesProcessed
meteredWrite' meterupdate a = go zeroBytesProcessed . L.toChunks
where
go sofar [] = return sofar
go sofar (c:cs) = do
S.hPut h c
a c
let !sofar' = addBytesProcessed sofar $ S.length c
meterupdate sofar'
go sofar' cs
meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO ()
meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h ->
meteredWrite meterupdate h b
meteredWrite meterupdate (S.hPut h) b
{- Applies an offset to a MeterUpdate. This can be useful when
- performing a sequence of actions, such as multiple meteredWriteFiles,

View file

@ -671,9 +671,10 @@ Executable git-annex
Annex.UpdateInstead
Annex.UUID
Annex.Url
Annex.VariantFile
Annex.VectorClock
Annex.VectorClock.Utility
Annex.VariantFile
Annex.Verify
Annex.Version
Annex.View
Annex.View.ViewedFile