incremental verify for byteRetriever special remotes
Several special remotes verify content while it is being retrieved, avoiding a separate checksum pass. They are: S3, bup, ddar, and gcrypt (with a local repository). Not done when using chunking, yet. Complicated by Retriever needing to change to be polymorphic. Which in turn meant RankNTypes is needed, and also needed some code changes. The change in Remote.External does not change behavior at all but avoids the type checking failing because of a "rigid, skolem type" which "would escape its scope". So I refactored slightly to make the type checker's job easier there. Unfortunately, directory uses fileRetriever (except when chunked), so it is not amoung the improved ones. Fixing that would need a way for FileRetriever to return a Verification. But, since the file retrieved may be encrypted or chunked, it would be extra work to always incrementally checksum the file while retrieving it. Hm. Some other special remotes use fileRetriever, and so don't get incremental verification, but could be converted to byteRetriever later. One is GitLFS, which uses downloadConduit, which writes to the file, so could verify as it goes. Other special remotes like web could too, but don't use Remote.Helper.Special and so will need to be addressed separately. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
9518aca2f5
commit
c20358b671
16 changed files with 111 additions and 50 deletions
|
@ -8,6 +8,10 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
|
|||
* add: When adding a dotfile, avoid treating its name as an extension.
|
||||
* rsync special remote: Stop displaying rsync progress, and use
|
||||
git-annex's own progress display.
|
||||
* Several special remotes verify content while it is being retrieved,
|
||||
avoiding a separate checksum pass. They are: S3, bup, ddar,
|
||||
and gcrypt (with a local repository). This optimisation is not yet
|
||||
available when chunks are used.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 03 Aug 2021 12:22:45 -0400
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.Adb (remote) where
|
||||
|
||||
import Annex.Common
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.Bup (remote) where
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
@ -184,10 +186,11 @@ retrieve buprepo = byteRetriever $ \k sink -> do
|
|||
bracketIO (createProcess p) cleanupProcess (go sink p)
|
||||
where
|
||||
go sink p (_, Just h, _, pid) = do
|
||||
() <- sink =<< liftIO (L.hGetContents h)
|
||||
r <- sink =<< liftIO (L.hGetContents h)
|
||||
liftIO $ do
|
||||
hClose h
|
||||
forceSuccessProcess p pid
|
||||
return r
|
||||
go _ _ _ = error "internal"
|
||||
|
||||
{- Cannot revert having stored a key in bup, but at least the data for the
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
-}
|
||||
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.Ddar (remote) where
|
||||
|
||||
|
@ -166,10 +167,11 @@ retrieve ddarrepo = byteRetriever $ \k sink -> do
|
|||
bracketIO (createProcess p) cleanupProcess (go sink p)
|
||||
where
|
||||
go sink p (_, Just h, _, pid) = do
|
||||
() <- sink =<< liftIO (L.hGetContents h)
|
||||
r <- sink =<< liftIO (L.hGetContents h)
|
||||
liftIO $ do
|
||||
hClose h
|
||||
forceSuccessProcess p pid
|
||||
return r
|
||||
go _ _ _ = error "internal"
|
||||
|
||||
remove :: DdarRepo -> Remover
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Remote.Directory (
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.Directory.LegacyChunked where
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.External (remote) where
|
||||
|
||||
|
@ -66,17 +67,19 @@ gen r u rc gc rs
|
|||
| externaltype == "readonly" = do
|
||||
c <- parsedRemoteConfig remote rc
|
||||
cst <- remoteCost gc expensiveRemoteCost
|
||||
mk c cst GloballyAvailable
|
||||
readonlyStorer
|
||||
retrieveUrl
|
||||
readonlyRemoveKey
|
||||
checkKeyUrl
|
||||
let rmt = mk c cst GloballyAvailable
|
||||
Nothing
|
||||
(externalInfo externaltype)
|
||||
Nothing
|
||||
Nothing
|
||||
exportUnsupported
|
||||
exportUnsupported
|
||||
return $ Just $ specialRemote c
|
||||
readonlyStorer
|
||||
retrieveUrl
|
||||
readonlyRemoveKey
|
||||
checkKeyUrl
|
||||
rmt
|
||||
| otherwise = do
|
||||
c <- parsedRemoteConfig remote rc
|
||||
external <- newExternal externaltype (Just u) c (Just gc)
|
||||
|
@ -103,20 +106,22 @@ gen r u rc gc rs
|
|||
let cheapexportsupported = if exportsupported
|
||||
then exportIsSupported
|
||||
else exportUnsupported
|
||||
mk c cst avail
|
||||
(storeKeyM external)
|
||||
(retrieveKeyFileM external)
|
||||
(removeKeyM external)
|
||||
(checkPresentM external)
|
||||
let rmt = mk c cst avail
|
||||
(Just (whereisKeyM external))
|
||||
(getInfoM external)
|
||||
(Just (claimUrlM external))
|
||||
(Just (checkUrlM external))
|
||||
exportactions
|
||||
cheapexportsupported
|
||||
return $ Just $ specialRemote c
|
||||
(storeKeyM external)
|
||||
(retrieveKeyFileM external)
|
||||
(removeKeyM external)
|
||||
(checkPresentM external)
|
||||
rmt
|
||||
where
|
||||
mk c cst avail tostore toretrieve toremove tocheckkey towhereis togetinfo toclaimurl tocheckurl exportactions cheapexportsupported = do
|
||||
let rmt = Remote
|
||||
mk c cst avail towhereis togetinfo toclaimurl tocheckurl exportactions cheapexportsupported =
|
||||
Remote
|
||||
{ uuid = u
|
||||
, cost = cst
|
||||
, name = Git.repoDescribe r
|
||||
|
@ -154,12 +159,6 @@ gen r u rc gc rs
|
|||
, checkUrl = tocheckurl
|
||||
, remoteStateHandle = rs
|
||||
}
|
||||
return $ Just $ specialRemote c
|
||||
tostore
|
||||
toretrieve
|
||||
toremove
|
||||
tocheckkey
|
||||
rmt
|
||||
externaltype = fromMaybe (giveup "missing externaltype") (remoteAnnexExternalType gc)
|
||||
|
||||
externalSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.GCrypt (
|
||||
remote,
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Remote.GitLFS (remote, gen, configKnownUrl) where
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.Glacier (remote, jobList, checkSaneGlacierCommand) where
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
@ -176,7 +178,7 @@ store' r k b p = go =<< glacierEnv c gc u
|
|||
retrieve :: Remote -> Retriever
|
||||
retrieve = byteRetriever . retrieve'
|
||||
|
||||
retrieve' :: Remote -> Key -> (L.ByteString -> Annex ()) -> Annex ()
|
||||
retrieve' :: Remote -> Key -> (L.ByteString -> Annex a) -> Annex a
|
||||
retrieve' r k sink = go =<< glacierEnv c gc u
|
||||
where
|
||||
c = config r
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
{- git-annex chunked remotes
|
||||
-
|
||||
- Copyright 2014-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2014-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.Helper.Chunked (
|
||||
ChunkSize,
|
||||
ChunkConfig(..),
|
||||
|
@ -30,6 +32,7 @@ import Utility.Metered
|
|||
import Crypto
|
||||
import Backend (isStableKey)
|
||||
import Annex.SpecialRemote.Config
|
||||
import Annex.Verify
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
|
@ -250,6 +253,7 @@ retrieveChunks
|
|||
:: LensGpgEncParams encc
|
||||
=> Retriever
|
||||
-> UUID
|
||||
-> VerifyConfig
|
||||
-> ChunkConfig
|
||||
-> EncKey
|
||||
-> Key
|
||||
|
@ -257,15 +261,26 @@ retrieveChunks
|
|||
-> MeterUpdate
|
||||
-> Maybe (Cipher, EncKey)
|
||||
-> encc
|
||||
-> Annex ()
|
||||
retrieveChunks retriever u chunkconfig encryptor basek dest basep enc encc
|
||||
| noChunks chunkconfig =
|
||||
-> Annex Verification
|
||||
retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
|
||||
| noChunks chunkconfig = do
|
||||
-- Optimisation: Try the unchunked key first, to avoid
|
||||
-- looking in the git-annex branch for chunk counts
|
||||
-- that are likely not there.
|
||||
getunchunked `catchNonAsync`
|
||||
(\e -> go (Just e) =<< chunkKeysOnly u chunkconfig basek)
|
||||
| otherwise = go Nothing =<< chunkKeys u chunkconfig basek
|
||||
iv <- startVerifyKeyContentIncrementally vc basek
|
||||
tryNonAsync (getunchunked iv) >>= \case
|
||||
Right Nothing -> return UnVerified
|
||||
Right (Just iv') ->
|
||||
ifM (liftIO $ finalizeIncremental iv')
|
||||
( return Verified
|
||||
, return UnVerified
|
||||
)
|
||||
Left e -> do
|
||||
go (Just e) =<< chunkKeysOnly u chunkconfig basek
|
||||
return UnVerified
|
||||
| otherwise = do
|
||||
go Nothing =<< chunkKeys u chunkconfig basek
|
||||
return UnVerified
|
||||
where
|
||||
go pe cks = do
|
||||
let ls = map chunkKeyList cks
|
||||
|
@ -279,7 +294,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep enc encc
|
|||
firstavail (Just e) _ [] = throwM e
|
||||
firstavail pe currsize ([]:ls) = firstavail pe currsize ls
|
||||
firstavail _ currsize ((k:ks):ls)
|
||||
| k == basek = getunchunked
|
||||
| k == basek = void (getunchunked Nothing)
|
||||
`catchNonAsync` (\e -> firstavail (Just e) currsize ls)
|
||||
| otherwise = do
|
||||
let offset = resumeOffset currsize k
|
||||
|
@ -289,7 +304,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep enc encc
|
|||
v <- tryNonAsync $
|
||||
retriever (encryptor k) p $ \content ->
|
||||
bracketIO (maybe opennew openresume offset) hClose $ \h -> do
|
||||
retrieved (Just h) p content
|
||||
void $ retrieved Nothing (Just h) p content
|
||||
let sz = toBytesProcessed $
|
||||
fromMaybe 0 $ fromKey keyChunkSize k
|
||||
getrest p h sz sz ks
|
||||
|
@ -303,10 +318,12 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep enc encc
|
|||
getrest p h sz bytesprocessed (k:ks) = do
|
||||
let p' = offsetMeterUpdate p bytesprocessed
|
||||
liftIO $ p' zeroBytesProcessed
|
||||
retriever (encryptor k) p' $ retrieved (Just h) p'
|
||||
retriever (encryptor k) p' $
|
||||
void . retrieved Nothing (Just h) p'
|
||||
getrest p h sz (addBytesProcessed bytesprocessed sz) ks
|
||||
|
||||
getunchunked = retriever (encryptor basek) basep $ retrieved Nothing basep
|
||||
getunchunked iv = retriever (encryptor basek) basep $
|
||||
retrieved iv Nothing basep
|
||||
|
||||
opennew = openBinaryFile dest WriteMode
|
||||
|
||||
|
@ -326,21 +343,27 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep enc encc
|
|||
- Instead, writeRetrievedContent is passed a meter to update
|
||||
- as it consumes the ByteString.
|
||||
-}
|
||||
retrieved h p content = writeRetrievedContent dest enc encc h p' content
|
||||
retrieved iv h p content =
|
||||
writeRetrievedContent dest enc encc h p' content iv
|
||||
where
|
||||
p'
|
||||
| isByteContent content = Just p
|
||||
| otherwise = Nothing
|
||||
|
||||
{- Writes retrieved file content into the provided Handle, decrypting it
|
||||
{- Writes retrieved file content to the provided Handle, decrypting it
|
||||
- first if necessary.
|
||||
-
|
||||
- If the remote did not store the content using chunks, no Handle
|
||||
- will be provided, and it's up to us to open the destination file.
|
||||
- will be provided, and instead the content will be written to the
|
||||
- dest file.
|
||||
-
|
||||
- Note that when neither chunking nor encryption is used, and the remote
|
||||
- provides FileContent, that file only needs to be renamed
|
||||
- into place. (And it may even already be in the right place..)
|
||||
-
|
||||
- The IncrementalVerifier is updated as the file content is read.
|
||||
- If it was not able to be updated, due to the file not needing to be read,
|
||||
- Nothing will be returned.
|
||||
-}
|
||||
writeRetrievedContent
|
||||
:: LensGpgEncParams encc
|
||||
|
@ -350,31 +373,45 @@ writeRetrievedContent
|
|||
-> Maybe Handle
|
||||
-> Maybe MeterUpdate
|
||||
-> ContentSource
|
||||
-> Annex ()
|
||||
writeRetrievedContent dest enc encc mh mp content = case (enc, mh, content) of
|
||||
-> Maybe IncrementalVerifier
|
||||
-> Annex (Maybe IncrementalVerifier)
|
||||
writeRetrievedContent dest enc encc mh mp content miv = case (enc, mh, content) of
|
||||
(Nothing, Nothing, FileContent f)
|
||||
| f == dest -> noop
|
||||
| otherwise -> liftIO $ moveFile f dest
|
||||
| f == dest -> return Nothing
|
||||
| otherwise -> do
|
||||
liftIO $ moveFile f dest
|
||||
return Nothing
|
||||
(Just (cipher, _), _, ByteContent b) -> do
|
||||
cmd <- gpgCmd <$> Annex.getGitConfig
|
||||
decrypt cmd encc cipher (feedBytes b) $
|
||||
readBytes write
|
||||
return miv
|
||||
(Just (cipher, _), _, FileContent f) -> do
|
||||
cmd <- gpgCmd <$> Annex.getGitConfig
|
||||
withBytes content $ \b ->
|
||||
decrypt cmd encc cipher (feedBytes b) $
|
||||
readBytes write
|
||||
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f)
|
||||
return miv
|
||||
(Nothing, _, FileContent f) -> do
|
||||
withBytes content write
|
||||
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f)
|
||||
(Nothing, _, ByteContent b) -> write b
|
||||
return miv
|
||||
(Nothing, _, ByteContent b) -> do
|
||||
write b
|
||||
return miv
|
||||
where
|
||||
write b = case mh of
|
||||
Just h -> liftIO $ b `streamto` h
|
||||
Nothing -> liftIO $ bracket opendest hClose (b `streamto`)
|
||||
streamto b h = case mp of
|
||||
Just p -> meteredWrite p (S.hPut h) b
|
||||
Just h -> liftIO $ write' b h
|
||||
Nothing -> liftIO $ bracket opendest hClose (write' b)
|
||||
write' b h = case mp of
|
||||
Just p ->
|
||||
let writer = case miv of
|
||||
Just iv -> \s -> do
|
||||
updateIncremental iv s
|
||||
S.hPut h s
|
||||
Nothing -> S.hPut h
|
||||
in meteredWrite p writer b
|
||||
Nothing -> L.hPut h b
|
||||
opendest = openBinaryFile dest WriteMode
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Remote.Helper.Special (
|
||||
|
@ -111,7 +112,7 @@ fileRetriever a k m callback = do
|
|||
-- A Retriever that generates a lazy ByteString containing the Key's
|
||||
-- content, and passes it to a callback action which will fully consume it
|
||||
-- before returning.
|
||||
byteRetriever :: (Key -> (L.ByteString -> Annex ()) -> Annex ()) -> Retriever
|
||||
byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> (ContentSource -> Annex a) -> Annex a
|
||||
byteRetriever a k _m callback = a k (callback . ByteContent)
|
||||
|
||||
{- The base Remote that is provided to specialRemote needs to have
|
||||
|
@ -216,11 +217,12 @@ specialRemote' cfg c storer retriever remover checkpresent baser = encr
|
|||
enck = maybe id snd enc
|
||||
|
||||
-- call retriever to get chunks; decrypt them; stream to dest file
|
||||
retrieveKeyFileGen k dest p enc = do
|
||||
retrieveKeyFileGen k dest p enc =
|
||||
displayprogress p k Nothing $ \p' ->
|
||||
retrieveChunks retriever (uuid baser) chunkconfig
|
||||
enck k dest p' enc encr
|
||||
return UnVerified
|
||||
retrieveChunks retriever
|
||||
(uuid baser)
|
||||
(RemoteVerify baser)
|
||||
chunkconfig enck k dest p' enc encr
|
||||
where
|
||||
enck = maybe id snd enc
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.Hook (remote) where
|
||||
|
||||
import Annex.Common
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
{-# LANGUAGE TypeFamilies #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Remote.S3 (remote, iaHost, configIA, iaItemUrl) where
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Remote.WebDAV (remote, davCreds, configUrl) where
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
|
||||
module Types.StoreRetrieve where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -28,7 +30,7 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex ()
|
|||
-- Action that retrieves a Key's content from a remote, passing it to a
|
||||
-- callback, which will fully consume the content before returning.
|
||||
-- Throws exception if key is not present, or remote is not accessible.
|
||||
type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex ()) -> Annex ()
|
||||
type Retriever = forall a. Key -> MeterUpdate -> (ContentSource -> Annex a) -> Annex a
|
||||
|
||||
-- Action that removes a Key's content from a remote.
|
||||
-- Succeeds if key is already not present.
|
||||
|
|
Loading…
Reference in a new issue