Avoiding using a callback simplifies this and should make it easier to
implement incremental checksumming, which will need to happen partly in
writeRetrievedContent and partly in retrieveChunks.
This commit is contained in:
Joey Hess 2021-02-16 15:46:14 -04:00
parent 48310f2d55
commit 381f203d1a
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 88 additions and 78 deletions

View file

@ -20,16 +20,19 @@ module Remote.Helper.Chunked (
) where
import Annex.Common
import qualified Annex
import Utility.DataUnits
import Types.StoreRetrieve
import Types.Remote
import Types.ProposedAccepted
import Logs.Chunk
import Utility.Metered
import Crypto (EncKey)
import Crypto
import Backend (isStableKey)
import Annex.SpecialRemote.Config
import qualified Utility.RawFilePath as R
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
data ChunkConfig
@ -109,16 +112,19 @@ numChunks = pred . fromJust . fromKey keyChunkNum . fst . nextChunkKeyStream
- writes a whole L.ByteString at a time.
-}
storeChunks
:: UUID
:: LensGpgEncParams encc
=> UUID
-> ChunkConfig
-> EncKey
-> Key
-> FilePath
-> MeterUpdate
-> Maybe (Cipher, EncKey)
-> encc
-> Storer
-> CheckPresent
-> Annex ()
storeChunks u chunkconfig encryptor k f p storer checker =
storeChunks u chunkconfig encryptor k f p enc encc storer checker =
case chunkconfig of
-- Only stable keys are safe to store chunked,
-- because an unstable key can have multiple different
@ -129,9 +135,9 @@ storeChunks u chunkconfig encryptor k f p storer checker =
h <- liftIO $ openBinaryFile f ReadMode
go chunksize h
liftIO $ hClose h
, storer k (FileContent f) p
, storechunk k (FileContent f) p
)
_ -> storer k (FileContent f) p
_ -> storechunk k (FileContent f) p
where
go chunksize h = do
let chunkkeys = chunkKeyStream k chunksize
@ -152,7 +158,7 @@ storeChunks u chunkconfig encryptor k f p storer checker =
| otherwise = do
liftIO $ meterupdate' zeroBytesProcessed
let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys
storer chunkkey (ByteContent chunk) meterupdate'
storechunk chunkkey (ByteContent chunk) meterupdate'
let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk)
loop bytesprocessed' (splitchunk bs) chunkkeys'
where
@ -163,6 +169,15 @@ storeChunks u chunkconfig encryptor k f p storer checker =
- in previous chunks. -}
meterupdate' = offsetMeterUpdate meterupdate bytesprocessed
storechunk ck content meterupdate = case enc of
Nothing -> storer ck content meterupdate
(Just (cipher, enck)) -> do
cmd <- gpgCmd <$> Annex.getGitConfig
withBytes content $ \b ->
encrypt cmd encc cipher (feedBytes b) $
readBytes $ \encb ->
storer (enck ck) (ByteContent encb) meterupdate
{- Check if any of the chunk keys are present. If found, seek forward
- in the Handle, so it will be read starting at the first missing chunk.
- Returns the ChunkKeyStream truncated to start at the first missing
@ -219,28 +234,31 @@ removeChunks remover u chunkconfig encryptor k = do
-
- When the remote is chunked, tries each of the options returned by
- chunkKeys until it finds one where the retriever successfully
- gets the first chunked key. The content of that key, and any
- other chunks in the list is fed to the sink.
- gets the first chunked key.
-
- If retrival of one of the subsequent chunks throws an exception,
- gives up. Note that partial data may have been written to the sink
- gives up. Note that partial data may have been written to the file
- in this case.
-
- Resuming is supported when using chunks. When the destination file
- already exists, it skips to the next chunked key that would be needed
- to resume.
-
- Handles decrypting the content when encryption is used.
-}
retrieveChunks
:: Retriever
:: LensGpgEncParams encc
=> Retriever
-> UUID
-> ChunkConfig
-> EncKey
-> Key
-> FilePath
-> MeterUpdate
-> (Maybe Handle -> Maybe MeterUpdate -> ContentSource -> Annex ())
-> Maybe (Cipher, EncKey)
-> encc
-> Annex ()
retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
retrieveChunks retriever u chunkconfig encryptor basek dest basep enc encc
| noChunks chunkconfig =
-- Optimisation: Try the unchunked key first, to avoid
-- looking in the git-annex branch for chunk counts
@ -271,7 +289,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
v <- tryNonAsync $
retriever (encryptor k) p $ \content ->
bracketIO (maybe opennew openresume offset) hClose $ \h -> do
tosink (Just h) p content
retrieved (Just h) p content
let sz = toBytesProcessed $
fromMaybe 0 $ fromKey keyChunkSize k
getrest p h sz sz ks
@ -285,10 +303,10 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
getrest p h sz bytesprocessed (k:ks) = do
let p' = offsetMeterUpdate p bytesprocessed
liftIO $ p' zeroBytesProcessed
retriever (encryptor k) p' $ tosink (Just h) p'
retriever (encryptor k) p' $ retrieved (Just h) p'
getrest p h sz (addBytesProcessed bytesprocessed sz) ks
getunchunked = retriever (encryptor basek) basep $ tosink Nothing basep
getunchunked = retriever (encryptor basek) basep $ retrieved Nothing basep
opennew = openBinaryFile dest WriteMode
@ -305,15 +323,61 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
-
- However, if the Retriever generates a lazy ByteString,
- it is not responsible for updating progress (often it cannot).
- Instead, the sink is passed a meter to update as it consumes
- the ByteString.
- Instead, writeRetrievedContent is passed a meter to update
- as it consumes the ByteString.
-}
tosink h p content = sink h p' content
retrieved h p content = writeRetrievedContent dest enc encc h p' content
where
p'
| isByteContent content = Just p
| otherwise = Nothing
{- Writes retrieved file content into the provided Handle, decrypting it
- first if necessary.
-
- If the remote did not store the content using chunks, no Handle
- will be provided, and it's up to us to open the destination file.
-
- Note that when neither chunking nor encryption is used, and the remote
- provides FileContent, that file only needs to be renamed
- into place. (And it may even already be in the right place..)
-}
writeRetrievedContent
:: LensGpgEncParams encc
=> FilePath
-> Maybe (Cipher, EncKey)
-> encc
-> Maybe Handle
-> Maybe MeterUpdate
-> ContentSource
-> Annex ()
writeRetrievedContent dest enc encc mh mp content = case (enc, mh, content) of
(Nothing, Nothing, FileContent f)
| f == dest -> noop
| otherwise -> liftIO $ moveFile f dest
(Just (cipher, _), _, ByteContent b) -> do
cmd <- gpgCmd <$> Annex.getGitConfig
decrypt cmd encc cipher (feedBytes b) $
readBytes write
(Just (cipher, _), _, FileContent f) -> do
cmd <- gpgCmd <$> Annex.getGitConfig
withBytes content $ \b ->
decrypt cmd encc cipher (feedBytes b) $
readBytes write
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f)
(Nothing, _, FileContent f) -> do
withBytes content write
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f)
(Nothing, _, ByteContent b) -> write b
where
write b = case mh of
Just h -> liftIO $ b `streamto` h
Nothing -> liftIO $ bracket opendest hClose (b `streamto`)
streamto b h = case mp of
Just p -> meteredWrite p (S.hPut h) b
Nothing -> L.hPut h b
opendest = openBinaryFile dest WriteMode
{- Can resume when the chunk's offset is at or before the end of
- the dest file. -}
resumeOffset :: Maybe Integer -> Key -> Maybe Integer
@ -455,3 +519,7 @@ ensureChunksAreLogged :: UUID -> Key -> ChunkKeys -> Annex ()
ensureChunksAreLogged u k (SpeculativeChunkKeys (chunkmethod, chunkcount) _) =
chunksStored u k chunkmethod chunkcount
ensureChunksAreLogged _ _ (ChunkKeys _) = return ()
withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a
withBytes (ByteContent b) a = a b
withBytes (FileContent f) a = a =<< liftIO (L.readFile f)

View file

@ -35,11 +35,9 @@ module Remote.Helper.Special (
) where
import Annex.Common
import qualified Annex
import Annex.SpecialRemote.Config
import Types.StoreRetrieve
import Types.Remote
import Crypto
import Annex.UUID
import Config
import Config.Cost
@ -51,7 +49,6 @@ import Messages.Progress
import qualified Git
import qualified Git.Construct
import Git.Types
import qualified Utility.RawFilePath as R
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
@ -213,25 +210,16 @@ specialRemote' cfg c storer retriever remover checkpresent baser = encr
storeKeyGen k p enc = sendAnnex k rollback $ \src ->
displayprogress p k (Just src) $ \p' ->
storeChunks (uuid baser) chunkconfig enck k src p'
(storechunk enc)
checkpresent
enc encr storer checkpresent
where
rollback = void $ removeKey encr k
enck = maybe id snd enc
storechunk Nothing k content p = storer k content p
storechunk (Just (cipher, enck)) k content p = do
cmd <- gpgCmd <$> Annex.getGitConfig
withBytes content $ \b ->
encrypt cmd encr cipher (feedBytes b) $
readBytes $ \encb ->
storer (enck k) (ByteContent encb) p
-- call retriever to get chunks; decrypt them; stream to dest file
retrieveKeyFileGen k dest p enc = do
displayprogress p k Nothing $ \p' ->
retrieveChunks retriever (uuid baser) chunkconfig
enck k dest p' (sink dest enc encr)
enck k dest p' enc encr
return UnVerified
where
enck = maybe id snd enc
@ -253,52 +241,6 @@ specialRemote' cfg c storer retriever remover checkpresent baser = encr
metered (Just p) (KeySizer k (pure (fmap toRawFilePath srcfile))) (const a)
| otherwise = a p
{- Sink callback for retrieveChunks. Stores the file content into the
- provided Handle, decrypting it first if necessary.
-
- If the remote did not store the content using chunks, no Handle
- will be provided, and it's up to us to open the destination file.
-
- Note that when neither chunking nor encryption is used, and the remote
- provides FileContent, that file only needs to be renamed
- into place. (And it may even already be in the right place..)
-}
sink
:: LensGpgEncParams c
=> FilePath
-> Maybe (Cipher, EncKey)
-> c
-> Maybe Handle
-> Maybe MeterUpdate
-> ContentSource
-> Annex ()
sink dest enc c mh mp content = case (enc, mh, content) of
(Nothing, Nothing, FileContent f)
| f == dest -> noop
| otherwise -> liftIO $ moveFile f dest
(Just (cipher, _), _, ByteContent b) -> do
cmd <- gpgCmd <$> Annex.getGitConfig
decrypt cmd c cipher (feedBytes b) $
readBytes write
(Just (cipher, _), _, FileContent f) -> do
cmd <- gpgCmd <$> Annex.getGitConfig
withBytes content $ \b ->
decrypt cmd c cipher (feedBytes b) $
readBytes write
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f)
(Nothing, _, FileContent f) -> do
withBytes content write
liftIO $ removeWhenExistsWith R.removeLink (toRawFilePath f)
(Nothing, _, ByteContent b) -> write b
where
write b = case mh of
Just h -> liftIO $ b `streamto` h
Nothing -> liftIO $ bracket opendest hClose (b `streamto`)
streamto b h = case mp of
Just p -> meteredWrite p (S.hPut h) b
Nothing -> L.hPut h b
opendest = openBinaryFile dest WriteMode
withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a
withBytes (ByteContent b) a = a b
withBytes (FileContent f) a = a =<< liftIO (L.readFile f)