faster storeChunks

No need to process each L.ByteString chunk, instead ask it to split.

Doesn't seem to have really sped things up much, but it also made the code
simpler.

Note that this does (and already did) buffer in memory. It seems that only
the directory special remote could take advantage of streaming chunks to
files w/o buffering, so probably won't add an interface to allow for that.
This commit is contained in:
Joey Hess 2014-07-27 01:18:38 -04:00
parent f3e47b16a5
commit c3af4897c0

View file

@@ -26,7 +26,6 @@ import Utility.Metered
 import Crypto (EncKey)
 import qualified Data.ByteString.Lazy as L
-import qualified Data.ByteString as S
 import qualified Data.Map as M

 data ChunkConfig
@@ -70,6 +69,14 @@ numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
 - the storer action, along with a corresponding chunk key and a
 - progress meter update callback.
 -
+- This buffers each chunk in memory, so can use a lot of memory
+- with a large ChunkSize.
+- More optimal versions of this can be written, that rely
+- on L.toChunks to split the lazy bytestring into chunks (typically
+- smaller than the ChunkSize), and eg, write those chunks to a Handle.
+- But this is the best that can be done with the storer interface that
+- writes a whole L.ByteString at a time.
+-
 - This action may be called on a chunked key. It will simply store it.
 -}
 storeChunks
@@ -90,39 +97,26 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate ->
 		_ -> liftIO $ storer k b meterupdate

 	gochunks :: MeterUpdate -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool
-	gochunks meterupdate chunksize lb =
-		loop zeroBytesProcessed chunksize (L.toChunks lb) []
+	gochunks meterupdate chunksize = loop zeroBytesProcessed . splitchunk
 	  where
-		loop bytesprocessed sz [] c chunkkeys
-			-- Always store at least one chunk,
-			-- even for empty content.
-			| not (null c) || numchunks == 0 =
-				storechunk bytesprocessed sz [] c chunkkeys
-			-- Once all chunks are successfully stored,
-			-- update the chunk log.
-			| otherwise = do
-				chunksStored u k chunksize numchunks
-				return True
-		  where
-			numchunks = numChunks chunkkeys
-		loop bytesprocessed sz (b:bs) c chunkkeys
-			| s <= sz || sz == chunksize =
-				loop bytesprocessed sz' bs (b:c) chunkkeys
-			| otherwise =
-				storechunk bytesprocessed sz' bs (b:c) chunkkeys
-		  where
-			s = fromIntegral (S.length b)
-			sz' = sz - s
-		storechunk bytesprocessed sz bs c chunkkeys = do
-			let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys
-			ifM (liftIO $ storer chunkkey (L.fromChunks $ reverse c) meterupdate')
-				( do
-					let bytesprocessed' = addBytesProcessed bytesprocessed (chunksize - sz)
-					loop bytesprocessed' chunksize bs [] chunkkeys'
-				, return False
-				)
-		  where
+		splitchunk = L.splitAt chunksize
+
+		loop bytesprocessed (chunk, bs) chunkkeys
+			| L.null chunk && numchunks > 0 = do
+				-- Once all chunks are successfully
+				-- stored, update the chunk log.
+				chunksStored u k chunksize numchunks
+				return True
+			| otherwise = do
+				let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys
+				ifM (liftIO $ storer chunkkey chunk meterupdate')
+					( do
+						let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk)
+						loop bytesprocessed' (splitchunk bs) chunkkeys'
+					, return False
+					)
+		  where
+			numchunks = numChunks chunkkeys

 			{- The MeterUpdate that is passed to the action
 			- storing a chunk is offset, so that it reflects
 			- the total bytes that have already been stored