cache the serialization of a Key

This will speed up the common case where a Key is deserialized from
disk, but is then serialized to build eg, the path to the annex object.

Previously attempted in 4536c93bb2
and reverted in 96aba8eff7.
The problems mentioned in the latter commit are addressed now:

Read/Show of KeyData is backwards-compatible with Read/Show of Key from before
this change, so Types.Distribution will keep working.

The Eq instance is fixed.

Also, Key has smart constructors, avoiding needing to remember to update
the cached serialization.

Used git-annex benchmark:
  find is 7% faster
  whereis is 3% faster
  get when all files are already present is 5% faster
Generally, the benchmarks are running 0.1 seconds faster per 2000 files,
on a ram disk in my laptop.
This commit is contained in:
Joey Hess 2019-11-22 16:24:04 -04:00
parent e296637737
commit 81d402216d
53 changed files with 388 additions and 289 deletions

View file

@ -68,8 +68,10 @@ newtype ChunkKeyStream = ChunkKeyStream [Key]
chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
where
mk chunknum = sizedk { keyChunkNum = Just chunknum }
sizedk = basek { keyChunkSize = Just (toInteger chunksize) }
mk chunknum = alterKey sizedk $ \d -> d
{ keyChunkNum = Just chunknum }
sizedk = alterKey basek $ \d -> d
{ keyChunkSize = Just (toInteger chunksize) }
nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
@ -80,7 +82,7 @@ takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l
-- Number of chunks already consumed from the stream.
numChunks :: ChunkKeyStream -> Integer
numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
numChunks = pred . fromJust . fromKey keyChunkNum . fst . nextChunkKeyStream
{- Splits up the key's content into chunks, passing each chunk to
- the storer action, along with a corresponding chunk key and a
@ -173,7 +175,7 @@ seekResume
-> Annex (ChunkKeyStream, BytesProcessed)
seekResume h encryptor chunkkeys checker = do
sz <- liftIO (hFileSize h)
if sz <= fromMaybe 0 (keyChunkSize $ fst $ nextChunkKeyStream chunkkeys)
if sz <= fromMaybe 0 (fromKey keyChunkSize $ fst $ nextChunkKeyStream chunkkeys)
then return (chunkkeys, zeroBytesProcessed)
else check 0 chunkkeys sz
where
@ -193,7 +195,7 @@ seekResume h encryptor chunkkeys checker = do
return (cks, toBytesProcessed pos)
where
(k, cks') = nextChunkKeyStream cks
pos' = pos + fromMaybe 0 (keyChunkSize k)
pos' = pos + fromMaybe 0 (fromKey keyChunkSize k)
{- Removes all chunks of a key from a remote, by calling a remover
- action on each.
@ -208,7 +210,7 @@ removeChunks remover u chunkconfig encryptor k = do
ls <- chunkKeys u chunkconfig k
ok <- allM (remover . encryptor) (concat ls)
when ok $ do
let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls
let chunksizes = catMaybes $ map (fromKey keyChunkSize <=< headMaybe) ls
forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral
return ok
@ -272,7 +274,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink
bracketIO (maybe opennew openresume offset) hClose $ \h -> do
void $ tosink (Just h) p content
let sz = toBytesProcessed $
fromMaybe 0 $ keyChunkSize k
fromMaybe 0 $ fromKey keyChunkSize k
getrest p h sz sz ks
`catchNonAsync` unable
case v of
@ -333,7 +335,7 @@ setupResume :: [[Key]] -> Integer -> [[Key]]
setupResume ls currsize = map dropunneeded ls
where
dropunneeded [] = []
dropunneeded l@(k:_) = case keyChunkSize k of
dropunneeded l@(k:_) = case fromKey keyChunkSize k of
Just chunksize | chunksize > 0 ->
genericDrop (currsize `div` chunksize) l
_ -> l

View file

@ -324,7 +324,7 @@ adjustExportImport r rs = case M.lookup "exporttree" (config r) of
liftIO $ Export.getExportTree db k
retrieveKeyFileFromExport dbv k _af dest p = unVerified $
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (fromKey keyVariety k))
then do
locs <- getexportlocs dbv k
case locs of
@ -336,5 +336,5 @@ adjustExportImport r rs = case M.lookup "exporttree" (config r) of
return False
(l:_) -> retrieveExport (exportActions r) k l dest p
else do
warning $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (keyVariety k)) ++ " backend"
warning $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend"
return False