convert Key to ShortByteString

This adds the overhead of a copy when serializing and deserializing keys.
I have not benchmarked much, but runtimes seem barely changed at all by that.

When a lot of keys are in memory, it improves memory use.

And, it prevents keys sometimes getting PINNED in memory and failing to GC,
which is a problem ByteString has sometimes. In particular, git-annex sync
from a borg special remote had that problem and this improved its memory
use by a large amount.

Sponsored-by: Shae Erisson on Patreon
This commit is contained in:
Joey Hess 2021-10-05 20:20:08 -04:00
parent 012b71e471
commit 19e78816f0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
15 changed files with 65 additions and 36 deletions

5
Key.hs
View file

@ -31,6 +31,7 @@ module Key (
import qualified Data.Text as T
import qualified Data.ByteString as S
import qualified Data.ByteString.Short as S (toShort, fromShort)
import qualified Data.Attoparsec.ByteString as A
import Common
@ -62,7 +63,7 @@ serializeKey :: Key -> String
serializeKey = decodeBS . serializeKey'
serializeKey' :: Key -> S.ByteString
serializeKey' = keySerialization
serializeKey' = S.fromShort . keySerialization
deserializeKey :: String -> Maybe Key
deserializeKey = deserializeKey' . encodeBS
@ -72,7 +73,7 @@ deserializeKey' = eitherToMaybe . A.parseOnly keyParser
instance Arbitrary KeyData where
arbitrary = Key
<$> (encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t"))
<$> (S.toShort . encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t"))
<*> (parseKeyVariety . encodeBS <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative