git-annex/Key.hs
Joey Hess 81d402216d cache the serialization of a Key
This will speed up the common case where a Key is deserialized from
disk, but is then serialized to build eg, the path to the annex object.

Previously attempted in 4536c93bb2
and reverted in 96aba8eff7.
The problems mentioned in the latter commit are addressed now:

Read/Show of KeyData is backwards-compatible with Read/Show of Key from before
this change, so Types.Distribution will keep working.

The Eq instance is fixed.

Also, Key has smart constructors, avoiding needing to remember to update
the cached serialization.

Used git-annex benchmark:
  find is 7% faster
  whereis is 3% faster
  get when all files are already present is 5% faster
Generally, the benchmarks are running 0.1 seconds faster per 2000 files,
on a ram disk in my laptop.
2019-11-22 17:49:16 -04:00

101 lines
2.6 KiB
Haskell

{- git-annex Keys
-
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Key (
Key,
KeyData(..),
AssociatedFile(..),
fromKey,
mkKey,
alterKey,
keyParser,
serializeKey,
serializeKey',
deserializeKey,
deserializeKey',
nonChunkKey,
chunkKeyOffset,
isChunkKey,
isKeyPrefix,
splitKeyNameExtension,
prop_isomorphic_key_encode
) where
import qualified Data.Text as T
import qualified Data.ByteString as S
import qualified Data.Attoparsec.ByteString as A
import Common
import Types.Key
import Utility.QuickCheck
import Utility.Bloom
import Utility.Aeson
import qualified Utility.SimpleProtocol as Proto
-- Gets the parent of a chunk key.
nonChunkKey :: Key -> Key
nonChunkKey k
| fromKey keyChunkSize k == Nothing && fromKey keyChunkNum k == Nothing = k
| otherwise = alterKey k $ \d -> d
{ keyChunkSize = Nothing
, keyChunkNum = Nothing
}
-- Where a chunk key is offset within its parent.
chunkKeyOffset :: Key -> Maybe Integer
chunkKeyOffset k = (*)
<$> fromKey keyChunkSize k
<*> (pred <$> fromKey keyChunkNum k)
isChunkKey :: Key -> Bool
isChunkKey k = isJust (fromKey keyChunkSize k) && isJust (fromKey keyChunkNum k)
serializeKey :: Key -> String
serializeKey = decodeBS' . serializeKey'
serializeKey' :: Key -> S.ByteString
serializeKey' = keySerialization
deserializeKey :: String -> Maybe Key
deserializeKey = deserializeKey' . encodeBS'
deserializeKey' :: S.ByteString -> Maybe Key
deserializeKey' = either (const Nothing) Just . A.parseOnly keyParser
instance Arbitrary KeyData where
arbitrary = Key
<$> (encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t"))
<*> (parseKeyVariety . encodeBS <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
instance Arbitrary Key where
arbitrary = mkKey . const <$> arbitrary
instance Hashable Key where
hashIO32 = hashIO32 . serializeKey'
hashIO64 = hashIO64 . serializeKey'
instance ToJSON' Key where
toJSON' = toJSON' . serializeKey
instance FromJSON Key where
parseJSON (String t) = maybe mempty pure $ deserializeKey $ T.unpack t
parseJSON _ = mempty
instance Proto.Serializable Key where
serialize = serializeKey
deserialize = deserializeKey
prop_isomorphic_key_encode :: Key -> Bool
prop_isomorphic_key_encode k = Just k == (deserializeKey . serializeKey) k