git-annex/Key.hs
Joey Hess 067aabdd48
wip RawFilePath 2x git-annex find speedup
Finally builds (oh the agoncy of making it build), but still very
unmergable, only Command.Find is included and lots of stuff is badly
hacked to make it compile.

Benchmarking vs master, this git-annex find is significantly faster!
Specifically:

	num files	old	new	speedup
	48500		4.77	3.73	28%
	12500		1.36	1.02	66%
	20		0.075	0.074	0% (so startup time is unchanged)

That's without really finishing the optimization. Things still to do:

* Eliminate all the fromRawFilePath, toRawFilePath, encodeBS,
  decodeBS conversions.
* Use versions of IO actions like getFileStatus that take a RawFilePath.
* Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy.
* Use ByteString for parsing git config to speed up startup.

It's likely several of those will speed up git-annex find further.
And other commands will certianly benefit even more.
2019-11-26 16:01:58 -04:00

106 lines
2.8 KiB
Haskell

{- git-annex Keys
-
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Key (
Key,
KeyData(..),
AssociatedFile(..),
fromKey,
mkKey,
alterKey,
keyParser,
serializeKey,
serializeKey',
deserializeKey,
deserializeKey',
nonChunkKey,
chunkKeyOffset,
isChunkKey,
isKeyPrefix,
splitKeyNameExtension,
prop_isomorphic_key_encode
) where
import qualified Data.Text as T
import qualified Data.ByteString as S
import qualified Data.Attoparsec.ByteString as A
import Common
import Types.Key
import Utility.QuickCheck
import Utility.Bloom
import Utility.Aeson
import qualified Utility.SimpleProtocol as Proto
-- Gets the parent of a chunk key.
nonChunkKey :: Key -> Key
nonChunkKey k
| fromKey keyChunkSize k == Nothing && fromKey keyChunkNum k == Nothing = k
| otherwise = alterKey k $ \d -> d
{ keyChunkSize = Nothing
, keyChunkNum = Nothing
}
-- Where a chunk key is offset within its parent.
chunkKeyOffset :: Key -> Maybe Integer
chunkKeyOffset k = (*)
<$> fromKey keyChunkSize k
<*> (pred <$> fromKey keyChunkNum k)
isChunkKey :: Key -> Bool
isChunkKey k = isJust (fromKey keyChunkSize k) && isJust (fromKey keyChunkNum k)
serializeKey :: Key -> String
serializeKey = decodeBS' . serializeKey'
serializeKey' :: Key -> S.ByteString
serializeKey' = keySerialization
deserializeKey :: String -> Maybe Key
deserializeKey = deserializeKey' . encodeBS'
deserializeKey' :: S.ByteString -> Maybe Key
deserializeKey' = either (const Nothing) Just . A.parseOnly keyParser
instance Arbitrary KeyData where
arbitrary = Key
<$> (encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t"))
<*> (parseKeyVariety . encodeBS <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
-- AssociatedFile cannot be empty (but can be Nothing)
instance Arbitrary AssociatedFile where
arbitrary = AssociatedFile . fmap toRawFilePath
<$> arbitrary `suchThat` (/= Just "")
instance Arbitrary Key where
arbitrary = mkKey . const <$> arbitrary
instance Hashable Key where
hashIO32 = hashIO32 . serializeKey'
hashIO64 = hashIO64 . serializeKey'
instance ToJSON' Key where
toJSON' = toJSON' . serializeKey
instance FromJSON Key where
parseJSON (String t) = maybe mempty pure $ deserializeKey $ T.unpack t
parseJSON _ = mempty
instance Proto.Serializable Key where
serialize = serializeKey
deserialize = deserializeKey
prop_isomorphic_key_encode :: Key -> Bool
prop_isomorphic_key_encode k = Just k == (deserializeKey . serializeKey) k