cache the serialization of a Key
This will speed up the common case where a Key is deserialized from disk, but is then serialized to build eg, the path to the annex object. Previously attempted in4536c93bb2
and reverted in96aba8eff7
. The problems mentioned in the latter commit are addressed now: Read/Show of KeyData is backwards-compatible with Read/Show of Key from before this change, so Types.Distribution will keep working. The Eq instance is fixed. Also, Key has smart constructors, avoiding needing to remember to update the cached serialization. Used git-annex benchmark: find is 7% faster whereis is 3% faster get when all files are already present is 5% faster Generally, the benchmarks are running 0.1 seconds faster per 2000 files, on a ram disk in my laptop.
This commit is contained in:
parent
e296637737
commit
81d402216d
53 changed files with 388 additions and 289 deletions
173
Types/Key.hs
173
Types/Key.hs
|
@ -7,19 +7,47 @@
|
|||
|
||||
{-# LANGUAGE OverloadedStrings, DeriveGeneric #-}
|
||||
|
||||
module Types.Key where
|
||||
module Types.Key (
|
||||
KeyData(..),
|
||||
Key,
|
||||
fromKey,
|
||||
mkKey,
|
||||
alterKey,
|
||||
isKeyPrefix,
|
||||
splitKeyNameExtension,
|
||||
keyParser,
|
||||
keySerialization,
|
||||
AssociatedFile(..),
|
||||
KeyVariety(..),
|
||||
HasExt(..),
|
||||
HashSize(..),
|
||||
hasExt,
|
||||
sameExceptExt,
|
||||
cryptographicallySecure,
|
||||
isVerifiable,
|
||||
formatKeyVariety,
|
||||
parseKeyVariety,
|
||||
) where
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Data.ByteString.Builder
|
||||
import Data.ByteString.Builder.Extra
|
||||
import qualified Data.Attoparsec.ByteString as A
|
||||
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
||||
import Data.List
|
||||
import System.Posix.Types
|
||||
import Foreign.C.Types
|
||||
import Data.Monoid
|
||||
import Control.Applicative
|
||||
import GHC.Generics
|
||||
import Control.DeepSeq
|
||||
import Prelude
|
||||
|
||||
{- A Key has a unique name, which is derived from a particular backend,
|
||||
- and may contain other optional metadata. -}
|
||||
data Key = Key
|
||||
data KeyData = Key
|
||||
{ keyName :: S.ByteString
|
||||
, keyVariety :: KeyVariety
|
||||
, keySize :: Maybe Integer
|
||||
|
@ -28,8 +56,149 @@ data Key = Key
|
|||
, keyChunkNum :: Maybe Integer
|
||||
} deriving (Eq, Ord, Read, Show, Generic)
|
||||
|
||||
instance NFData KeyData
|
||||
|
||||
{- Caching the seralization of a key is an optimization.
|
||||
-
|
||||
- This constructor is not exported, and all smart constructors maintain
|
||||
- the serialization.
|
||||
-}
|
||||
data Key = MkKey
|
||||
{ keyData :: KeyData
|
||||
, keySerialization :: S.ByteString
|
||||
} deriving (Show, Generic)
|
||||
|
||||
instance Eq Key where
|
||||
-- comparing the serialization would be unncessary work
|
||||
a == b = keyData a == keyData b
|
||||
|
||||
instance Ord Key where
|
||||
compare a b = compare (keyData a) (keyData b)
|
||||
|
||||
instance NFData Key
|
||||
|
||||
{- Access a field of data from the KeyData. -}
|
||||
{-# INLINE fromKey #-}
|
||||
fromKey :: (KeyData -> a) -> Key -> a
|
||||
fromKey f = f . keyData
|
||||
|
||||
{- Smart constructor for a Key. The provided KeyData has all values empty. -}
|
||||
mkKey :: (KeyData -> KeyData) -> Key
|
||||
mkKey f =
|
||||
let d = f stub
|
||||
in MkKey d (mkKeySerialization d)
|
||||
where
|
||||
stub = Key
|
||||
{ keyName = mempty
|
||||
, keyVariety = OtherKey mempty
|
||||
, keySize = Nothing
|
||||
, keyMtime = Nothing
|
||||
, keyChunkSize = Nothing
|
||||
, keyChunkNum = Nothing
|
||||
}
|
||||
|
||||
{- Alter a Key's data. -}
|
||||
alterKey :: Key -> (KeyData -> KeyData) -> Key
|
||||
alterKey k f =
|
||||
let d = f (keyData k)
|
||||
in MkKey d (mkKeySerialization d)
|
||||
|
||||
-- Checks if a string looks like at least the start of a key.
|
||||
isKeyPrefix :: String -> Bool
|
||||
isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s
|
||||
|
||||
fieldSep :: Char
|
||||
fieldSep = '-'
|
||||
|
||||
mkKeySerialization :: KeyData -> S.ByteString
|
||||
mkKeySerialization = L.toStrict
|
||||
. toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty
|
||||
. buildKeyData
|
||||
|
||||
{- Builds a ByteString from a KeyData.
|
||||
-
|
||||
- The name field is always shown last, separated by doubled fieldSeps,
|
||||
- and is the only field allowed to contain the fieldSep.
|
||||
-}
|
||||
buildKeyData :: KeyData -> Builder
|
||||
buildKeyData k = byteString (formatKeyVariety (keyVariety k))
|
||||
<> 's' ?: (integerDec <$> keySize k)
|
||||
<> 'm' ?: (integerDec . (\(CTime t) -> fromIntegral t) <$> keyMtime k)
|
||||
<> 'S' ?: (integerDec <$> keyChunkSize k)
|
||||
<> 'C' ?: (integerDec <$> keyChunkNum k)
|
||||
<> sepbefore (sepbefore (byteString (keyName k)))
|
||||
where
|
||||
sepbefore s = char7 fieldSep <> s
|
||||
c ?: (Just b) = sepbefore (char7 c <> b)
|
||||
_ ?: Nothing = mempty
|
||||
|
||||
{- This is a strict parser for security reasons; a key
|
||||
- can contain only 4 fields, which all consist only of numbers.
|
||||
- Any key containing other fields, or non-numeric data will fail
|
||||
- to parse.
|
||||
-
|
||||
- If a key contained non-numeric fields, they could be used to
|
||||
- embed data used in a SHA1 collision attack, which would be a
|
||||
- problem since the keys are committed to git.
|
||||
-}
|
||||
keyParser :: A.Parser Key
|
||||
keyParser = do
|
||||
-- key variety cannot be empty
|
||||
v <- (parseKeyVariety <$> A8.takeWhile1 (/= fieldSep))
|
||||
s <- parsesize
|
||||
m <- parsemtime
|
||||
cs <- parsechunksize
|
||||
cn <- parsechunknum
|
||||
_ <- A8.char fieldSep
|
||||
_ <- A8.char fieldSep
|
||||
n <- A.takeByteString
|
||||
if validKeyName v n
|
||||
then
|
||||
let d = Key
|
||||
{ keyName = n
|
||||
, keyVariety = v
|
||||
, keySize = s
|
||||
, keyMtime = m
|
||||
, keyChunkSize = cs
|
||||
, keyChunkNum = cn
|
||||
}
|
||||
in pure $ MkKey d (mkKeySerialization d)
|
||||
else fail "invalid keyName"
|
||||
where
|
||||
parseopt p = (Just <$> (A8.char fieldSep *> p)) <|> pure Nothing
|
||||
parsesize = parseopt $ A8.char 's' *> A8.decimal
|
||||
parsemtime = parseopt $ CTime <$> (A8.char 'm' *> A8.decimal)
|
||||
parsechunksize = parseopt $ A8.char 'S' *> A8.decimal
|
||||
parsechunknum = parseopt $ A8.char 'C' *> A8.decimal
|
||||
|
||||
{- Limits the length of the extension in the keyName to mitigate against
|
||||
- SHA1 collision attacks.
|
||||
-
|
||||
- In such an attack, the extension of the key could be made to contain
|
||||
- the collision generation data, with the result that a signed git commit
|
||||
- including such keys would not be secure.
|
||||
-
|
||||
- The maximum extension length ever generated for such a key was 8
|
||||
- characters, but they may be unicode which could use up to 4 bytes each,
|
||||
- so 32 bytes. 64 bytes is used here to give a little future wiggle-room.
|
||||
- The SHA1 common-prefix attack needs 128 bytes of data.
|
||||
-}
|
||||
validKeyName :: KeyVariety -> S.ByteString -> Bool
|
||||
validKeyName kv name
|
||||
| hasExt kv =
|
||||
let ext = snd $ splitKeyNameExtension' name
|
||||
in S.length ext <= 64
|
||||
| otherwise = True
|
||||
|
||||
{- This splits any extension out of the keyName, returning the
|
||||
- keyName minus extension, and the extension (including leading dot).
|
||||
-}
|
||||
splitKeyNameExtension :: Key -> (S.ByteString, S.ByteString)
|
||||
splitKeyNameExtension = splitKeyNameExtension' . keyName . keyData
|
||||
|
||||
splitKeyNameExtension' :: S.ByteString -> (S.ByteString, S.ByteString)
|
||||
splitKeyNameExtension' keyname = S8.span (/= '.') keyname
|
||||
|
||||
{- A filename may be associated with a Key. -}
|
||||
newtype AssociatedFile = AssociatedFile (Maybe FilePath)
|
||||
deriving (Show, Eq, Ord)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue