cache the serialization of a Key

This will speed up the common case where a Key is deserialized from
disk, but is then serialized to build, e.g., the path to the annex object.

It means that every place a Key has any of its fields changed, the cache
has to be dropped. I've grepped and found them all. But, it would be
better to avoid that gotcha somehow.
This commit is contained in:
Joey Hess 2019-01-14 16:33:20 -04:00
parent 918868915c
commit 4536c93bb2
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 34 additions and 7 deletions

View file

@ -42,6 +42,7 @@ exportKey sha = mk <$> catKey sha
, keyMtime = Nothing , keyMtime = Nothing
, keyChunkSize = Nothing , keyChunkSize = Nothing
, keyChunkNum = Nothing , keyChunkNum = Nothing
, keySerialization = Nothing
} }
exportTree :: Remote.RemoteConfig -> Bool exportTree :: Remote.RemoteConfig -> Bool

View file

@ -58,7 +58,10 @@ genKey source preferredbackend = do
Just k -> Just (makesane k, b) Just k -> Just (makesane k, b)
where where
-- keyNames should not contain newline characters. -- keyNames should not contain newline characters.
makesane k = k { keyName = S8.map fixbadchar (keyName k) } makesane k = k
{ keyName = S8.map fixbadchar (keyName k)
, keySerialization = Nothing
}
fixbadchar c fixbadchar c
| c == '\n' = '_' | c == '\n' = '_'
| otherwise = c | otherwise = c

View file

@ -181,6 +181,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
| migratable && hasExt oldvariety = Just $ oldkey | migratable && hasExt oldvariety = Just $ oldkey
{ keyName = keyHash oldkey { keyName = keyHash oldkey
, keyVariety = newvariety , keyVariety = newvariety
, keySerialization = Nothing
} }
{- Fast migration from hash to hashE backend. -} {- Fast migration from hash to hashE backend. -}
| migratable && hasExt newvariety = case afile of | migratable && hasExt newvariety = case afile of
@ -189,6 +190,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
{ keyName = keyHash oldkey { keyName = keyHash oldkey
<> encodeBS (selectExtension maxextlen file) <> encodeBS (selectExtension maxextlen file)
, keyVariety = newvariety , keyVariety = newvariety
, keySerialization = Nothing
} }
{- Upgrade to fix bad previous migration that created a {- Upgrade to fix bad previous migration that created a
- non-extension preserving key, with an extension - non-extension preserving key, with an extension
@ -196,6 +198,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
| newvariety == oldvariety && not (hasExt oldvariety) && | newvariety == oldvariety && not (hasExt oldvariety) &&
keyHash oldkey /= keyName oldkey = Just $ oldkey keyHash oldkey /= keyName oldkey = Just $ oldkey
{ keyName = keyHash oldkey { keyName = keyHash oldkey
, keySerialization = Nothing
} }
| otherwise = Nothing | otherwise = Nothing
where where
@ -288,5 +291,8 @@ testKeyBackend =
let b = genBackendE (SHA2Hash (HashSize 256)) let b = genBackendE (SHA2Hash (HashSize 256))
in b { getKey = (fmap addE) <$$> getKey b } in b { getKey = (fmap addE) <$$> getKey b }
where where
addE k = k { keyName = keyName k <> longext } addE k = k
{ keyName = keyName k <> longext
, keySerialization = Nothing
}
longext = ".this-is-a-test-key" longext = ".this-is-a-test-key"

View file

@ -14,7 +14,7 @@ git-annex (7.20181212) UNRELEASED; urgency=medium
* importfeed: Better error message when downloading the feed fails. * importfeed: Better error message when downloading the feed fails.
* Some optimisations, including a 10x faster timestamp parser, * Some optimisations, including a 10x faster timestamp parser,
a 7x faster key parser, and improved parsing and serialization of a 7x faster key parser, and improved parsing and serialization of
git-annex branch data. git-annex branch data. Some commands will run up to 15% faster.
* Stricter parser for keys doesn't allow doubled fields or out of order fields. * Stricter parser for keys doesn't allow doubled fields or out of order fields.
* The benchmark command, which only had some old benchmarking of the sqlite * The benchmark command, which only had some old benchmarking of the sqlite
databases before, now allows benchmarking any other git-annex commands. databases before, now allows benchmarking any other git-annex commands.

View file

@ -377,7 +377,10 @@ finishDownloadWith tmp u url file = do
{- Adds the url size to the Key. -} {- Adds the url size to the Key. -}
addSizeUrlKey :: Url.UrlInfo -> Key -> Key addSizeUrlKey :: Url.UrlInfo -> Key -> Key
addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo } addSizeUrlKey urlinfo key = key
{ keySize = Url.urlSize urlinfo
, keySerialization = Nothing
}
{- Adds worktree file to the repository. -} {- Adds worktree file to the repository. -}
addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()

13
Key.hs
View file

@ -51,6 +51,7 @@ stubKey = Key
, keyMtime = Nothing , keyMtime = Nothing
, keyChunkSize = Nothing , keyChunkSize = Nothing
, keyChunkNum = Nothing , keyChunkNum = Nothing
, keySerialization = Nothing
} }
-- Gets the parent of a chunk key. -- Gets the parent of a chunk key.
@ -58,6 +59,7 @@ nonChunkKey :: Key -> Key
nonChunkKey k = k nonChunkKey k = k
{ keyChunkSize = Nothing { keyChunkSize = Nothing
, keyChunkNum = Nothing , keyChunkNum = Nothing
, keySerialization = Nothing
} }
-- Where a chunk key is offset within its parent. -- Where a chunk key is offset within its parent.
@ -97,7 +99,9 @@ serializeKey :: Key -> String
serializeKey = decodeBL' . serializeKey' serializeKey = decodeBL' . serializeKey'
serializeKey' :: Key -> L.ByteString serializeKey' :: Key -> L.ByteString
serializeKey' = toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty . buildKey serializeKey' k = case keySerialization k of
Nothing -> toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty (buildKey k)
Just b -> L.fromStrict b
{- This is a strict parser for security reasons; a key {- This is a strict parser for security reasons; a key
- can contain only 4 fields, which all consist only of numbers. - can contain only 4 fields, which all consist only of numbers.
@ -127,6 +131,7 @@ keyParser = do
, keyMtime = m , keyMtime = m
, keyChunkSize = cs , keyChunkSize = cs
, keyChunkNum = cn , keyChunkNum = cn
, keySerialization = Nothing
} }
else fail "invalid keyName" else fail "invalid keyName"
where where
@ -140,7 +145,10 @@ deserializeKey :: String -> Maybe Key
deserializeKey = deserializeKey' . encodeBS' deserializeKey = deserializeKey' . encodeBS'
deserializeKey' :: S.ByteString -> Maybe Key deserializeKey' :: S.ByteString -> Maybe Key
deserializeKey' b = eitherToMaybe $ A.parseOnly keyParser b deserializeKey' b = either
(const Nothing)
(\k -> Just $ k { keySerialization = Just b })
(A.parseOnly keyParser b)
{- This splits any extension out of the keyName, returning the {- This splits any extension out of the keyName, returning the
- keyName minus extension, and the extension (including leading dot). - keyName minus extension, and the extension (including leading dot).
@ -177,6 +185,7 @@ instance Arbitrary Key where
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative <*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
<*> pure Nothing
instance Hashable Key where instance Hashable Key where
hashIO32 = hashIO32 . serializeKey' hashIO32 = hashIO32 . serializeKey'

View file

@ -68,7 +68,10 @@ chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..] chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
where where
mk chunknum = sizedk { keyChunkNum = Just chunknum } mk chunknum = sizedk { keyChunkNum = Just chunknum }
sizedk = basek { keyChunkSize = Just (toInteger chunksize) } sizedk = basek
{ keyChunkSize = Just (toInteger chunksize)
, keySerialization = Nothing
}
nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream) nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l) nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)

View file

@ -23,6 +23,7 @@ data Key = Key
, keyMtime :: Maybe EpochTime , keyMtime :: Maybe EpochTime
, keyChunkSize :: Maybe Integer , keyChunkSize :: Maybe Integer
, keyChunkNum :: Maybe Integer , keyChunkNum :: Maybe Integer
, keySerialization :: Maybe S.ByteString -- ^ cached serialization
} deriving (Eq, Ord, Read, Show) } deriving (Eq, Ord, Read, Show)
{- A filename may be associated with a Key. -} {- A filename may be associated with a Key. -}

View file

@ -150,6 +150,7 @@ readKey1 v
, keyVariety = parseKeyVariety (encodeBS b) , keyVariety = parseKeyVariety (encodeBS b)
, keySize = s , keySize = s
, keyMtime = t , keyMtime = t
, keySerialization = Nothing
} }
where where
bits = splitc ':' v bits = splitc ':' v