From 4536c93bb2ecf114ab711beac33fa358facd6985 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 14 Jan 2019 16:33:20 -0400 Subject: [PATCH] cache the serialization of a Key This will speed up the common case where a Key is deserialized from disk, but is then serialized to build eg, the path to the annex object. It means that every place a Key has any of its fields changed, the cache has to be dropped. I've grepped and found them all. But, it would be better to avoid that gotcha somehow.. --- Annex/Export.hs | 1 + Backend.hs | 5 ++++- Backend/Hash.hs | 8 +++++++- CHANGELOG | 2 +- Command/AddUrl.hs | 5 ++++- Key.hs | 13 +++++++++++-- Remote/Helper/Chunked.hs | 5 ++++- Types/Key.hs | 1 + Upgrade/V1.hs | 1 + 9 files changed, 34 insertions(+), 7 deletions(-) diff --git a/Annex/Export.hs b/Annex/Export.hs index 47a6a75249..2cc110cb83 100644 --- a/Annex/Export.hs +++ b/Annex/Export.hs @@ -42,6 +42,7 @@ exportKey sha = mk <$> catKey sha , keyMtime = Nothing , keyChunkSize = Nothing , keyChunkNum = Nothing + , keySerialization = Nothing } exportTree :: Remote.RemoteConfig -> Bool diff --git a/Backend.hs b/Backend.hs index 2932253aec..5b7b82e40c 100644 --- a/Backend.hs +++ b/Backend.hs @@ -58,7 +58,10 @@ genKey source preferredbackend = do Just k -> Just (makesane k, b) where -- keyNames should not contain newline characters. - makesane k = k { keyName = S8.map fixbadchar (keyName k) } + makesane k = k + { keyName = S8.map fixbadchar (keyName k) + , keySerialization = Nothing + } fixbadchar c | c == '\n' = '_' | otherwise = c diff --git a/Backend/Hash.hs b/Backend/Hash.hs index b8977301b3..6c42af19c3 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -181,6 +181,7 @@ trivialMigrate' oldkey newbackend afile maxextlen | migratable && hasExt oldvariety = Just $ oldkey { keyName = keyHash oldkey , keyVariety = newvariety + , keySerialization = Nothing } {- Fast migration from hash to hashE backend. -} | migratable && hasExt newvariety = case afile of @@ -189,6 +190,7 @@ trivialMigrate' oldkey newbackend afile maxextlen { keyName = keyHash oldkey <> encodeBS (selectExtension maxextlen file) , keyVariety = newvariety + , keySerialization = Nothing } {- Upgrade to fix bad previous migration that created a - non-extension preserving key, with an extension @@ -196,6 +198,7 @@ trivialMigrate' oldkey newbackend afile maxextlen | newvariety == oldvariety && not (hasExt oldvariety) && keyHash oldkey /= keyName oldkey = Just $ oldkey { keyName = keyHash oldkey + , keySerialization = Nothing } | otherwise = Nothing where @@ -288,5 +291,8 @@ testKeyBackend = let b = genBackendE (SHA2Hash (HashSize 256)) in b { getKey = (fmap addE) <$$> getKey b } where - addE k = k { keyName = keyName k <> longext } + addE k = k + { keyName = keyName k <> longext + , keySerialization = Nothing + } longext = ".this-is-a-test-key" diff --git a/CHANGELOG b/CHANGELOG index e44ee49d0d..b185bd513f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -14,7 +14,7 @@ git-annex (7.20181212) UNRELEASED; urgency=medium * importfeed: Better error message when downloading the feed fails. * Some optimisations, including a 10x faster timestamp parser, a 7x faster key parser, and improved parsing and serialization of - git-annex branch data. + git-annex branch data. Some commands will run up to 15% faster. * Stricter parser for keys doesn't allow doubled fields or out of order fields. * The benchmark command, which only had some old benchmarking of the sqlite databases before, now allows benchmarking any other git-annex commands. diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index be008f63aa..cff1166fdb 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -377,7 +377,10 @@ finishDownloadWith tmp u url file = do {- Adds the url size to the Key. -} addSizeUrlKey :: Url.UrlInfo -> Key -> Key -addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo } +addSizeUrlKey urlinfo key = key + { keySize = Url.urlSize urlinfo + , keySerialization = Nothing + } {- Adds worktree file to the repository. -} addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () diff --git a/Key.hs b/Key.hs index 16956ee5ea..eaa179d9f4 100644 --- a/Key.hs +++ b/Key.hs @@ -51,6 +51,7 @@ stubKey = Key , keyMtime = Nothing , keyChunkSize = Nothing , keyChunkNum = Nothing + , keySerialization = Nothing } -- Gets the parent of a chunk key. @@ -58,6 +59,7 @@ nonChunkKey :: Key -> Key nonChunkKey k = k { keyChunkSize = Nothing , keyChunkNum = Nothing + , keySerialization = Nothing } -- Where a chunk key is offset within its parent. @@ -97,7 +99,9 @@ serializeKey :: Key -> String serializeKey = decodeBL' . serializeKey' serializeKey' :: Key -> L.ByteString -serializeKey' = toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty . buildKey +serializeKey' k = case keySerialization k of + Nothing -> toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty (buildKey k) + Just b -> L.fromStrict b {- This is a strict parser for security reasons; a key - can contain only 4 fields, which all consist only of numbers. @@ -127,6 +131,7 @@ keyParser = do , keyMtime = m , keyChunkSize = cs , keyChunkNum = cn + , keySerialization = Nothing } else fail "invalid keyName" where @@ -140,7 +145,10 @@ deserializeKey :: String -> Maybe Key deserializeKey = deserializeKey' . encodeBS' deserializeKey' :: S.ByteString -> Maybe Key -deserializeKey' b = eitherToMaybe $ A.parseOnly keyParser b +deserializeKey' b = either + (const Nothing) + (\k -> Just $ k { keySerialization = Just b }) + (A.parseOnly keyParser b) {- This splits any extension out of the keyName, returning the - keyName minus extension, and the extension (including leading dot). @@ -177,6 +185,7 @@ instance Arbitrary Key where <*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative + <*> pure Nothing instance Hashable Key where hashIO32 = hashIO32 . serializeKey' diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index f3c69c38dd..595a4c4d63 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -68,7 +68,10 @@ chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..] where mk chunknum = sizedk { keyChunkNum = Just chunknum } - sizedk = basek { keyChunkSize = Just (toInteger chunksize) } + sizedk = basek + { keyChunkSize = Just (toInteger chunksize) + , keySerialization = Nothing + } nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream) nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l) diff --git a/Types/Key.hs b/Types/Key.hs index df0e042606..97d548ff78 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -23,6 +23,7 @@ data Key = Key , keyMtime :: Maybe EpochTime , keyChunkSize :: Maybe Integer , keyChunkNum :: Maybe Integer + , keySerialization :: Maybe S.ByteString -- ^ cached serialization } deriving (Eq, Ord, Read, Show) {- A filename may be associated with a Key. -} diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index c0dafbb842..e58e5818f7 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -150,6 +150,7 @@ readKey1 v , keyVariety = parseKeyVariety (encodeBS b) , keySize = s , keyMtime = t + , keySerialization = Nothing } where bits = splitc ':' v