cache the serialization of a Key
This will speed up the common case where a Key is deserialized from disk but is then serialized again, e.g. to build the path to the annex object. The cost is that every place a Key has any of its fields changed, the cache has to be dropped. I've grepped and found them all, but it would be better to avoid that gotcha somehow.
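The pattern at work, reduced to a minimal standalone sketch. These are simplified stand-in types and names, not git-annex's real Key record or its field-by-field builder:

import qualified Data.ByteString as S

-- Stand-in Key: one payload field plus the cached serialized form.
data Key = Key
    { keyName :: S.ByteString
    , keySerialization :: Maybe S.ByteString -- ^ cached serialization
    }

-- Serializing consults the cache first and only rebuilds on a miss.
serialize :: Key -> S.ByteString
serialize k = case keySerialization k of
    Just b -> b
    Nothing -> build k

-- Stand-in for the real builder that renders every field.
build :: Key -> S.ByteString
build k = keyName k

-- The gotcha: every field update must also drop the cache, or
-- serialize will keep returning stale bytes.
setName :: S.ByteString -> Key -> Key
setName n k = k { keyName = n, keySerialization = Nothing }

One idea for avoiding the gotcha, offered here purely as a sketch and not something this commit does: stop exporting the record constructor, so the only way to change a field is through helpers like the hypothetical setName above, which cannot forget to invalidate the cache.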
parent 918868915c
commit 4536c93bb2

9 changed files with 34 additions and 7 deletions
@@ -42,6 +42,7 @@ exportKey sha = mk <$> catKey sha
     , keyMtime = Nothing
     , keyChunkSize = Nothing
     , keyChunkNum = Nothing
+    , keySerialization = Nothing
     }

 exportTree :: Remote.RemoteConfig -> Bool
@@ -58,7 +58,10 @@ genKey source preferredbackend = do
     Just k -> Just (makesane k, b)
   where
     -- keyNames should not contain newline characters.
-    makesane k = k { keyName = S8.map fixbadchar (keyName k) }
+    makesane k = k
+        { keyName = S8.map fixbadchar (keyName k)
+        , keySerialization = Nothing
+        }
     fixbadchar c
         | c == '\n' = '_'
         | otherwise = c
@@ -181,6 +181,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
     | migratable && hasExt oldvariety = Just $ oldkey
         { keyName = keyHash oldkey
         , keyVariety = newvariety
+        , keySerialization = Nothing
         }
     {- Fast migration from hash to hashE backend. -}
     | migratable && hasExt newvariety = case afile of
@@ -189,6 +190,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
         { keyName = keyHash oldkey
             <> encodeBS (selectExtension maxextlen file)
         , keyVariety = newvariety
+        , keySerialization = Nothing
         }
     {- Upgrade to fix bad previous migration that created a
      - non-extension preserving key, with an extension
@@ -196,6 +198,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
     | newvariety == oldvariety && not (hasExt oldvariety) &&
         keyHash oldkey /= keyName oldkey = Just $ oldkey
             { keyName = keyHash oldkey
+            , keySerialization = Nothing
             }
     | otherwise = Nothing
   where
@@ -288,5 +291,8 @@ testKeyBackend =
     let b = genBackendE (SHA2Hash (HashSize 256))
     in b { getKey = (fmap addE) <$$> getKey b }
   where
-    addE k = k { keyName = keyName k <> longext }
+    addE k = k
+        { keyName = keyName k <> longext
+        , keySerialization = Nothing
+        }
     longext = ".this-is-a-test-key"
@@ -14,7 +14,7 @@ git-annex (7.20181212) UNRELEASED; urgency=medium
   * importfeed: Better error message when downloading the feed fails.
   * Some optimisations, including a 10x faster timestamp parser,
     a 7x faster key parser, and improved parsing and serialization of
-    git-annex branch data.
+    git-annex branch data. Some commands will run up to 15% faster.
   * Stricter parser for keys doesn't allow doubled fields or out of order fields.
   * The benchmark command, which only had some old benchmarking of the sqlite
     databases before, now allows benchmarking any other git-annex commands.
@@ -377,7 +377,10 @@ finishDownloadWith tmp u url file = do

 {- Adds the url size to the Key. -}
 addSizeUrlKey :: Url.UrlInfo -> Key -> Key
-addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo }
+addSizeUrlKey urlinfo key = key
+    { keySize = Url.urlSize urlinfo
+    , keySerialization = Nothing
+    }

 {- Adds worktree file to the repository. -}
 addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
Key.hs (13 changes)
@@ -51,6 +51,7 @@ stubKey = Key
     , keyMtime = Nothing
     , keyChunkSize = Nothing
     , keyChunkNum = Nothing
+    , keySerialization = Nothing
     }

 -- Gets the parent of a chunk key.
@@ -58,6 +59,7 @@ nonChunkKey :: Key -> Key
 nonChunkKey k = k
     { keyChunkSize = Nothing
     , keyChunkNum = Nothing
+    , keySerialization = Nothing
     }

 -- Where a chunk key is offset within its parent.
@@ -97,7 +99,9 @@ serializeKey :: Key -> String
 serializeKey = decodeBL' . serializeKey'

 serializeKey' :: Key -> L.ByteString
-serializeKey' = toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty . buildKey
+serializeKey' k = case keySerialization k of
+    Nothing -> toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty (buildKey k)
+    Just b -> L.fromStrict b

 {- This is a strict parser for security reasons; a key
  - can contain only 4 fields, which all consist only of numbers.
@@ -127,6 +131,7 @@ keyParser = do
     , keyMtime = m
     , keyChunkSize = cs
     , keyChunkNum = cn
+    , keySerialization = Nothing
     }
     else fail "invalid keyName"
   where
@@ -140,7 +145,10 @@ deserializeKey :: String -> Maybe Key
 deserializeKey = deserializeKey' . encodeBS'

 deserializeKey' :: S.ByteString -> Maybe Key
-deserializeKey' b = eitherToMaybe $ A.parseOnly keyParser b
+deserializeKey' b = either
+    (const Nothing)
+    (\k -> Just $ k { keySerialization = Just b })
+    (A.parseOnly keyParser b)

 {- This splits any extension out of the keyName, returning the
  - keyName minus extension, and the extension (including leading dot).
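Together, the two hunks above make parse-then-print a near no-op: deserializeKey' stores the input bytes in the new field, and serializeKey' hands them back without rebuilding. A hypothetical usage sketch, assuming the Key module is imported as in the git-annex source tree; the key string is made up for illustration:

import Key (deserializeKey, serializeKey)

main :: IO ()
main = case deserializeKey "SHA256E-s1000--c0ffee.mp3" of
    Nothing -> putStrLn "parse failed"
    -- Printing serves the cached bytes; buildKey is never run.
    Just k -> putStrLn (serializeKey k)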
@@ -177,6 +185,7 @@ instance Arbitrary Key where
     <*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
     <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
     <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
+    <*> pure Nothing

 instance Hashable Key where
     hashIO32 = hashIO32 . serializeKey'
@@ -68,7 +68,10 @@ chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
 chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
   where
     mk chunknum = sizedk { keyChunkNum = Just chunknum }
-    sizedk = basek { keyChunkSize = Just (toInteger chunksize) }
+    sizedk = basek
+        { keyChunkSize = Just (toInteger chunksize)
+        , keySerialization = Nothing
+        }

 nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
 nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
@@ -23,6 +23,7 @@ data Key = Key
     , keyMtime :: Maybe EpochTime
     , keyChunkSize :: Maybe Integer
     , keyChunkNum :: Maybe Integer
+    , keySerialization :: Maybe S.ByteString -- ^ cached serialization
     } deriving (Eq, Ord, Read, Show)

 {- A filename may be associated with a Key. -}
@@ -150,6 +150,7 @@ readKey1 v
     , keyVariety = parseKeyVariety (encodeBS b)
     , keySize = s
     , keyMtime = t
+    , keySerialization = Nothing
     }
   where
     bits = splitc ':' v