cache the serialization of a Key

This will speed up the common case where a Key is deserialized from
disk, but is then serialized to build eg, the path to the annex object.

Previously attempted in 4536c93bb2
and reverted in 96aba8eff7.
The problems mentioned in the latter commit are addressed now:

Read/Show of KeyData is backwards-compatible with Read/Show of Key from before
this change, so Types.Distribution will keep working.

The Eq instance is fixed.

Also, Key has smart constructors, avoiding needing to remember to update
the cached serialization.

Used git-annex benchmark:
  find is 7% faster
  whereis is 3% faster
  get when all files are already present is 5% faster
Generally, the benchmarks are running 0.1 seconds faster per 2000 files,
on a ram disk in my laptop.
This commit is contained in:
Joey Hess 2019-11-22 16:24:04 -04:00
parent e296637737
commit 81d402216d
53 changed files with 388 additions and 289 deletions

View file

@ -50,23 +50,23 @@ import qualified Command.Unused
type Stat = StatState (Maybe (String, StatState String))
-- data about a set of keys
data KeyData = KeyData
data KeyInfo = KeyInfo
{ countKeys :: Integer
, sizeKeys :: Integer
, unknownSizeKeys :: Integer
, backendsKeys :: M.Map KeyVariety Integer
}
instance Sem.Semigroup KeyData where
a <> b = KeyData
instance Sem.Semigroup KeyInfo where
a <> b = KeyInfo
{ countKeys = countKeys a + countKeys b
, sizeKeys = sizeKeys a + sizeKeys b
, unknownSizeKeys = unknownSizeKeys a + unknownSizeKeys b
, backendsKeys = backendsKeys a <> backendsKeys b
}
instance Monoid KeyData where
mempty = KeyData 0 0 0 M.empty
instance Monoid KeyInfo where
mempty = KeyInfo 0 0 0 M.empty
data NumCopiesStats = NumCopiesStats
{ numCopiesVarianceMap :: M.Map Variance Integer
@ -82,9 +82,9 @@ instance Show Variance where
-- cached info that multiple Stats use
data StatInfo = StatInfo
{ presentData :: Maybe KeyData
, referencedData :: Maybe KeyData
, repoData :: M.Map UUID KeyData
{ presentData :: Maybe KeyInfo
, referencedData :: Maybe KeyInfo
, repoData :: M.Map UUID KeyInfo
, numCopiesStats :: Maybe NumCopiesStats
, infoOptions :: InfoOptions
}
@ -512,7 +512,7 @@ reposizes_total :: Stat
reposizes_total = simpleStat "combined size of repositories containing these files" $
showSizeKeys . mconcat . M.elems =<< cachedRepoData
cachedPresentData :: StatState KeyData
cachedPresentData :: StatState KeyInfo
cachedPresentData = do
s <- get
case presentData s of
@ -522,7 +522,7 @@ cachedPresentData = do
put s { presentData = Just v }
return v
cachedRemoteData :: UUID -> StatState KeyData
cachedRemoteData :: UUID -> StatState KeyInfo
cachedRemoteData u = do
s <- get
case M.lookup u (repoData s) of
@ -531,19 +531,19 @@ cachedRemoteData u = do
let combinedata d uk = finishCheck uk >>= \case
Nothing -> return d
Just k -> return $ addKey k d
v <- lift $ foldM combinedata emptyKeyData
v <- lift $ foldM combinedata emptyKeyInfo
=<< loggedKeysFor' u
put s { repoData = M.insert u v (repoData s) }
return v
cachedReferencedData :: StatState KeyData
cachedReferencedData :: StatState KeyInfo
cachedReferencedData = do
s <- get
case referencedData s of
Just v -> return v
Nothing -> do
!v <- lift $ Command.Unused.withKeysReferenced
emptyKeyData addKey
emptyKeyInfo addKey
put s { referencedData = Just v }
return v
@ -552,7 +552,7 @@ cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get
-- currently only available for directory info
cachedRepoData :: StatState (M.Map UUID KeyData)
cachedRepoData :: StatState (M.Map UUID KeyInfo)
cachedRepoData = repoData <$> get
getDirStatInfo :: InfoOptions -> FilePath -> Annex StatInfo
@ -564,7 +564,7 @@ getDirStatInfo o dir = do
(update matcher fast)
return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats) o
where
initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats, M.empty)
initial = (emptyKeyInfo, emptyKeyInfo, emptyNumCopiesStats, M.empty)
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
ifM (matcher $ MatchingFile $ FileInfo file file)
( do
@ -594,7 +594,7 @@ getTreeStatInfo o r = do
, return Nothing
)
where
initial = (emptyKeyData, emptyKeyData, M.empty)
initial = (emptyKeyInfo, emptyKeyInfo, M.empty)
go _ [] vs = return vs
go fast (l:ls) vs@(presentdata, referenceddata, repodata) = do
mk <- catKey (LsTree.sha l)
@ -613,33 +613,33 @@ getTreeStatInfo o r = do
return (updateRepoData key locs repodata)
go fast ls $! (presentdata', referenceddata', repodata')
emptyKeyData :: KeyData
emptyKeyData = KeyData 0 0 0 M.empty
emptyKeyInfo :: KeyInfo
emptyKeyInfo = KeyInfo 0 0 0 M.empty
emptyNumCopiesStats :: NumCopiesStats
emptyNumCopiesStats = NumCopiesStats M.empty
foldKeys :: [Key] -> KeyData
foldKeys = foldl' (flip addKey) emptyKeyData
foldKeys :: [Key] -> KeyInfo
foldKeys = foldl' (flip addKey) emptyKeyInfo
addKey :: Key -> KeyData -> KeyData
addKey key (KeyData count size unknownsize backends) =
KeyData count' size' unknownsize' backends'
addKey :: Key -> KeyInfo -> KeyInfo
addKey key (KeyInfo count size unknownsize backends) =
KeyInfo count' size' unknownsize' backends'
where
{- All calculations strict to avoid thunks when repeatedly
- applied to many keys. -}
!count' = count + 1
!backends' = M.insertWith (+) (keyVariety key) 1 backends
!backends' = M.insertWith (+) (fromKey keyVariety key) 1 backends
!size' = maybe size (+ size) ks
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
ks = keySize key
ks = fromKey keySize key
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyData -> M.Map UUID KeyData
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyInfo -> M.Map UUID KeyInfo
updateRepoData key locs m = m'
where
!m' = M.unionWith (\_old new -> new) m $
M.fromList $ zip locs (map update locs)
update loc = addKey key (fromMaybe emptyKeyData $ M.lookup loc m)
update loc = addKey key (fromMaybe emptyKeyInfo $ M.lookup loc m)
updateNumCopiesStats :: FilePath -> NumCopiesStats -> [UUID] -> Annex NumCopiesStats
updateNumCopiesStats file (NumCopiesStats m) locs = do
@ -649,7 +649,7 @@ updateNumCopiesStats file (NumCopiesStats m) locs = do
let !ret = NumCopiesStats m'
return ret
showSizeKeys :: KeyData -> StatState String
showSizeKeys :: KeyInfo -> StatState String
showSizeKeys d = do
sizer <- mkSizer
return $ total sizer ++ missingnote