separate handles for cat-file and cat-file --batch-check

This avoids starting one process when only the other one is needed.
E.g., in git-annex smudge --clean, this reduces the total number of
cat-file processes that are started from 4 to 2.

The only performance penalty is that when both are needed, it has to do
twice as much work to maintain the two Maps. But both are very small,
consisting of 1 or 2 items, so that work is negligible.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2021-09-24 13:16:13 -04:00
parent e6c0bbd645
commit e47b4badb3
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 82 additions and 37 deletions

View file

@ -1,6 +1,6 @@
{- git-cat file handles pools
-
- Copyright 2020 Joey Hess <id@joeyh.name>
- Copyright 2020-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -9,22 +9,30 @@ module Types.CatFileHandles (
CatFileHandles(..),
catFileHandlesNonConcurrent,
catFileHandlesPool,
CatMap(..),
emptyCatMap,
) where
import Control.Concurrent.STM
import qualified Data.Map as M
import Utility.ResourcePool
import Git.CatFile (CatFileHandle)
import Git.CatFile (CatFileHandle, CatFileMetaDataHandle)
{- Holder for the cat-file handle maps. One constructor carries a CatMap
 - directly; the other carries a CatMap stored inside a TMVar.
 -
 - NOTE(review): presumably the TMVar form is the one meant to be shared
 - between concurrent threads -- confirm against the callers.
 -}
data CatFileHandles
= CatFileHandlesNonConcurrent CatMap
| CatFileHandlesPool (TMVar CatMap)
type CatMap = M.Map FilePath (ResourcePool CatFileHandle)
{- Two maps, each keyed by FilePath: one of resource pools of
 - CatFileHandle, and one of resource pools of CatFileMetaDataHandle.
 - Keeping separate handles for cat-file and cat-file --batch-check
 - avoids starting one process when only the other one is needed.
 -
 - NOTE(review): what the FilePath key identifies is not visible here --
 - confirm against the code that inserts into these maps.
 -}
data CatMap = CatMap
{ catFileMap :: M.Map FilePath (ResourcePool CatFileHandle)
, catFileMetaDataMap :: M.Map FilePath (ResourcePool CatFileMetaDataHandle)
}
{- A CatMap in which both the catFileMap and the catFileMetaDataMap
 - are empty. -}
emptyCatMap :: CatMap
emptyCatMap = CatMap
  { catFileMap = M.empty
  , catFileMetaDataMap = M.empty
  }
{- CatFileHandles in the CatFileHandlesNonConcurrent form, starting out
 - with an empty CatMap. -}
catFileHandlesNonConcurrent :: CatFileHandles
catFileHandlesNonConcurrent = CatFileHandlesNonConcurrent M.empty
catFileHandlesNonConcurrent = CatFileHandlesNonConcurrent emptyCatMap
{- Creates a new TMVar holding an empty CatMap and wraps it in the
 - CatFileHandlesPool constructor. Runs in IO because the TMVar has to
 - be allocated. -}
catFileHandlesPool :: IO CatFileHandles
catFileHandlesPool = CatFileHandlesPool <$> newTMVarIO M.empty
catFileHandlesPool = CatFileHandlesPool <$> newTMVarIO emptyCatMap