separate handles for cat-file and cat-file --batch-check
This avoids starting one process when only the other one is needed. Eg in git-annex smudge --clean, this reduces the total number of cat-file processes that are started from 4 to 2. The only performance penalty is that when both are needed, it has to do twice as much work to maintain the two Maps. But both are very small, consisting of 1 or 2 items, so that work is negligible. Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
e6c0bbd645
commit
e47b4badb3
3 changed files with 82 additions and 37 deletions
|
@ -1,6 +1,6 @@
|
|||
{- git cat-file interface, with handle automatically stored in the Annex monad
|
||||
-
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -58,7 +58,7 @@ catObject ref = withCatFileHandle $ \h ->
|
|||
liftIO $ Git.CatFile.catObject h ref
|
||||
|
||||
catObjectMetaData :: Git.Ref -> Annex (Maybe (Sha, Integer, ObjectType))
|
||||
catObjectMetaData ref = withCatFileHandle $ \h ->
|
||||
catObjectMetaData ref = withCatFileMetaDataHandle $ \h ->
|
||||
liftIO $ Git.CatFile.catObjectMetaData h ref
|
||||
|
||||
catTree :: Git.Ref -> Annex [(FilePath, FileMode)]
|
||||
|
@ -77,32 +77,50 @@ catObjectDetails ref = withCatFileHandle $ \h ->
|
|||
- for each. That is selected by setting GIT_INDEX_FILE in the gitEnv
|
||||
- before running this. -}
|
||||
withCatFileHandle :: (Git.CatFile.CatFileHandle -> Annex a) -> Annex a
|
||||
withCatFileHandle a = do
|
||||
withCatFileHandle = withCatFileHandle'
|
||||
Git.CatFile.catFileStart
|
||||
catFileMap
|
||||
(\v m -> v { catFileMap = m })
|
||||
|
||||
withCatFileMetaDataHandle :: (Git.CatFile.CatFileMetaDataHandle -> Annex a) -> Annex a
|
||||
withCatFileMetaDataHandle = withCatFileHandle'
|
||||
Git.CatFile.catFileMetaDataStart
|
||||
catFileMetaDataMap
|
||||
(\v m -> v { catFileMetaDataMap = m })
|
||||
|
||||
withCatFileHandle'
|
||||
:: (Repo -> IO hdl)
|
||||
-> (CatMap -> M.Map FilePath (ResourcePool hdl))
|
||||
-> (CatMap -> M.Map FilePath (ResourcePool hdl) -> CatMap)
|
||||
-> (hdl -> Annex a)
|
||||
-> Annex a
|
||||
withCatFileHandle' startcat get set a = do
|
||||
cfh <- Annex.getState Annex.catfilehandles
|
||||
indexfile <- fromMaybe "" . maybe Nothing (lookup indexEnv)
|
||||
<$> fromRepo gitEnv
|
||||
p <- case cfh of
|
||||
CatFileHandlesNonConcurrent m -> case M.lookup indexfile m of
|
||||
CatFileHandlesNonConcurrent m -> case M.lookup indexfile (get m) of
|
||||
Just p -> return p
|
||||
Nothing -> do
|
||||
p <- mkResourcePoolNonConcurrent startcatfile
|
||||
let !m' = M.insert indexfile p m
|
||||
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent m' }
|
||||
let !m' = set m (M.insert indexfile p (get m))
|
||||
Annex.changeState $ \s -> s
|
||||
{ Annex.catfilehandles = CatFileHandlesNonConcurrent m' }
|
||||
return p
|
||||
CatFileHandlesPool tm -> do
|
||||
m <- liftIO $ atomically $ takeTMVar tm
|
||||
case M.lookup indexfile m of
|
||||
case M.lookup indexfile (get m) of
|
||||
Just p -> do
|
||||
liftIO $ atomically $ putTMVar tm m
|
||||
return p
|
||||
Nothing -> do
|
||||
p <- mkResourcePool maxCatFiles
|
||||
let !m' = M.insert indexfile p m
|
||||
let !m' = set m (M.insert indexfile p (get m))
|
||||
liftIO $ atomically $ putTMVar tm m'
|
||||
return p
|
||||
withResourcePool p startcatfile a
|
||||
where
|
||||
startcatfile = inRepo Git.CatFile.catFileStart
|
||||
startcatfile = inRepo startcat
|
||||
|
||||
{- A lot of git cat-file processes are unlikely to improve concurrency,
|
||||
- because a query to them takes only a little bit of CPU, and tends to be
|
||||
|
@ -124,12 +142,14 @@ catFileStop = do
|
|||
cfh <- Annex.getState Annex.catfilehandles
|
||||
m <- case cfh of
|
||||
CatFileHandlesNonConcurrent m -> do
|
||||
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent M.empty }
|
||||
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent emptyCatMap }
|
||||
return m
|
||||
CatFileHandlesPool tm ->
|
||||
liftIO $ atomically $ swapTMVar tm M.empty
|
||||
liftIO $ forM_ (M.elems m) $ \p ->
|
||||
liftIO $ atomically $ swapTMVar tm emptyCatMap
|
||||
liftIO $ forM_ (M.elems (catFileMap m)) $ \p ->
|
||||
freeResourcePool p Git.CatFile.catFileStop
|
||||
liftIO $ forM_ (M.elems (catFileMetaDataMap m)) $ \p ->
|
||||
freeResourcePool p Git.CatFile.catFileMetaDataStop
|
||||
|
||||
{- From ref to a symlink or a pointer file, get the key. -}
|
||||
catKey :: Ref -> Annex (Maybe Key)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git cat-file interface
|
||||
-
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -10,9 +10,13 @@
|
|||
|
||||
module Git.CatFile (
|
||||
CatFileHandle,
|
||||
CatFileMetaDataHandle,
|
||||
catFileStart,
|
||||
catFileMetaDataStart,
|
||||
catFileStart',
|
||||
catFileMetaDataStart',
|
||||
catFileStop,
|
||||
catFileMetaDataStop,
|
||||
catFile,
|
||||
catFileDetails,
|
||||
catTree,
|
||||
|
@ -55,8 +59,12 @@ import Utility.Tuple
|
|||
|
||||
data CatFileHandle = CatFileHandle
|
||||
{ catFileProcess :: CoProcess.CoProcessHandle
|
||||
, checkFileProcess :: CoProcess.CoProcessHandle
|
||||
, gitRepo :: Repo
|
||||
, catFileGitRepo :: Repo
|
||||
}
|
||||
|
||||
data CatFileMetaDataHandle = CatFileMetaDataHandle
|
||||
{ checkFileProcess :: CoProcess.CoProcessHandle
|
||||
, checkFileGitRepo :: Repo
|
||||
}
|
||||
|
||||
catFileStart :: Repo -> IO CatFileHandle
|
||||
|
@ -64,22 +72,31 @@ catFileStart = catFileStart' True
|
|||
|
||||
catFileStart' :: Bool -> Repo -> IO CatFileHandle
|
||||
catFileStart' restartable repo = CatFileHandle
|
||||
<$> startp "--batch"
|
||||
<*> startp ("--batch-check=" ++ batchFormat)
|
||||
<$> startcat restartable repo "--batch"
|
||||
<*> pure repo
|
||||
|
||||
catFileMetaDataStart :: Repo -> IO CatFileMetaDataHandle
|
||||
catFileMetaDataStart = catFileMetaDataStart' True
|
||||
|
||||
catFileMetaDataStart' :: Bool -> Repo -> IO CatFileMetaDataHandle
|
||||
catFileMetaDataStart' restartable repo = CatFileMetaDataHandle
|
||||
<$> startcat restartable repo ("--batch-check=" ++ batchFormat)
|
||||
<*> pure repo
|
||||
where
|
||||
startp p = gitCoProcessStart restartable
|
||||
[ Param "cat-file"
|
||||
, Param p
|
||||
] repo
|
||||
|
||||
batchFormat :: String
|
||||
batchFormat = "%(objectname) %(objecttype) %(objectsize)"
|
||||
|
||||
startcat :: Bool -> Repo -> String -> IO CoProcess.CoProcessHandle
|
||||
startcat restartable repo p = gitCoProcessStart restartable
|
||||
[ Param "cat-file"
|
||||
, Param p
|
||||
] repo
|
||||
|
||||
catFileStop :: CatFileHandle -> IO ()
|
||||
catFileStop h = do
|
||||
CoProcess.stop (catFileProcess h)
|
||||
CoProcess.stop (checkFileProcess h)
|
||||
catFileStop = CoProcess.stop . catFileProcess
|
||||
|
||||
catFileMetaDataStop :: CatFileMetaDataHandle -> IO ()
|
||||
catFileMetaDataStop = CoProcess.stop . checkFileProcess
|
||||
|
||||
{- Reads a file from a specified branch. -}
|
||||
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
|
||||
|
@ -106,16 +123,16 @@ catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \f
|
|||
Nothing -> error $ "unknown response from git cat-file " ++ show (header, object)
|
||||
where
|
||||
-- Slow fallback path for filenames containing newlines.
|
||||
newlinefallback = queryObjectType object (gitRepo h) >>= \case
|
||||
newlinefallback = queryObjectType object (catFileGitRepo h) >>= \case
|
||||
Nothing -> return Nothing
|
||||
Just objtype -> queryContent object (gitRepo h) >>= \case
|
||||
Just objtype -> queryContent object (catFileGitRepo h) >>= \case
|
||||
Nothing -> return Nothing
|
||||
Just content -> do
|
||||
-- only the --batch interface allows getting
|
||||
-- the sha, so have to re-hash the object
|
||||
sha <- hashObject' objtype
|
||||
(flip L.hPut content)
|
||||
(gitRepo h)
|
||||
(catFileGitRepo h)
|
||||
return (Just (content, sha, objtype))
|
||||
|
||||
readObjectContent :: Handle -> ParsedResp -> IO L.ByteString
|
||||
|
@ -131,7 +148,7 @@ readObjectContent h (ParsedResp _ _ size) = do
|
|||
readObjectContent _ DNE = error "internal"
|
||||
|
||||
{- Gets the size and type of an object, without reading its content. -}
|
||||
catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
|
||||
catObjectMetaData :: CatFileMetaDataHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
|
||||
catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
|
||||
resp <- S8.hGetLine from
|
||||
case parseResp object resp of
|
||||
|
@ -142,9 +159,9 @@ catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $
|
|||
where
|
||||
-- Slow fallback path for filenames containing newlines.
|
||||
newlinefallback = do
|
||||
sha <- Git.Ref.sha object (gitRepo h)
|
||||
sz <- querySize object (gitRepo h)
|
||||
objtype <- queryObjectType object (gitRepo h)
|
||||
sha <- Git.Ref.sha object (checkFileGitRepo h)
|
||||
sz <- querySize object (checkFileGitRepo h)
|
||||
objtype <- queryObjectType object (checkFileGitRepo h)
|
||||
return $ (,,) <$> sha <*> sz <*> objtype
|
||||
|
||||
data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-cat file handles pools
|
||||
-
|
||||
- Copyright 2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2020-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -9,22 +9,30 @@ module Types.CatFileHandles (
|
|||
CatFileHandles(..),
|
||||
catFileHandlesNonConcurrent,
|
||||
catFileHandlesPool,
|
||||
CatMap(..),
|
||||
emptyCatMap,
|
||||
) where
|
||||
|
||||
import Control.Concurrent.STM
|
||||
import qualified Data.Map as M
|
||||
|
||||
import Utility.ResourcePool
|
||||
import Git.CatFile (CatFileHandle)
|
||||
import Git.CatFile (CatFileHandle, CatFileMetaDataHandle)
|
||||
|
||||
data CatFileHandles
|
||||
= CatFileHandlesNonConcurrent CatMap
|
||||
| CatFileHandlesPool (TMVar CatMap)
|
||||
|
||||
type CatMap = M.Map FilePath (ResourcePool CatFileHandle)
|
||||
data CatMap = CatMap
|
||||
{ catFileMap :: M.Map FilePath (ResourcePool CatFileHandle)
|
||||
, catFileMetaDataMap :: M.Map FilePath (ResourcePool CatFileMetaDataHandle)
|
||||
}
|
||||
|
||||
emptyCatMap :: CatMap
|
||||
emptyCatMap = CatMap M.empty M.empty
|
||||
|
||||
catFileHandlesNonConcurrent :: CatFileHandles
|
||||
catFileHandlesNonConcurrent = CatFileHandlesNonConcurrent M.empty
|
||||
catFileHandlesNonConcurrent = CatFileHandlesNonConcurrent emptyCatMap
|
||||
|
||||
catFileHandlesPool :: IO CatFileHandles
|
||||
catFileHandlesPool = CatFileHandlesPool <$> newTMVarIO M.empty
|
||||
catFileHandlesPool = CatFileHandlesPool <$> newTMVarIO emptyCatMap
|
||||
|
|
Loading…
Reference in a new issue