separate handles for cat-file and cat-file --batch-check
This avoids starting one process when only the other one is needed. Eg in git-annex smudge --clean, this reduces the total number of cat-file processes that are started from 4 to 2. The only performance penalty is that when both are needed, it has to do twice as much work to maintain the two Maps. But both are very small, consisting of 1 or 2 items, so that work is negligible. Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
e6c0bbd645
commit
e47b4badb3
3 changed files with 82 additions and 37 deletions
|
@ -1,6 +1,6 @@
|
||||||
{- git cat-file interface, with handle automatically stored in the Annex monad
|
{- git cat-file interface, with handle automatically stored in the Annex monad
|
||||||
-
|
-
|
||||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -58,7 +58,7 @@ catObject ref = withCatFileHandle $ \h ->
|
||||||
liftIO $ Git.CatFile.catObject h ref
|
liftIO $ Git.CatFile.catObject h ref
|
||||||
|
|
||||||
catObjectMetaData :: Git.Ref -> Annex (Maybe (Sha, Integer, ObjectType))
|
catObjectMetaData :: Git.Ref -> Annex (Maybe (Sha, Integer, ObjectType))
|
||||||
catObjectMetaData ref = withCatFileHandle $ \h ->
|
catObjectMetaData ref = withCatFileMetaDataHandle $ \h ->
|
||||||
liftIO $ Git.CatFile.catObjectMetaData h ref
|
liftIO $ Git.CatFile.catObjectMetaData h ref
|
||||||
|
|
||||||
catTree :: Git.Ref -> Annex [(FilePath, FileMode)]
|
catTree :: Git.Ref -> Annex [(FilePath, FileMode)]
|
||||||
|
@ -77,32 +77,50 @@ catObjectDetails ref = withCatFileHandle $ \h ->
|
||||||
- for each. That is selected by setting GIT_INDEX_FILE in the gitEnv
|
- for each. That is selected by setting GIT_INDEX_FILE in the gitEnv
|
||||||
- before running this. -}
|
- before running this. -}
|
||||||
withCatFileHandle :: (Git.CatFile.CatFileHandle -> Annex a) -> Annex a
|
withCatFileHandle :: (Git.CatFile.CatFileHandle -> Annex a) -> Annex a
|
||||||
withCatFileHandle a = do
|
withCatFileHandle = withCatFileHandle'
|
||||||
|
Git.CatFile.catFileStart
|
||||||
|
catFileMap
|
||||||
|
(\v m -> v { catFileMap = m })
|
||||||
|
|
||||||
|
withCatFileMetaDataHandle :: (Git.CatFile.CatFileMetaDataHandle -> Annex a) -> Annex a
|
||||||
|
withCatFileMetaDataHandle = withCatFileHandle'
|
||||||
|
Git.CatFile.catFileMetaDataStart
|
||||||
|
catFileMetaDataMap
|
||||||
|
(\v m -> v { catFileMetaDataMap = m })
|
||||||
|
|
||||||
|
withCatFileHandle'
|
||||||
|
:: (Repo -> IO hdl)
|
||||||
|
-> (CatMap -> M.Map FilePath (ResourcePool hdl))
|
||||||
|
-> (CatMap -> M.Map FilePath (ResourcePool hdl) -> CatMap)
|
||||||
|
-> (hdl -> Annex a)
|
||||||
|
-> Annex a
|
||||||
|
withCatFileHandle' startcat get set a = do
|
||||||
cfh <- Annex.getState Annex.catfilehandles
|
cfh <- Annex.getState Annex.catfilehandles
|
||||||
indexfile <- fromMaybe "" . maybe Nothing (lookup indexEnv)
|
indexfile <- fromMaybe "" . maybe Nothing (lookup indexEnv)
|
||||||
<$> fromRepo gitEnv
|
<$> fromRepo gitEnv
|
||||||
p <- case cfh of
|
p <- case cfh of
|
||||||
CatFileHandlesNonConcurrent m -> case M.lookup indexfile m of
|
CatFileHandlesNonConcurrent m -> case M.lookup indexfile (get m) of
|
||||||
Just p -> return p
|
Just p -> return p
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
p <- mkResourcePoolNonConcurrent startcatfile
|
p <- mkResourcePoolNonConcurrent startcatfile
|
||||||
let !m' = M.insert indexfile p m
|
let !m' = set m (M.insert indexfile p (get m))
|
||||||
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent m' }
|
Annex.changeState $ \s -> s
|
||||||
|
{ Annex.catfilehandles = CatFileHandlesNonConcurrent m' }
|
||||||
return p
|
return p
|
||||||
CatFileHandlesPool tm -> do
|
CatFileHandlesPool tm -> do
|
||||||
m <- liftIO $ atomically $ takeTMVar tm
|
m <- liftIO $ atomically $ takeTMVar tm
|
||||||
case M.lookup indexfile m of
|
case M.lookup indexfile (get m) of
|
||||||
Just p -> do
|
Just p -> do
|
||||||
liftIO $ atomically $ putTMVar tm m
|
liftIO $ atomically $ putTMVar tm m
|
||||||
return p
|
return p
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
p <- mkResourcePool maxCatFiles
|
p <- mkResourcePool maxCatFiles
|
||||||
let !m' = M.insert indexfile p m
|
let !m' = set m (M.insert indexfile p (get m))
|
||||||
liftIO $ atomically $ putTMVar tm m'
|
liftIO $ atomically $ putTMVar tm m'
|
||||||
return p
|
return p
|
||||||
withResourcePool p startcatfile a
|
withResourcePool p startcatfile a
|
||||||
where
|
where
|
||||||
startcatfile = inRepo Git.CatFile.catFileStart
|
startcatfile = inRepo startcat
|
||||||
|
|
||||||
{- A lot of git cat-file processes are unlikely to improve concurrency,
|
{- A lot of git cat-file processes are unlikely to improve concurrency,
|
||||||
- because a query to them takes only a little bit of CPU, and tends to be
|
- because a query to them takes only a little bit of CPU, and tends to be
|
||||||
|
@ -124,12 +142,14 @@ catFileStop = do
|
||||||
cfh <- Annex.getState Annex.catfilehandles
|
cfh <- Annex.getState Annex.catfilehandles
|
||||||
m <- case cfh of
|
m <- case cfh of
|
||||||
CatFileHandlesNonConcurrent m -> do
|
CatFileHandlesNonConcurrent m -> do
|
||||||
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent M.empty }
|
Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent emptyCatMap }
|
||||||
return m
|
return m
|
||||||
CatFileHandlesPool tm ->
|
CatFileHandlesPool tm ->
|
||||||
liftIO $ atomically $ swapTMVar tm M.empty
|
liftIO $ atomically $ swapTMVar tm emptyCatMap
|
||||||
liftIO $ forM_ (M.elems m) $ \p ->
|
liftIO $ forM_ (M.elems (catFileMap m)) $ \p ->
|
||||||
freeResourcePool p Git.CatFile.catFileStop
|
freeResourcePool p Git.CatFile.catFileStop
|
||||||
|
liftIO $ forM_ (M.elems (catFileMetaDataMap m)) $ \p ->
|
||||||
|
freeResourcePool p Git.CatFile.catFileMetaDataStop
|
||||||
|
|
||||||
{- From ref to a symlink or a pointer file, get the key. -}
|
{- From ref to a symlink or a pointer file, get the key. -}
|
||||||
catKey :: Ref -> Annex (Maybe Key)
|
catKey :: Ref -> Annex (Maybe Key)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git cat-file interface
|
{- git cat-file interface
|
||||||
-
|
-
|
||||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -10,9 +10,13 @@
|
||||||
|
|
||||||
module Git.CatFile (
|
module Git.CatFile (
|
||||||
CatFileHandle,
|
CatFileHandle,
|
||||||
|
CatFileMetaDataHandle,
|
||||||
catFileStart,
|
catFileStart,
|
||||||
|
catFileMetaDataStart,
|
||||||
catFileStart',
|
catFileStart',
|
||||||
|
catFileMetaDataStart',
|
||||||
catFileStop,
|
catFileStop,
|
||||||
|
catFileMetaDataStop,
|
||||||
catFile,
|
catFile,
|
||||||
catFileDetails,
|
catFileDetails,
|
||||||
catTree,
|
catTree,
|
||||||
|
@ -55,8 +59,12 @@ import Utility.Tuple
|
||||||
|
|
||||||
data CatFileHandle = CatFileHandle
|
data CatFileHandle = CatFileHandle
|
||||||
{ catFileProcess :: CoProcess.CoProcessHandle
|
{ catFileProcess :: CoProcess.CoProcessHandle
|
||||||
, checkFileProcess :: CoProcess.CoProcessHandle
|
, catFileGitRepo :: Repo
|
||||||
, gitRepo :: Repo
|
}
|
||||||
|
|
||||||
|
data CatFileMetaDataHandle = CatFileMetaDataHandle
|
||||||
|
{ checkFileProcess :: CoProcess.CoProcessHandle
|
||||||
|
, checkFileGitRepo :: Repo
|
||||||
}
|
}
|
||||||
|
|
||||||
catFileStart :: Repo -> IO CatFileHandle
|
catFileStart :: Repo -> IO CatFileHandle
|
||||||
|
@ -64,22 +72,31 @@ catFileStart = catFileStart' True
|
||||||
|
|
||||||
catFileStart' :: Bool -> Repo -> IO CatFileHandle
|
catFileStart' :: Bool -> Repo -> IO CatFileHandle
|
||||||
catFileStart' restartable repo = CatFileHandle
|
catFileStart' restartable repo = CatFileHandle
|
||||||
<$> startp "--batch"
|
<$> startcat restartable repo "--batch"
|
||||||
<*> startp ("--batch-check=" ++ batchFormat)
|
<*> pure repo
|
||||||
|
|
||||||
|
catFileMetaDataStart :: Repo -> IO CatFileMetaDataHandle
|
||||||
|
catFileMetaDataStart = catFileMetaDataStart' True
|
||||||
|
|
||||||
|
catFileMetaDataStart' :: Bool -> Repo -> IO CatFileMetaDataHandle
|
||||||
|
catFileMetaDataStart' restartable repo = CatFileMetaDataHandle
|
||||||
|
<$> startcat restartable repo ("--batch-check=" ++ batchFormat)
|
||||||
<*> pure repo
|
<*> pure repo
|
||||||
where
|
|
||||||
startp p = gitCoProcessStart restartable
|
|
||||||
[ Param "cat-file"
|
|
||||||
, Param p
|
|
||||||
] repo
|
|
||||||
|
|
||||||
batchFormat :: String
|
batchFormat :: String
|
||||||
batchFormat = "%(objectname) %(objecttype) %(objectsize)"
|
batchFormat = "%(objectname) %(objecttype) %(objectsize)"
|
||||||
|
|
||||||
|
startcat :: Bool -> Repo -> String -> IO CoProcess.CoProcessHandle
|
||||||
|
startcat restartable repo p = gitCoProcessStart restartable
|
||||||
|
[ Param "cat-file"
|
||||||
|
, Param p
|
||||||
|
] repo
|
||||||
|
|
||||||
catFileStop :: CatFileHandle -> IO ()
|
catFileStop :: CatFileHandle -> IO ()
|
||||||
catFileStop h = do
|
catFileStop = CoProcess.stop . catFileProcess
|
||||||
CoProcess.stop (catFileProcess h)
|
|
||||||
CoProcess.stop (checkFileProcess h)
|
catFileMetaDataStop :: CatFileMetaDataHandle -> IO ()
|
||||||
|
catFileMetaDataStop = CoProcess.stop . checkFileProcess
|
||||||
|
|
||||||
{- Reads a file from a specified branch. -}
|
{- Reads a file from a specified branch. -}
|
||||||
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
|
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
|
||||||
|
@ -106,16 +123,16 @@ catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \f
|
||||||
Nothing -> error $ "unknown response from git cat-file " ++ show (header, object)
|
Nothing -> error $ "unknown response from git cat-file " ++ show (header, object)
|
||||||
where
|
where
|
||||||
-- Slow fallback path for filenames containing newlines.
|
-- Slow fallback path for filenames containing newlines.
|
||||||
newlinefallback = queryObjectType object (gitRepo h) >>= \case
|
newlinefallback = queryObjectType object (catFileGitRepo h) >>= \case
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just objtype -> queryContent object (gitRepo h) >>= \case
|
Just objtype -> queryContent object (catFileGitRepo h) >>= \case
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just content -> do
|
Just content -> do
|
||||||
-- only the --batch interface allows getting
|
-- only the --batch interface allows getting
|
||||||
-- the sha, so have to re-hash the object
|
-- the sha, so have to re-hash the object
|
||||||
sha <- hashObject' objtype
|
sha <- hashObject' objtype
|
||||||
(flip L.hPut content)
|
(flip L.hPut content)
|
||||||
(gitRepo h)
|
(catFileGitRepo h)
|
||||||
return (Just (content, sha, objtype))
|
return (Just (content, sha, objtype))
|
||||||
|
|
||||||
readObjectContent :: Handle -> ParsedResp -> IO L.ByteString
|
readObjectContent :: Handle -> ParsedResp -> IO L.ByteString
|
||||||
|
@ -131,7 +148,7 @@ readObjectContent h (ParsedResp _ _ size) = do
|
||||||
readObjectContent _ DNE = error "internal"
|
readObjectContent _ DNE = error "internal"
|
||||||
|
|
||||||
{- Gets the size and type of an object, without reading its content. -}
|
{- Gets the size and type of an object, without reading its content. -}
|
||||||
catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
|
catObjectMetaData :: CatFileMetaDataHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
|
||||||
catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
|
catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
|
||||||
resp <- S8.hGetLine from
|
resp <- S8.hGetLine from
|
||||||
case parseResp object resp of
|
case parseResp object resp of
|
||||||
|
@ -142,9 +159,9 @@ catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $
|
||||||
where
|
where
|
||||||
-- Slow fallback path for filenames containing newlines.
|
-- Slow fallback path for filenames containing newlines.
|
||||||
newlinefallback = do
|
newlinefallback = do
|
||||||
sha <- Git.Ref.sha object (gitRepo h)
|
sha <- Git.Ref.sha object (checkFileGitRepo h)
|
||||||
sz <- querySize object (gitRepo h)
|
sz <- querySize object (checkFileGitRepo h)
|
||||||
objtype <- queryObjectType object (gitRepo h)
|
objtype <- queryObjectType object (checkFileGitRepo h)
|
||||||
return $ (,,) <$> sha <*> sz <*> objtype
|
return $ (,,) <$> sha <*> sz <*> objtype
|
||||||
|
|
||||||
data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE
|
data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-cat file handles pools
|
{- git-cat file handles pools
|
||||||
-
|
-
|
||||||
- Copyright 2020 Joey Hess <id@joeyh.name>
|
- Copyright 2020-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -9,22 +9,30 @@ module Types.CatFileHandles (
|
||||||
CatFileHandles(..),
|
CatFileHandles(..),
|
||||||
catFileHandlesNonConcurrent,
|
catFileHandlesNonConcurrent,
|
||||||
catFileHandlesPool,
|
catFileHandlesPool,
|
||||||
|
CatMap(..),
|
||||||
|
emptyCatMap,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.STM
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
|
||||||
import Utility.ResourcePool
|
import Utility.ResourcePool
|
||||||
import Git.CatFile (CatFileHandle)
|
import Git.CatFile (CatFileHandle, CatFileMetaDataHandle)
|
||||||
|
|
||||||
data CatFileHandles
|
data CatFileHandles
|
||||||
= CatFileHandlesNonConcurrent CatMap
|
= CatFileHandlesNonConcurrent CatMap
|
||||||
| CatFileHandlesPool (TMVar CatMap)
|
| CatFileHandlesPool (TMVar CatMap)
|
||||||
|
|
||||||
type CatMap = M.Map FilePath (ResourcePool CatFileHandle)
|
data CatMap = CatMap
|
||||||
|
{ catFileMap :: M.Map FilePath (ResourcePool CatFileHandle)
|
||||||
|
, catFileMetaDataMap :: M.Map FilePath (ResourcePool CatFileMetaDataHandle)
|
||||||
|
}
|
||||||
|
|
||||||
|
emptyCatMap :: CatMap
|
||||||
|
emptyCatMap = CatMap M.empty M.empty
|
||||||
|
|
||||||
catFileHandlesNonConcurrent :: CatFileHandles
|
catFileHandlesNonConcurrent :: CatFileHandles
|
||||||
catFileHandlesNonConcurrent = CatFileHandlesNonConcurrent M.empty
|
catFileHandlesNonConcurrent = CatFileHandlesNonConcurrent emptyCatMap
|
||||||
|
|
||||||
catFileHandlesPool :: IO CatFileHandles
|
catFileHandlesPool :: IO CatFileHandles
|
||||||
catFileHandlesPool = CatFileHandlesPool <$> newTMVarIO M.empty
|
catFileHandlesPool = CatFileHandlesPool <$> newTMVarIO emptyCatMap
|
||||||
|
|
Loading…
Reference in a new issue