better catObjectStream' and use Chan

The catObjectStream' is generic enough to let it be nicely used from
inside Annex monad.

Chan will be faster than DList here. Bearing in mind, it is unbounded,
but in reality will be bounded by the size of the stdio buffer through
git cat-file.

This speeds up --all by about 10% although I think only getting back to
the previous performance before I introduced that DList.
This commit is contained in:
Joey Hess 2020-07-10 13:05:03 -04:00
parent f63a7aa0e7
commit bd2d304064
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38

View file

@ -37,7 +37,7 @@ import Numeric
import System.Posix.Types import System.Posix.Types
import Text.Read import Text.Read
import Control.Concurrent.Async import Control.Concurrent.Async
import Control.Concurrent.STM import Control.Concurrent.Chan
import Control.Monad.IO.Class (MonadIO) import Control.Monad.IO.Class (MonadIO)
import Common import Common
@ -51,7 +51,6 @@ import Git.HashObject
import qualified Git.LsTree as LsTree import qualified Git.LsTree as LsTree
import qualified Utility.CoProcess as CoProcess import qualified Utility.CoProcess as CoProcess
import Utility.Tuple import Utility.Tuple
import Utility.TList
data CatFileHandle = CatFileHandle data CatFileHandle = CatFileHandle
{ catFileProcess :: CoProcess.CoProcessHandle { catFileProcess :: CoProcess.CoProcessHandle
@ -83,12 +82,12 @@ catFileStop h = do
{- Reads a file from a specified branch. -} {- Reads a file from a specified branch. -}
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
catFile h branch file = catObject h $ Ref $ catFile h branch file = catObject h $
fromRef' branch <> ":" <> toInternalGitPath file Git.Ref.branchFileRef branch file
catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType)) catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType))
catFileDetails h branch file = catObjectDetails h $ Ref $ catFileDetails h branch file = catObjectDetails h $
fromRef' branch <> ":" <> toInternalGitPath file Git.Ref.branchFileRef branch file
{- Uses a running git cat-file read the content of an object. {- Uses a running git cat-file read the content of an object.
- Objects that do not exist will have "" returned. -} - Objects that do not exist will have "" returned. -}
@ -286,6 +285,9 @@ parseCommit b = Commit
{- Uses cat-file to stream the contents of the files as efficiently {- Uses cat-file to stream the contents of the files as efficiently
- as possible. This is much faster than querying it repeatedly per file. - as possible. This is much faster than querying it repeatedly per file.
-
- While this could be made more polymorhpic, specialization is important
- to its performance.
-} -}
catObjectStream catObjectStream
:: (MonadMask m, MonadIO m) :: (MonadMask m, MonadIO m)
@ -294,43 +296,48 @@ catObjectStream
-> Repo -> Repo
-> (IO (Maybe (TopFilePath, Maybe L.ByteString)) -> m ()) -> (IO (Maybe (TopFilePath, Maybe L.ByteString)) -> m ())
-> m () -> m ()
catObjectStream l want repo a = withCatObjectStream repo feeder $ catObjectStream l want repo reader = withCatObjectStream repo $
\mv hout -> a (catObjectReader mv hout) \c hin hout -> bracketIO
(async $ feeder c hin)
cancel
(const (reader (catObjectReader c hout)))
where where
feeder mv h = do feeder c h = do
forM_ l $ \ti -> forM_ l $ \ti ->
when (want ti) $ do when (want ti) $ do
let f = LsTree.file ti let f = LsTree.file ti
let sha = LsTree.sha ti let sha = LsTree.sha ti
liftIO $ atomically $ snocTList mv (sha, f) liftIO $ writeChan c (sha, f)
S8.hPutStrLn h (fromRef' sha) S8.hPutStrLn h (fromRef' sha)
hClose h hClose h
{- While this variant could be combined with catObjectStream into a
- more polymorphic function, the specialization of both is important for
- performance. -}
catObjectStream' catObjectStream'
:: (MonadMask m, MonadIO m) :: (MonadMask m, MonadIO m)
=> [(RawFilePath, Sha, FileMode)] => Repo
-> Repo -> (
-> (IO (Maybe (RawFilePath, Maybe L.ByteString)) -> m ()) ((v, Ref) -> IO ()) -- ^ call to feed values in
-> IO () -- call once all values are fed in
-> IO (Maybe (v, Maybe L.ByteString)) -- call to read results
-> m ()
)
-> m () -> m ()
catObjectStream' l repo a = withCatObjectStream repo feeder $ catObjectStream' repo a = withCatObjectStream repo go
\mv hout -> a (catObjectReader mv hout)
where where
feeder mv h = do go c hin hout = a
forM_ l $ \(f, sha, _) -> do (feeder c hin)
liftIO $ atomically $ snocTList mv (sha, f) (hClose hin)
S8.hPutStrLn h (fromRef' sha) (catObjectReader c hout)
hClose h feeder c h (v, ref) = do
liftIO $ writeChan c (ref, v)
S8.hPutStrLn h (fromRef' ref)
catObjectReader :: TList (Ref, a) -> Handle -> IO (Maybe (a, Maybe L.ByteString)) catObjectReader :: Chan (Ref, a) -> Handle -> IO (Maybe (a, Maybe L.ByteString))
catObjectReader mv h = ifM (hIsEOF h) catObjectReader c h = ifM (hIsEOF h)
( return Nothing ( return Nothing
, do , do
(sha, f) <- liftIO $ atomically $ headTList mv (ref, f) <- liftIO $ readChan c
resp <- S8.hGetLine h resp <- S8.hGetLine h
case parseResp sha resp of case parseResp ref resp of
Just r@(ParsedResp {}) -> do Just r@(ParsedResp {}) -> do
content <- readObjectContent h r content <- readObjectContent h r
return (Just (f, Just content)) return (Just (f, Just content))
@ -341,11 +348,10 @@ catObjectReader mv h = ifM (hIsEOF h)
withCatObjectStream withCatObjectStream
:: (MonadMask m, MonadIO m) :: (MonadMask m, MonadIO m)
=> Repo => Repo
-> (TList a -> Handle -> IO ()) -> (Chan a -> Handle -> Handle -> m ())
-> (TList a -> Handle -> m ())
-> m () -> m ()
withCatObjectStream repo feeder reader = assertLocal repo $ withCatObjectStream repo reader = assertLocal repo $
bracketIO start stop $ \(mv, _, _, hout, _) -> reader mv hout bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout
where where
params = params =
[ Param "cat-file" [ Param "cat-file"
@ -354,17 +360,15 @@ withCatObjectStream repo feeder reader = assertLocal repo $
] ]
start = do start = do
mv <- liftIO $ atomically newTList
let p = gitCreateProcess params repo let p = gitCreateProcess params repo
(Just hin, Just hout, _, pid) <- createProcess p (Just hin, Just hout, _, pid) <- createProcess p
{ std_in = CreatePipe { std_in = CreatePipe
, std_out = CreatePipe , std_out = CreatePipe
} }
f <- async (feeder mv hin) c <- newChan
return (mv, f, hin, hout, pid) return (c, hin, hout, pid)
stop (_, f, hin, hout, pid) = do stop (_, hin, hout, pid) = do
cancel f
hClose hin hClose hin
hClose hout hClose hout
void $ checkSuccessProcess pid void $ checkSuccessProcess pid