better catObjectStream' and use Chan
The catObjectStream' is generic enough to let it be nicely used from inside Annex monad. Chan will be faster than DList here. Bearing in mind, it is unbounded, but in reality will be bounded by the size of the stdio buffer through git cat-file. This speeds up --all by about 10% although I think only getting back to the previous performance before I introduced that DList.
This commit is contained in:
parent
f63a7aa0e7
commit
bd2d304064
1 changed files with 40 additions and 36 deletions
|
@ -37,7 +37,7 @@ import Numeric
|
||||||
import System.Posix.Types
|
import System.Posix.Types
|
||||||
import Text.Read
|
import Text.Read
|
||||||
import Control.Concurrent.Async
|
import Control.Concurrent.Async
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.Chan
|
||||||
import Control.Monad.IO.Class (MonadIO)
|
import Control.Monad.IO.Class (MonadIO)
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
|
@ -51,7 +51,6 @@ import Git.HashObject
|
||||||
import qualified Git.LsTree as LsTree
|
import qualified Git.LsTree as LsTree
|
||||||
import qualified Utility.CoProcess as CoProcess
|
import qualified Utility.CoProcess as CoProcess
|
||||||
import Utility.Tuple
|
import Utility.Tuple
|
||||||
import Utility.TList
|
|
||||||
|
|
||||||
data CatFileHandle = CatFileHandle
|
data CatFileHandle = CatFileHandle
|
||||||
{ catFileProcess :: CoProcess.CoProcessHandle
|
{ catFileProcess :: CoProcess.CoProcessHandle
|
||||||
|
@ -83,12 +82,12 @@ catFileStop h = do
|
||||||
|
|
||||||
{- Reads a file from a specified branch. -}
|
{- Reads a file from a specified branch. -}
|
||||||
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
|
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
|
||||||
catFile h branch file = catObject h $ Ref $
|
catFile h branch file = catObject h $
|
||||||
fromRef' branch <> ":" <> toInternalGitPath file
|
Git.Ref.branchFileRef branch file
|
||||||
|
|
||||||
catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType))
|
catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType))
|
||||||
catFileDetails h branch file = catObjectDetails h $ Ref $
|
catFileDetails h branch file = catObjectDetails h $
|
||||||
fromRef' branch <> ":" <> toInternalGitPath file
|
Git.Ref.branchFileRef branch file
|
||||||
|
|
||||||
{- Uses a running git cat-file read the content of an object.
|
{- Uses a running git cat-file read the content of an object.
|
||||||
- Objects that do not exist will have "" returned. -}
|
- Objects that do not exist will have "" returned. -}
|
||||||
|
@ -286,6 +285,9 @@ parseCommit b = Commit
|
||||||
|
|
||||||
{- Uses cat-file to stream the contents of the files as efficiently
|
{- Uses cat-file to stream the contents of the files as efficiently
|
||||||
- as possible. This is much faster than querying it repeatedly per file.
|
- as possible. This is much faster than querying it repeatedly per file.
|
||||||
|
-
|
||||||
|
- While this could be made more polymorhpic, specialization is important
|
||||||
|
- to its performance.
|
||||||
-}
|
-}
|
||||||
catObjectStream
|
catObjectStream
|
||||||
:: (MonadMask m, MonadIO m)
|
:: (MonadMask m, MonadIO m)
|
||||||
|
@ -294,43 +296,48 @@ catObjectStream
|
||||||
-> Repo
|
-> Repo
|
||||||
-> (IO (Maybe (TopFilePath, Maybe L.ByteString)) -> m ())
|
-> (IO (Maybe (TopFilePath, Maybe L.ByteString)) -> m ())
|
||||||
-> m ()
|
-> m ()
|
||||||
catObjectStream l want repo a = withCatObjectStream repo feeder $
|
catObjectStream l want repo reader = withCatObjectStream repo $
|
||||||
\mv hout -> a (catObjectReader mv hout)
|
\c hin hout -> bracketIO
|
||||||
|
(async $ feeder c hin)
|
||||||
|
cancel
|
||||||
|
(const (reader (catObjectReader c hout)))
|
||||||
where
|
where
|
||||||
feeder mv h = do
|
feeder c h = do
|
||||||
forM_ l $ \ti ->
|
forM_ l $ \ti ->
|
||||||
when (want ti) $ do
|
when (want ti) $ do
|
||||||
let f = LsTree.file ti
|
let f = LsTree.file ti
|
||||||
let sha = LsTree.sha ti
|
let sha = LsTree.sha ti
|
||||||
liftIO $ atomically $ snocTList mv (sha, f)
|
liftIO $ writeChan c (sha, f)
|
||||||
S8.hPutStrLn h (fromRef' sha)
|
S8.hPutStrLn h (fromRef' sha)
|
||||||
hClose h
|
hClose h
|
||||||
|
|
||||||
{- While this variant could be combined with catObjectStream into a
|
|
||||||
- more polymorphic function, the specialization of both is important for
|
|
||||||
- performance. -}
|
|
||||||
catObjectStream'
|
catObjectStream'
|
||||||
:: (MonadMask m, MonadIO m)
|
:: (MonadMask m, MonadIO m)
|
||||||
=> [(RawFilePath, Sha, FileMode)]
|
=> Repo
|
||||||
-> Repo
|
-> (
|
||||||
-> (IO (Maybe (RawFilePath, Maybe L.ByteString)) -> m ())
|
((v, Ref) -> IO ()) -- ^ call to feed values in
|
||||||
|
-> IO () -- call once all values are fed in
|
||||||
|
-> IO (Maybe (v, Maybe L.ByteString)) -- call to read results
|
||||||
|
-> m ()
|
||||||
|
)
|
||||||
-> m ()
|
-> m ()
|
||||||
catObjectStream' l repo a = withCatObjectStream repo feeder $
|
catObjectStream' repo a = withCatObjectStream repo go
|
||||||
\mv hout -> a (catObjectReader mv hout)
|
|
||||||
where
|
where
|
||||||
feeder mv h = do
|
go c hin hout = a
|
||||||
forM_ l $ \(f, sha, _) -> do
|
(feeder c hin)
|
||||||
liftIO $ atomically $ snocTList mv (sha, f)
|
(hClose hin)
|
||||||
S8.hPutStrLn h (fromRef' sha)
|
(catObjectReader c hout)
|
||||||
hClose h
|
feeder c h (v, ref) = do
|
||||||
|
liftIO $ writeChan c (ref, v)
|
||||||
|
S8.hPutStrLn h (fromRef' ref)
|
||||||
|
|
||||||
catObjectReader :: TList (Ref, a) -> Handle -> IO (Maybe (a, Maybe L.ByteString))
|
catObjectReader :: Chan (Ref, a) -> Handle -> IO (Maybe (a, Maybe L.ByteString))
|
||||||
catObjectReader mv h = ifM (hIsEOF h)
|
catObjectReader c h = ifM (hIsEOF h)
|
||||||
( return Nothing
|
( return Nothing
|
||||||
, do
|
, do
|
||||||
(sha, f) <- liftIO $ atomically $ headTList mv
|
(ref, f) <- liftIO $ readChan c
|
||||||
resp <- S8.hGetLine h
|
resp <- S8.hGetLine h
|
||||||
case parseResp sha resp of
|
case parseResp ref resp of
|
||||||
Just r@(ParsedResp {}) -> do
|
Just r@(ParsedResp {}) -> do
|
||||||
content <- readObjectContent h r
|
content <- readObjectContent h r
|
||||||
return (Just (f, Just content))
|
return (Just (f, Just content))
|
||||||
|
@ -341,11 +348,10 @@ catObjectReader mv h = ifM (hIsEOF h)
|
||||||
withCatObjectStream
|
withCatObjectStream
|
||||||
:: (MonadMask m, MonadIO m)
|
:: (MonadMask m, MonadIO m)
|
||||||
=> Repo
|
=> Repo
|
||||||
-> (TList a -> Handle -> IO ())
|
-> (Chan a -> Handle -> Handle -> m ())
|
||||||
-> (TList a -> Handle -> m ())
|
|
||||||
-> m ()
|
-> m ()
|
||||||
withCatObjectStream repo feeder reader = assertLocal repo $
|
withCatObjectStream repo reader = assertLocal repo $
|
||||||
bracketIO start stop $ \(mv, _, _, hout, _) -> reader mv hout
|
bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout
|
||||||
where
|
where
|
||||||
params =
|
params =
|
||||||
[ Param "cat-file"
|
[ Param "cat-file"
|
||||||
|
@ -354,17 +360,15 @@ withCatObjectStream repo feeder reader = assertLocal repo $
|
||||||
]
|
]
|
||||||
|
|
||||||
start = do
|
start = do
|
||||||
mv <- liftIO $ atomically newTList
|
|
||||||
let p = gitCreateProcess params repo
|
let p = gitCreateProcess params repo
|
||||||
(Just hin, Just hout, _, pid) <- createProcess p
|
(Just hin, Just hout, _, pid) <- createProcess p
|
||||||
{ std_in = CreatePipe
|
{ std_in = CreatePipe
|
||||||
, std_out = CreatePipe
|
, std_out = CreatePipe
|
||||||
}
|
}
|
||||||
f <- async (feeder mv hin)
|
c <- newChan
|
||||||
return (mv, f, hin, hout, pid)
|
return (c, hin, hout, pid)
|
||||||
|
|
||||||
stop (_, f, hin, hout, pid) = do
|
stop (_, hin, hout, pid) = do
|
||||||
cancel f
|
|
||||||
hClose hin
|
hClose hin
|
||||||
hClose hout
|
hClose hout
|
||||||
void $ checkSuccessProcess pid
|
void $ checkSuccessProcess pid
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue