work around catObjectStream polymorism perf

Breaking it up like this doesn't change perf, and lets another version
be written in just a couple lines.
This commit is contained in:
Joey Hess 2020-07-09 13:25:34 -04:00
parent 9f6bd6cc05
commit cb6e19f4c5
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38

View file

@ -21,6 +21,7 @@ module Git.CatFile (
catObjectDetails, catObjectDetails,
catObjectMetaData, catObjectMetaData,
catObjectStream, catObjectStream,
catObjectStream',
) where ) where
import System.IO import System.IO
@ -283,12 +284,8 @@ parseCommit b = Commit
lt = fromIntegral (ord '<') lt = fromIntegral (ord '<')
gt = fromIntegral (ord '>') gt = fromIntegral (ord '>')
{- Uses cat-file to stream the contents of the files listed by lstree {- Uses cat-file to stream the contents of the files as efficiently
- as efficiently as possible. This is much faster than querying it - as possible. This is much faster than querying it repeatedly per file.
- repeatedly per file.
-
- (Note that, while a more polymorphic version of this can be written,
- this version is faster, possibly due to being less polymorphic.)
-} -}
catObjectStream catObjectStream
:: (MonadMask m, MonadIO m) :: (MonadMask m, MonadIO m)
@ -297,8 +294,8 @@ catObjectStream
-> Repo -> Repo
-> (IO (Maybe (TopFilePath, Maybe L.ByteString)) -> m ()) -> (IO (Maybe (TopFilePath, Maybe L.ByteString)) -> m ())
-> m () -> m ()
catObjectStream l want repo a = assertLocal repo $ do catObjectStream l want repo a = withCatObjectStream repo feeder $
bracketIO start stop $ \(mv, _, _, hout, _) -> a (reader mv hout) \mv hout -> a (catObjectReader mv hout)
where where
feeder mv h = do feeder mv h = do
forM_ l $ \ti -> forM_ l $ \ti ->
@ -309,19 +306,47 @@ catObjectStream l want repo a = assertLocal repo $ do
S8.hPutStrLn h (fromRef' sha) S8.hPutStrLn h (fromRef' sha)
hClose h hClose h
reader mv h = ifM (hIsEOF h) {- While this variant could be combined with catObjectStream into a
( return Nothing - more polymorphic function, the specialization of both is important for
, do - performance. -}
(sha, f) <- liftIO $ atomically $ headTList mv catObjectStream'
resp <- S8.hGetLine h :: (MonadMask m, MonadIO m)
case parseResp sha resp of => [(RawFilePath, Sha, FileMode)]
Just r@(ParsedResp {}) -> do -> Repo
content <- readObjectContent h r -> (IO (Maybe (RawFilePath, Maybe L.ByteString)) -> m ())
return (Just (f, Just content)) -> m ()
Just DNE -> return (Just (f, Nothing)) catObjectStream' l repo a = withCatObjectStream repo feeder $
Nothing -> error $ "unknown response from git cat-file " ++ show resp \mv hout -> a (catObjectReader mv hout)
) where
feeder mv h = do
forM_ l $ \(f, sha, _) -> do
liftIO $ atomically $ snocTList mv (sha, f)
S8.hPutStrLn h (fromRef' sha)
hClose h
catObjectReader :: TList (Ref, a) -> Handle -> IO (Maybe (a, Maybe L.ByteString))
catObjectReader mv h = ifM (hIsEOF h)
( return Nothing
, do
(sha, f) <- liftIO $ atomically $ headTList mv
resp <- S8.hGetLine h
case parseResp sha resp of
Just r@(ParsedResp {}) -> do
content <- readObjectContent h r
return (Just (f, Just content))
Just DNE -> return (Just (f, Nothing))
Nothing -> error $ "unknown response from git cat-file " ++ show resp
)
withCatObjectStream
:: (MonadMask m, MonadIO m)
=> Repo
-> (TList a -> Handle -> IO ())
-> (TList a -> Handle -> m ())
-> m ()
withCatObjectStream repo feeder reader = assertLocal repo $
bracketIO start stop $ \(mv, _, _, hout, _) -> reader mv hout
where
params = params =
[ Param "cat-file" [ Param "cat-file"
, Param ("--batch=" ++ batchFormat) , Param ("--batch=" ++ batchFormat)