alternative implementation of pipeNullSplit not using lazy bytestring

Written to try to fix a memory leak involving PINNED memory. While it
does not do that, it is in many ways a nicer approach and may be worth
integrating at some point once the memory leak is understood.

The S.copy is probably unncessary, I just wanted to make sure I didn't
have one chunk pinning other chunks of the same ByteString.

This commit was sponsored by Jochen Bartl on Patreon.
This commit is contained in:
Joey Hess 2020-10-13 13:34:59 -04:00
parent cf0ff9f53d
commit 67424e6498
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 60 additions and 6 deletions

View file

@ -16,6 +16,9 @@ import qualified Utility.CoProcess as CoProcess
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
import Data.IORef
import qualified Data.DList as DL
import System.IO.Unsafe (unsafeInterleaveIO)
{- Constructs a git command line operating on the specified repo. -}
gitCommandLine :: [CommandParam] -> Repo -> [CommandParam]
@ -129,6 +132,55 @@ pipeNullSplit' params repo = do
(s, cleanup) <- pipeNullSplit params repo
return (map L.toStrict s, cleanup)
pipeNullSplit'' :: [CommandParam] -> Repo -> IO [S.ByteString]
pipeNullSplit'' params repo = do
getchunk <- pipeNullChunks params repo
go getchunk DL.empty
where
go getchunk l = unsafeInterleaveIO getchunk >>= \case
Just c -> go getchunk (DL.cons c l)
Nothing -> return (DL.toList l)
{- Returns an action that reads the next null terminated chunk of the
- command's output, or Nothing at EOF. -}
pipeNullChunks :: [CommandParam] -> Repo -> IO (IO (Maybe S.ByteString))
pipeNullChunks params repo = do
(_, Just h, _, pid) <- createProcess p { std_out = CreatePipe }
bv <- newIORef []
return (getchunk pid bv h)
where
p = gitCreateProcess params repo
-- Larger than the typical git output chunk, but not so large
-- that it will be a problem for part of it to remain
-- in memory while proccessing the rest of it.
maxsz = 1024
getchunk pid bv h = readIORef bv >>= \case
(prev:[]) -> continuechunk pid bv h prev
(c:cs) -> do
writeIORef bv cs
return (Just c)
[] -> continuechunk pid bv h S.empty
continuechunk pid bv h prev = do
s <- S.hGetSome h maxsz
case S.split 0 s of
(c:[]) ->
-- Chunk is still incomplete.
continuechunk pid bv h (prev <> c)
(c:rest) -> do
writeIORef bv (filter (not . S.null) rest)
return (Just (S.copy (prev <> c)))
[] -> do
-- Empty read; at EOF.
void $ checkSuccessProcess pid
-- Make subsequent calls return Nothing
-- to indicate EOF to the caller, if this
-- call does not.
writeIORef bv []
return (if S.null prev then Nothing else Just prev)
pipeNullSplitStrict :: [CommandParam] -> Repo -> IO [S.ByteString]
pipeNullSplitStrict params repo = do
s <- pipeReadStrict params repo

View file

@ -5,6 +5,8 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Git.LsFiles (
Options(..),
inRepo,
@ -175,8 +177,8 @@ stagedDetails = stagedDetails' parseStagedDetails []
stagedDetails' :: (S.ByteString -> Maybe t) -> [CommandParam] -> [RawFilePath] -> Repo -> IO ([t], IO Bool)
stagedDetails' parser ps l repo = guardSafeForLsFiles repo $ do
(ls, cleanup) <- pipeNullSplit' params repo
return (mapMaybe parser ls, cleanup)
ls <- pipeNullSplit'' params repo
return (mapMaybe parser ls, return True)
where
params = Param "ls-files" : Param "--stage" : Param "-z" : ps ++
Param "--" : map (File . fromRawFilePath) l
@ -185,13 +187,13 @@ parseStagedDetails :: S.ByteString -> Maybe StagedDetails
parseStagedDetails = eitherToMaybe . A.parseOnly parser
where
parser = do
mode <- octal
!mode <- octal
void $ A8.char ' '
sha <- maybe (fail "bad sha") return . extractSha =<< nextword
!sha <- maybe (fail "bad sha") return . extractSha . S.copy =<< nextword
void $ A8.char ' '
stagenum <- A8.decimal
!stagenum <- A8.decimal
void $ A8.char '\t'
file <- A.takeByteString
!file <- S.copy <$> A.takeByteString
return (file, sha, mode, stagenum)
nextword = A8.takeTill (== ' ')