Fix minor FD leak in journal code.
Minor because normally only 1 FD is leaked per git-annex run. However,
the test suite leaks a few hundred FDs, and this broke it on the Debian
autobuilders, which seem to have a tigher than usual ulimit.
The leak was introduced by the lazy getDirectoryContents' that was
introduced in e6330988dd
in order to scale to
millions of journal files -- if the lazy list was never fully consumed, the
directory handle did not get closed.
Instead, pull in openDirectory/readDirectory/closeDirectory code that I
already developed and submitted in a patch to the haskell directory library
earlier. Using this in journalDirty avoids the place that the lazy list
caused a problem. And using it in stageJournal eliminates the need for
getDirectoryContents'.
The getJournalFiles* functions are switched back to using the regular
strict getDirectoryContents. I'm not sure if those always consume the whole
list, so this avoids any leak. And the things that call those are things
like git annex unused, which also look at every file committed to the
git-annex branch, so would need more work to scale to insane numbers of
files anyway.
This commit is contained in:
parent
99d3010618
commit
d9d76cf98b
4 changed files with 114 additions and 50 deletions
|
@ -389,21 +389,26 @@ stageJournal jl = withIndex $ do
|
|||
prepareModifyIndex jl
|
||||
g <- gitRepo
|
||||
let dir = gitAnnexJournalDir g
|
||||
fs <- getJournalFiles jl
|
||||
(jlogf, jlogh) <- openjlog
|
||||
liftIO $ do
|
||||
withJournalHandle $ \jh -> do
|
||||
h <- hashObjectStart g
|
||||
Git.UpdateIndex.streamUpdateIndex g
|
||||
[genstream dir h fs jlogh]
|
||||
[genstream dir h jh jlogh]
|
||||
hashObjectStop h
|
||||
return $ cleanup dir jlogh jlogf
|
||||
where
|
||||
genstream dir h fs jlogh streamer = forM_ fs $ \file -> do
|
||||
let path = dir </> file
|
||||
sha <- hashFile h path
|
||||
hPutStrLn jlogh file
|
||||
streamer $ Git.UpdateIndex.updateIndexLine
|
||||
sha FileBlob (asTopFilePath $ fileJournal file)
|
||||
genstream dir h jh jlogh streamer = do
|
||||
v <- readDirectory jh
|
||||
case v of
|
||||
Nothing -> return ()
|
||||
Just file -> do
|
||||
unless (dirCruft file) $ do
|
||||
let path = dir </> file
|
||||
sha <- hashFile h path
|
||||
hPutStrLn jlogh file
|
||||
streamer $ Git.UpdateIndex.updateIndexLine
|
||||
sha FileBlob (asTopFilePath $ fileJournal file)
|
||||
genstream dir h jh jlogh streamer
|
||||
-- Clean up the staged files, as listed in the temp log file.
|
||||
-- The temp file is used to avoid needing to buffer all the
|
||||
-- filenames in memory.
|
||||
|
|
|
@ -77,12 +77,27 @@ getJournalFilesStale :: Annex [FilePath]
|
|||
getJournalFilesStale = do
|
||||
g <- gitRepo
|
||||
fs <- liftIO $ catchDefaultIO [] $
|
||||
getDirectoryContents' $ gitAnnexJournalDir g
|
||||
getDirectoryContents $ gitAnnexJournalDir g
|
||||
return $ filter (`notElem` [".", ".."]) fs
|
||||
|
||||
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
|
||||
withJournalHandle a = do
|
||||
d <- fromRepo gitAnnexJournalDir
|
||||
bracketIO (openDirectory d) closeDirectory (liftIO . a)
|
||||
|
||||
{- Checks if there are changes in the journal. -}
|
||||
journalDirty :: Annex Bool
|
||||
journalDirty = not . null <$> getJournalFilesStale
|
||||
journalDirty = withJournalHandle go
|
||||
where
|
||||
go h = do
|
||||
v <- readDirectory h
|
||||
case v of
|
||||
(Just f)
|
||||
| not (dirCruft f) -> do
|
||||
closeDirectory h
|
||||
return True
|
||||
| otherwise -> go h
|
||||
Nothing -> return False
|
||||
|
||||
{- Produces a filename to use in the journal for a file on the branch.
|
||||
-
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue