Fix minor FD leak in journal code.

Minor because normally only 1 FD is leaked per git-annex run. However,
the test suite leaks a few hundred FDs, and this broke it on the Debian
autobuilders, which seem to have a tigher than usual ulimit.

The leak was introduced by the lazy getDirectoryContents' that was
introduced in e6330988dd in order to scale to
millions of journal files -- if the lazy list was never fully consumed, the
directory handle did not get closed.

Instead, pull in openDirectory/readDirectory/closeDirectory code that I
already developed and submitted in a patch to the haskell directory library
earlier. Using this in journalDirty avoids the place that the lazy list
caused a problem. And using it in stageJournal eliminates the need for
getDirectoryContents'.

The getJournalFiles* functions are switched back to using the regular
strict getDirectoryContents. I'm not sure if those always consume the whole
list, so this avoids any leak. And the things that call those are things
like git annex unused, which also look at every file committed to the
git-annex branch, so would need more work to scale to insane numbers of
files anyway.
This commit is contained in:
Joey Hess 2014-07-09 23:36:53 -04:00
parent 99d3010618
commit d9d76cf98b
4 changed files with 114 additions and 50 deletions

View file

@ -389,21 +389,26 @@ stageJournal jl = withIndex $ do
prepareModifyIndex jl
g <- gitRepo
let dir = gitAnnexJournalDir g
fs <- getJournalFiles jl
(jlogf, jlogh) <- openjlog
liftIO $ do
withJournalHandle $ \jh -> do
h <- hashObjectStart g
Git.UpdateIndex.streamUpdateIndex g
[genstream dir h fs jlogh]
[genstream dir h jh jlogh]
hashObjectStop h
return $ cleanup dir jlogh jlogf
where
genstream dir h fs jlogh streamer = forM_ fs $ \file -> do
let path = dir </> file
sha <- hashFile h path
hPutStrLn jlogh file
streamer $ Git.UpdateIndex.updateIndexLine
sha FileBlob (asTopFilePath $ fileJournal file)
genstream dir h jh jlogh streamer = do
v <- readDirectory jh
case v of
Nothing -> return ()
Just file -> do
unless (dirCruft file) $ do
let path = dir </> file
sha <- hashFile h path
hPutStrLn jlogh file
streamer $ Git.UpdateIndex.updateIndexLine
sha FileBlob (asTopFilePath $ fileJournal file)
genstream dir h jh jlogh streamer
-- Clean up the staged files, as listed in the temp log file.
-- The temp file is used to avoid needing to buffer all the
-- filenames in memory.

View file

@ -77,12 +77,27 @@ getJournalFilesStale :: Annex [FilePath]
getJournalFilesStale = do
g <- gitRepo
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents' $ gitAnnexJournalDir g
getDirectoryContents $ gitAnnexJournalDir g
return $ filter (`notElem` [".", ".."]) fs
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
withJournalHandle a = do
d <- fromRepo gitAnnexJournalDir
bracketIO (openDirectory d) closeDirectory (liftIO . a)
{- Checks if there are changes in the journal. -}
journalDirty :: Annex Bool
journalDirty = not . null <$> getJournalFilesStale
journalDirty = withJournalHandle go
where
go h = do
v <- readDirectory h
case v of
(Just f)
| not (dirCruft f) -> do
closeDirectory h
return True
| otherwise -> go h
Nothing -> return False
{- Produces a filename to use in the journal for a file on the branch.
-

View file

@ -1,4 +1,4 @@
{- directory manipulation
{- directory traversal and manipulation
-
- Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
@ -16,7 +16,9 @@ import Control.Monad
import Control.Monad.IfElse
import System.FilePath
import Control.Applicative
import Control.Concurrent
import System.IO.Unsafe (unsafeInterleaveIO)
import Data.Maybe
#ifdef mingw32_HOST_OS
import qualified System.Win32 as Win32
@ -31,44 +33,6 @@ import Utility.Exception
import Utility.Monad
import Utility.Applicative
{- Unlike getDirectoryContents, this can be used in arbitrarily
- large directories without using much memory; the list steams lazily.
-
- However, any errors that may be encountered while reading the directory
- contents are *ignored*, rather than throw them in the context of
- whatever code consumes the lazy list.
-
- See https://ghc.haskell.org/trac/ghc/ticket/9266
-}
getDirectoryContents' :: FilePath -> IO [FilePath]
getDirectoryContents' path = loop =<< opendir
where
#ifndef mingw32_HOST_OS
opendir = Posix.openDirStream path
loop dirp = do
v <- tryNonAsync $ Posix.readDirStream dirp
case v of
(Right ent) | not (null ent) -> do
rest <- unsafeInterleaveIO (loop dirp)
return (ent:rest)
_ -> do
void $ tryNonAsync $ Posix.closeDirStream dirp
return []
#else
opendir = Win32.findFirstFile (path </> "*")
loop (h, fdat) = do
-- there is always at least 1 file ("." and "..")
ent <- Win32.getFindDataFileName fdat
v <- tryNonAsync $ Win32.findNextFile h fdat
case v of
Right True -> do
rest <- unsafeInterleaveIO (loop (h, fdat))
return (ent:rest)
_ -> do
void $ tryNonAsync $ Win32.findClose h
return [ent]
#endif
dirCruft :: FilePath -> Bool
dirCruft "." = True
dirCruft ".." = True
@ -177,3 +141,77 @@ nukeFile file = void $ tryWhenExists go
#else
go = removeFile file
#endif
#ifndef mingw32_HOST_OS
data DirectoryHandle = DirectoryHandle IsOpen Posix.DirStream
#else
data DirectoryHandle = DirectoryHandle IsOpen Win32.HANDLE Win32.FindData (MVar ())
#endif
type IsOpen = MVar () -- full when the handle is open
openDirectory :: FilePath -> IO DirectoryHandle
openDirectory path = do
#ifndef mingw32_HOST_OS
dirp <- Posix.openDirStream path
isopen <- newMVar ()
return (DirectoryHandle isopen dirp)
#else
(h, fdat) <- Win32.findFirstFile (path </> "*")
-- Indicate that the fdat contains a filename that readDirectory
-- has not yet returned, by making the MVar be full.
-- (There's always at least a "." entry.)
alreadyhave <- newMVar ()
isopen <- newMVar ()
return (DirectoryHandle isopen h fdat alreadyhave)
#endif
closeDirectory :: DirectoryHandle -> IO ()
#ifndef mingw32_HOST_OS
closeDirectory (DirectoryHandle isopen dirp) =
whenOpen isopen $
Posix.closeDirStream dirp
#else
closeDirectory (DirectoryHandle isopen h _ alreadyhave) =
whenOpen isopen $ do
_ <- tryTakeMVar alreadyhave
Win32.findClose h
#endif
where
whenOpen :: IsOpen -> IO () -> IO ()
whenOpen mv f = do
v <- tryTakeMVar mv
when (isJust v) f
{- |Reads the next entry from the handle. Once the end of the directory
is reached, returns Nothing and automatically closes the handle.
-}
readDirectory :: DirectoryHandle -> IO (Maybe FilePath)
#ifndef mingw32_HOST_OS
readDirectory hdl@(DirectoryHandle _ dirp) = do
e <- Posix.readDirStream dirp
if null e
then do
closeDirectory hdl
return Nothing
else return (Just e)
#else
readDirectory hdl@(DirectoryHandle _ h fdat mv) = do
-- If the MVar is full, then the filename in fdat has
-- not yet been returned. Otherwise, need to find the next
-- file.
r <- tryTakeMVar mv
case r of
Just () -> getfn
Nothing -> do
more <- Win32.findNextFile h fdat
if more
then getfn
else do
closeDirectory hdl
return Nothing
where
getfn = do
filename <- Win32.getFindDataFileName fdat
return (Just filename)
#endif

6
debian/changelog vendored
View file

@ -1,3 +1,9 @@
git-annex (5.20140710) UNRELEASED; urgency=medium
* Fix minor FD leak in journal code.
-- Joey Hess <joeyh@debian.org> Wed, 09 Jul 2014 23:29:21 -0400
git-annex (5.20140709) unstable; urgency=medium
* Fix race in direct mode merge code that could cause all files in the