git-annex/Utility/Directory.hs
Joey Hess d9d76cf98b Fix minor FD leak in journal code.
Minor because normally only 1 FD is leaked per git-annex run. However,
the test suite leaks a few hundred FDs, and this broke it on the Debian
autobuilders, which seem to have a tigher than usual ulimit.

The leak was introduced by the lazy getDirectoryContents' that was
introduced in e6330988dd in order to scale to
millions of journal files -- if the lazy list was never fully consumed, the
directory handle did not get closed.

Instead, pull in openDirectory/readDirectory/closeDirectory code that I
already developed and submitted in a patch to the haskell directory library
earlier. Using this in journalDirty avoids the place that the lazy list
caused a problem. And using it in stageJournal eliminates the need for
getDirectoryContents'.

The getJournalFiles* functions are switched back to using the regular
strict getDirectoryContents. I'm not sure if those always consume the whole
list, so this avoids any leak. And the things that call those are things
like git annex unused, which also look at every file committed to the
git-annex branch, so would need more work to scale to insane numbers of
files anyway.
2014-07-09 23:36:53 -04:00

217 lines
6.1 KiB
Haskell

{- directory traversal and manipulation
-
- Copyright 2011-2014 Joey Hess <joey@kitenet.net>
-
- License: BSD-2-clause
-}
{-# LANGUAGE CPP #-}
module Utility.Directory where
import System.IO.Error
import System.Directory
import Control.Exception (throw)
import Control.Monad
import Control.Monad.IfElse
import System.FilePath
import Control.Applicative
import Control.Concurrent
import System.IO.Unsafe (unsafeInterleaveIO)
import Data.Maybe
#ifdef mingw32_HOST_OS
import qualified System.Win32 as Win32
#else
import qualified System.Posix as Posix
#endif
import Utility.PosixFiles
import Utility.SafeCommand
import Utility.Tmp
import Utility.Exception
import Utility.Monad
import Utility.Applicative
dirCruft :: FilePath -> Bool
dirCruft "." = True
dirCruft ".." = True
dirCruft _ = False
{- Lists the contents of a directory.
- Unlike getDirectoryContents, paths are not relative to the directory. -}
dirContents :: FilePath -> IO [FilePath]
dirContents d = map (d </>) . filter (not . dirCruft) <$> getDirectoryContents d
{- Gets files in a directory, and then its subdirectories, recursively,
- and lazily.
-
- Does not follow symlinks to other subdirectories.
-
- When the directory does not exist, no exception is thrown,
- instead, [] is returned. -}
dirContentsRecursive :: FilePath -> IO [FilePath]
dirContentsRecursive = dirContentsRecursiveSkipping (const False) True
{- Skips directories whose basenames match the skipdir. -}
dirContentsRecursiveSkipping :: (FilePath -> Bool) -> Bool -> FilePath -> IO [FilePath]
dirContentsRecursiveSkipping skipdir followsubdirsymlinks topdir = go [topdir]
where
go [] = return []
go (dir:dirs)
| skipdir (takeFileName dir) = go dirs
| otherwise = unsafeInterleaveIO $ do
(files, dirs') <- collect [] []
=<< catchDefaultIO [] (dirContents dir)
files' <- go (dirs' ++ dirs)
return (files ++ files')
collect files dirs' [] = return (reverse files, reverse dirs')
collect files dirs' (entry:entries)
| dirCruft entry = collect files dirs' entries
| otherwise = do
let skip = collect (entry:files) dirs' entries
let recurse = collect files (entry:dirs') entries
ms <- catchMaybeIO $ getSymbolicLinkStatus entry
case ms of
(Just s)
| isDirectory s -> recurse
| isSymbolicLink s && followsubdirsymlinks ->
ifM (doesDirectoryExist entry)
( recurse
, skip
)
_ -> skip
{- Gets the directory tree from a point, recursively and lazily,
- with leaf directories **first**, skipping any whose basenames
- match the skipdir. Does not follow symlinks. -}
dirTreeRecursiveSkipping :: (FilePath -> Bool) -> FilePath -> IO [FilePath]
dirTreeRecursiveSkipping skipdir topdir = go [] [topdir]
where
go c [] = return c
go c (dir:dirs)
| skipdir (takeFileName dir) = go c dirs
| otherwise = unsafeInterleaveIO $ do
subdirs <- go c
=<< filterM (isDirectory <$$> getSymbolicLinkStatus)
=<< catchDefaultIO [] (dirContents dir)
go (subdirs++[dir]) dirs
{- Moves one filename to another.
- First tries a rename, but falls back to moving across devices if needed. -}
moveFile :: FilePath -> FilePath -> IO ()
moveFile src dest = tryIO (rename src dest) >>= onrename
where
onrename (Right _) = noop
onrename (Left e)
| isPermissionError e = rethrow
| isDoesNotExistError e = rethrow
| otherwise = do
-- copyFile is likely not as optimised as
-- the mv command, so we'll use the latter.
-- But, mv will move into a directory if
-- dest is one, which is not desired.
whenM (isdir dest) rethrow
viaTmp mv dest undefined
where
rethrow = throw e
mv tmp _ = do
ok <- boolSystem "mv" [Param "-f", Param src, Param tmp]
unless ok $ do
-- delete any partial
_ <- tryIO $ removeFile tmp
rethrow
isdir f = do
r <- tryIO $ getFileStatus f
case r of
(Left _) -> return False
(Right s) -> return $ isDirectory s
{- Removes a file, which may or may not exist, and does not have to
- be a regular file.
-
- Note that an exception is thrown if the file exists but
- cannot be removed. -}
nukeFile :: FilePath -> IO ()
nukeFile file = void $ tryWhenExists go
where
#ifndef mingw32_HOST_OS
go = removeLink file
#else
go = removeFile file
#endif
#ifndef mingw32_HOST_OS
data DirectoryHandle = DirectoryHandle IsOpen Posix.DirStream
#else
data DirectoryHandle = DirectoryHandle IsOpen Win32.HANDLE Win32.FindData (MVar ())
#endif
type IsOpen = MVar () -- full when the handle is open
openDirectory :: FilePath -> IO DirectoryHandle
openDirectory path = do
#ifndef mingw32_HOST_OS
dirp <- Posix.openDirStream path
isopen <- newMVar ()
return (DirectoryHandle isopen dirp)
#else
(h, fdat) <- Win32.findFirstFile (path </> "*")
-- Indicate that the fdat contains a filename that readDirectory
-- has not yet returned, by making the MVar be full.
-- (There's always at least a "." entry.)
alreadyhave <- newMVar ()
isopen <- newMVar ()
return (DirectoryHandle isopen h fdat alreadyhave)
#endif
closeDirectory :: DirectoryHandle -> IO ()
#ifndef mingw32_HOST_OS
closeDirectory (DirectoryHandle isopen dirp) =
whenOpen isopen $
Posix.closeDirStream dirp
#else
closeDirectory (DirectoryHandle isopen h _ alreadyhave) =
whenOpen isopen $ do
_ <- tryTakeMVar alreadyhave
Win32.findClose h
#endif
where
whenOpen :: IsOpen -> IO () -> IO ()
whenOpen mv f = do
v <- tryTakeMVar mv
when (isJust v) f
{- |Reads the next entry from the handle. Once the end of the directory
is reached, returns Nothing and automatically closes the handle.
-}
readDirectory :: DirectoryHandle -> IO (Maybe FilePath)
#ifndef mingw32_HOST_OS
readDirectory hdl@(DirectoryHandle _ dirp) = do
e <- Posix.readDirStream dirp
if null e
then do
closeDirectory hdl
return Nothing
else return (Just e)
#else
readDirectory hdl@(DirectoryHandle _ h fdat mv) = do
-- If the MVar is full, then the filename in fdat has
-- not yet been returned. Otherwise, need to find the next
-- file.
r <- tryTakeMVar mv
case r of
Just () -> getfn
Nothing -> do
more <- Win32.findNextFile h fdat
if more
then getfn
else do
closeDirectory hdl
return Nothing
where
getfn = do
filename <- Win32.getFindDataFileName fdat
return (Just filename)
#endif