fix memory leak

filterM is not a good idea if you were streaming in a large list of files.

Fixing this memory leak that I introduced earlier today was a PITA because
to avoid the filterM, it's necessary to do the filtering only after
building up the data structures like BackendFile, and that means each
separate data structure needs it own function to apply the filter,
at least in this naive implementation.

There is also a minor performance regression, when using copy/drop/get/fsck
with a filter, git is now asked to look up attributes for all files,
since that now comes before the filter is applied. This is only a very
minor thing, since getting the attributes is very fast and --exclude was
probably not typically used to speed it up.
This commit is contained in:
Joey Hess 2011-09-18 22:40:31 -04:00
parent 8d1e8c0760
commit 4f1fea1a85
2 changed files with 40 additions and 28 deletions

View file

@ -107,17 +107,22 @@ notBareRepo a = do
{- These functions find appropriate files or other things based on a {- These functions find appropriate files or other things based on a
user's parameters, and run a specified action on them. -} user's parameters, and run a specified action on them. -}
withFilesInGit :: (String -> CommandStart) -> CommandSeek withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek
withFilesInGit a params = do withFilesInGit a params = do
repo <- Annex.gitRepo repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params runFiltered a $ liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
liftM (map a) $ filterFiles files
withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek
withAttrFilesInGit attr a params = do withAttrFilesInGit attr a params = do
repo <- Annex.gitRepo repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
files' <- filterFiles files run $ liftIO $ Git.checkAttr repo attr files
liftM (map a) $ liftIO $ Git.checkAttr repo attr files' where
run fs = do
matcher <- Limit.getMatcher
liftM (map $ proc matcher) fs
proc matcher p@(f, _) = do
ok <- matcher f
if ok then a p else stop
withNumCopies :: (FilePath -> Maybe Int -> CommandStart) -> CommandSeek withNumCopies :: (FilePath -> Maybe Int -> CommandStart) -> CommandSeek
withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params
where where
@ -128,23 +133,17 @@ withBackendFilesInGit :: (BackendFile -> CommandStart) -> CommandSeek
withBackendFilesInGit a params = do withBackendFilesInGit a params = do
repo <- Annex.gitRepo repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
files' <- filterFiles files backendPairs a files
backendPairs a files'
withFilesMissing :: (String -> CommandStart) -> CommandSeek withFilesMissing :: (String -> CommandStart) -> CommandSeek
withFilesMissing a params = do withFilesMissing a params = runFiltered a $ liftIO $ filterM missing params
files <- liftIO $ filterM missing params
liftM (map a) $ filterFiles files
where where
missing f = do missing = liftM not . doesFileExist
e <- doesFileExist f
return $ not e
withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek
withFilesNotInGit a params = do withFilesNotInGit a params = do
repo <- Annex.gitRepo repo <- Annex.gitRepo
force <- Annex.getState Annex.force force <- Annex.getState Annex.force
newfiles <- liftIO $ runPreserveOrder (LsFiles.notInRepo repo force) params newfiles <- liftIO $ runPreserveOrder (LsFiles.notInRepo repo force) params
newfiles' <- filterFiles newfiles backendPairs a newfiles
backendPairs a newfiles'
withWords :: ([String] -> CommandStart) -> CommandSeek withWords :: ([String] -> CommandStart) -> CommandSeek
withWords a params = return [a params] withWords a params = return [a params]
withStrings :: (String -> CommandStart) -> CommandSeek withStrings :: (String -> CommandStart) -> CommandSeek
@ -152,8 +151,8 @@ withStrings a params = return $ map a params
withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek
withFilesToBeCommitted a params = do withFilesToBeCommitted a params = do
repo <- Annex.gitRepo repo <- Annex.gitRepo
tocommit <- liftIO $ runPreserveOrder (LsFiles.stagedNotDeleted repo) params runFiltered a $
liftM (map a) $ filterFiles tocommit liftIO $ runPreserveOrder (LsFiles.stagedNotDeleted repo) params
withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged
withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek
@ -165,8 +164,7 @@ withFilesUnlocked' typechanged a params = do
typechangedfiles <- liftIO $ runPreserveOrder (typechanged repo) params typechangedfiles <- liftIO $ runPreserveOrder (typechanged repo) params
unlockedfiles <- liftIO $ filterM notSymlink $ unlockedfiles <- liftIO $ filterM notSymlink $
map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles
unlockedfiles' <- filterFiles unlockedfiles backendPairs a unlockedfiles
backendPairs a unlockedfiles'
withKeys :: (Key -> CommandStart) -> CommandSeek withKeys :: (Key -> CommandStart) -> CommandSeek
withKeys a params = return $ map (a . parse) params withKeys a params = return $ map (a . parse) params
where where
@ -175,8 +173,23 @@ withNothing :: CommandStart -> CommandSeek
withNothing a [] = return [a] withNothing a [] = return [a]
withNothing _ _ = error "This command takes no parameters." withNothing _ _ = error "This command takes no parameters."
runFiltered :: (FilePath -> Annex (Maybe a)) -> Annex [FilePath] -> Annex [Annex (Maybe a)]
runFiltered a fs = do
matcher <- Limit.getMatcher
liftM (map $ proc matcher) fs
where
proc matcher f = do
ok <- matcher f
if ok then a f else stop
backendPairs :: (BackendFile -> CommandStart) -> CommandSeek backendPairs :: (BackendFile -> CommandStart) -> CommandSeek
backendPairs a files = map a <$> Backend.chooseBackends files backendPairs a fs = do
matcher <- Limit.getMatcher
liftM (map $ proc matcher) (Backend.chooseBackends fs)
where
proc matcher p@(_, f) = do
ok <- matcher f
if ok then a p else stop
{- filter out symlinks -} {- filter out symlinks -}
notSymlink :: FilePath -> IO Bool notSymlink :: FilePath -> IO Bool

View file

@ -22,20 +22,19 @@ import Utility
type Limit = Utility.Matcher.Token (FilePath -> Annex Bool) type Limit = Utility.Matcher.Token (FilePath -> Annex Bool)
{- Filter out files not matching user-specified limits. -}
filterFiles :: [FilePath] -> Annex [FilePath]
filterFiles l = do
matcher <- getMatcher
filterM (Utility.Matcher.matchM matcher) l
{- Checks if there are user-specified limits. -} {- Checks if there are user-specified limits. -}
limited :: Annex Bool limited :: Annex Bool
limited = (not . Utility.Matcher.matchesAny) <$> getMatcher limited = (not . Utility.Matcher.matchesAny) <$> getMatcher'
{- Gets a matcher for the user-specified limits. The matcher is cached for {- Gets a matcher for the user-specified limits. The matcher is cached for
- speed; once it's obtained the user-specified limits can't change. -} - speed; once it's obtained the user-specified limits can't change. -}
getMatcher :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool)) getMatcher :: Annex (FilePath -> Annex Bool)
getMatcher = do getMatcher = do
m <- getMatcher'
return $ Utility.Matcher.matchM m
getMatcher' :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool))
getMatcher' = do
m <- Annex.getState Annex.limit m <- Annex.getState Annex.limit
case m of case m of
Right r -> return r Right r -> return r