From 4f1fea1a856a2c82ed200e805bb18e9f9aaaa67b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 18 Sep 2011 22:40:31 -0400 Subject: [PATCH] fix memory leak filterM is not a good idea if you were streaming in a large list of files. Fixing this memory leak that I introduced earlier today was a PITA because to avoid the filterM, it's necessary to do the filtering only after building up the data structures like BackendFile, and that means each separate data structure needs it own function to apply the filter, at least in this naive implementation. There is also a minor performance regression, when using copy/drop/get/fsck with a filter, git is now asked to look up attributes for all files, since that now comes before the filter is applied. This is only a very minor thing, since getting the attributes is very fast and --exclude was probably not typically used to speed it up. --- Command.hs | 53 +++++++++++++++++++++++++++++++++-------------------- Limit.hs | 15 +++++++-------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/Command.hs b/Command.hs index fd58ca801a..3cfff268c8 100644 --- a/Command.hs +++ b/Command.hs @@ -107,17 +107,22 @@ notBareRepo a = do {- These functions find appropriate files or other things based on a user's parameters, and run a specified action on them. -} -withFilesInGit :: (String -> CommandStart) -> CommandSeek +withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek withFilesInGit a params = do repo <- Annex.gitRepo - files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params - liftM (map a) $ filterFiles files + runFiltered a $ liftIO $ runPreserveOrder (LsFiles.inRepo repo) params withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek withAttrFilesInGit attr a params = do repo <- Annex.gitRepo files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params - files' <- filterFiles files - liftM (map a) $ liftIO $ Git.checkAttr repo attr files' + run $ liftIO $ Git.checkAttr repo attr files + where + run fs = do + matcher <- Limit.getMatcher + liftM (map $ proc matcher) fs + proc matcher p@(f, _) = do + ok <- matcher f + if ok then a p else stop withNumCopies :: (FilePath -> Maybe Int -> CommandStart) -> CommandSeek withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params where @@ -128,23 +133,17 @@ withBackendFilesInGit :: (BackendFile -> CommandStart) -> CommandSeek withBackendFilesInGit a params = do repo <- Annex.gitRepo files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params - files' <- filterFiles files - backendPairs a files' + backendPairs a files withFilesMissing :: (String -> CommandStart) -> CommandSeek -withFilesMissing a params = do - files <- liftIO $ filterM missing params - liftM (map a) $ filterFiles files +withFilesMissing a params = runFiltered a $ liftIO $ filterM missing params where - missing f = do - e <- doesFileExist f - return $ not e + missing = liftM not . doesFileExist withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek withFilesNotInGit a params = do repo <- Annex.gitRepo force <- Annex.getState Annex.force newfiles <- liftIO $ runPreserveOrder (LsFiles.notInRepo repo force) params - newfiles' <- filterFiles newfiles - backendPairs a newfiles' + backendPairs a newfiles withWords :: ([String] -> CommandStart) -> CommandSeek withWords a params = return [a params] withStrings :: (String -> CommandStart) -> CommandSeek @@ -152,8 +151,8 @@ withStrings a params = return $ map a params withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek withFilesToBeCommitted a params = do repo <- Annex.gitRepo - tocommit <- liftIO $ runPreserveOrder (LsFiles.stagedNotDeleted repo) params - liftM (map a) $ filterFiles tocommit + runFiltered a $ + liftIO $ runPreserveOrder (LsFiles.stagedNotDeleted repo) params withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek @@ -165,8 +164,7 @@ withFilesUnlocked' typechanged a params = do typechangedfiles <- liftIO $ runPreserveOrder (typechanged repo) params unlockedfiles <- liftIO $ filterM notSymlink $ map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles - unlockedfiles' <- filterFiles unlockedfiles - backendPairs a unlockedfiles' + backendPairs a unlockedfiles withKeys :: (Key -> CommandStart) -> CommandSeek withKeys a params = return $ map (a . parse) params where @@ -175,8 +173,23 @@ withNothing :: CommandStart -> CommandSeek withNothing a [] = return [a] withNothing _ _ = error "This command takes no parameters." +runFiltered :: (FilePath -> Annex (Maybe a)) -> Annex [FilePath] -> Annex [Annex (Maybe a)] +runFiltered a fs = do + matcher <- Limit.getMatcher + liftM (map $ proc matcher) fs + where + proc matcher f = do + ok <- matcher f + if ok then a f else stop + backendPairs :: (BackendFile -> CommandStart) -> CommandSeek -backendPairs a files = map a <$> Backend.chooseBackends files +backendPairs a fs = do + matcher <- Limit.getMatcher + liftM (map $ proc matcher) (Backend.chooseBackends fs) + where + proc matcher p@(_, f) = do + ok <- matcher f + if ok then a p else stop {- filter out symlinks -} notSymlink :: FilePath -> IO Bool diff --git a/Limit.hs b/Limit.hs index 51f3fc9505..602da70013 100644 --- a/Limit.hs +++ b/Limit.hs @@ -22,20 +22,19 @@ import Utility type Limit = Utility.Matcher.Token (FilePath -> Annex Bool) -{- Filter out files not matching user-specified limits. -} -filterFiles :: [FilePath] -> Annex [FilePath] -filterFiles l = do - matcher <- getMatcher - filterM (Utility.Matcher.matchM matcher) l - {- Checks if there are user-specified limits. -} limited :: Annex Bool -limited = (not . Utility.Matcher.matchesAny) <$> getMatcher +limited = (not . Utility.Matcher.matchesAny) <$> getMatcher' {- Gets a matcher for the user-specified limits. The matcher is cached for - speed; once it's obtained the user-specified limits can't change. -} -getMatcher :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool)) +getMatcher :: Annex (FilePath -> Annex Bool) getMatcher = do + m <- getMatcher' + return $ Utility.Matcher.matchM m + +getMatcher' :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool)) +getMatcher' = do m <- Annex.getState Annex.limit case m of Right r -> return r