fix memory leak

filterM is not a good idea if you were streaming in a large list of files.

Fixing this memory leak that I introduced earlier today was a PITA because
to avoid the filterM, it's necessary to do the filtering only after
building up the data structures like BackendFile, and that means each
separate data structure needs it own function to apply the filter,
at least in this naive implementation.

There is also a minor performance regression, when using copy/drop/get/fsck
with a filter, git is now asked to look up attributes for all files,
since that now comes before the filter is applied. This is only a very
minor thing, since getting the attributes is very fast and --exclude was
probably not typically used to speed it up.
This commit is contained in:
Joey Hess 2011-09-18 22:40:31 -04:00
parent 8d1e8c0760
commit 4f1fea1a85
2 changed files with 40 additions and 28 deletions

View file

@ -107,17 +107,22 @@ notBareRepo a = do
{- These functions find appropriate files or other things based on a
user's parameters, and run a specified action on them. -}
withFilesInGit :: (String -> CommandStart) -> CommandSeek
withFilesInGit :: (FilePath -> CommandStart) -> CommandSeek
withFilesInGit a params = do
repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
liftM (map a) $ filterFiles files
runFiltered a $ liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
withAttrFilesInGit :: String -> ((FilePath, String) -> CommandStart) -> CommandSeek
withAttrFilesInGit attr a params = do
repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
files' <- filterFiles files
liftM (map a) $ liftIO $ Git.checkAttr repo attr files'
run $ liftIO $ Git.checkAttr repo attr files
where
run fs = do
matcher <- Limit.getMatcher
liftM (map $ proc matcher) fs
proc matcher p@(f, _) = do
ok <- matcher f
if ok then a p else stop
withNumCopies :: (FilePath -> Maybe Int -> CommandStart) -> CommandSeek
withNumCopies a params = withAttrFilesInGit "annex.numcopies" go params
where
@ -128,23 +133,17 @@ withBackendFilesInGit :: (BackendFile -> CommandStart) -> CommandSeek
withBackendFilesInGit a params = do
repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
files' <- filterFiles files
backendPairs a files'
backendPairs a files
withFilesMissing :: (String -> CommandStart) -> CommandSeek
withFilesMissing a params = do
files <- liftIO $ filterM missing params
liftM (map a) $ filterFiles files
withFilesMissing a params = runFiltered a $ liftIO $ filterM missing params
where
missing f = do
e <- doesFileExist f
return $ not e
missing = liftM not . doesFileExist
withFilesNotInGit :: (BackendFile -> CommandStart) -> CommandSeek
withFilesNotInGit a params = do
repo <- Annex.gitRepo
force <- Annex.getState Annex.force
newfiles <- liftIO $ runPreserveOrder (LsFiles.notInRepo repo force) params
newfiles' <- filterFiles newfiles
backendPairs a newfiles'
backendPairs a newfiles
withWords :: ([String] -> CommandStart) -> CommandSeek
withWords a params = return [a params]
withStrings :: (String -> CommandStart) -> CommandSeek
@ -152,8 +151,8 @@ withStrings a params = return $ map a params
withFilesToBeCommitted :: (String -> CommandStart) -> CommandSeek
withFilesToBeCommitted a params = do
repo <- Annex.gitRepo
tocommit <- liftIO $ runPreserveOrder (LsFiles.stagedNotDeleted repo) params
liftM (map a) $ filterFiles tocommit
runFiltered a $
liftIO $ runPreserveOrder (LsFiles.stagedNotDeleted repo) params
withFilesUnlocked :: (BackendFile -> CommandStart) -> CommandSeek
withFilesUnlocked = withFilesUnlocked' LsFiles.typeChanged
withFilesUnlockedToBeCommitted :: (BackendFile -> CommandStart) -> CommandSeek
@ -165,8 +164,7 @@ withFilesUnlocked' typechanged a params = do
typechangedfiles <- liftIO $ runPreserveOrder (typechanged repo) params
unlockedfiles <- liftIO $ filterM notSymlink $
map (\f -> Git.workTree repo ++ "/" ++ f) typechangedfiles
unlockedfiles' <- filterFiles unlockedfiles
backendPairs a unlockedfiles'
backendPairs a unlockedfiles
withKeys :: (Key -> CommandStart) -> CommandSeek
withKeys a params = return $ map (a . parse) params
where
@ -175,8 +173,23 @@ withNothing :: CommandStart -> CommandSeek
withNothing a [] = return [a]
withNothing _ _ = error "This command takes no parameters."
runFiltered :: (FilePath -> Annex (Maybe a)) -> Annex [FilePath] -> Annex [Annex (Maybe a)]
runFiltered a fs = do
matcher <- Limit.getMatcher
liftM (map $ proc matcher) fs
where
proc matcher f = do
ok <- matcher f
if ok then a f else stop
backendPairs :: (BackendFile -> CommandStart) -> CommandSeek
backendPairs a files = map a <$> Backend.chooseBackends files
backendPairs a fs = do
matcher <- Limit.getMatcher
liftM (map $ proc matcher) (Backend.chooseBackends fs)
where
proc matcher p@(_, f) = do
ok <- matcher f
if ok then a p else stop
{- filter out symlinks -}
notSymlink :: FilePath -> IO Bool

View file

@ -22,20 +22,19 @@ import Utility
type Limit = Utility.Matcher.Token (FilePath -> Annex Bool)
{- Filter out files not matching user-specified limits. -}
filterFiles :: [FilePath] -> Annex [FilePath]
filterFiles l = do
matcher <- getMatcher
filterM (Utility.Matcher.matchM matcher) l
{- Checks if there are user-specified limits. -}
limited :: Annex Bool
limited = (not . Utility.Matcher.matchesAny) <$> getMatcher
limited = (not . Utility.Matcher.matchesAny) <$> getMatcher'
{- Gets a matcher for the user-specified limits. The matcher is cached for
- speed; once it's obtained the user-specified limits can't change. -}
getMatcher :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool))
getMatcher :: Annex (FilePath -> Annex Bool)
getMatcher = do
m <- getMatcher'
return $ Utility.Matcher.matchM m
getMatcher' :: Annex (Utility.Matcher.Matcher (FilePath -> Annex Bool))
getMatcher' = do
m <- Annex.getState Annex.limit
case m of
Right r -> return r