From 9a5cd96f0dd0593dd0c62f084c90898baa92ba1d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 13 Oct 2020 16:31:01 -0400 Subject: [PATCH] Fix a memory leak introduced in the last release The problem was this line: cleanup = and <$> sequence (map snd v) That caused all of v to be held onto until the end, when the cleanup action was run. I could not seem to find a bang pattern that avoided the leak, so I resorted to a IORef, rather clunky, but not a performance problem because it will only be written once per git ls-files, so typically just 1 time. This commit was sponsored by Mark Reidenbach on Patreon. --- CHANGELOG | 1 + CmdLine/Seek.hs | 19 +++++++++++++------ Utility/Path.hs | 12 ++++-------- ..._244acef6b20ac8fc281372f0898a49be._comment | 12 ++++++++++++ 4 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 doc/todo/memory_use_increase/comment_8_244acef6b20ac8fc281372f0898a49be._comment diff --git a/CHANGELOG b/CHANGELOG index 5bc58e6f36..cdffcba3bf 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ git-annex (8.20201008) UNRELEASED; urgency=medium * Fix build on Windows with network-3. + * Fix a memory leak introduced in the last release. -- Joey Hess Thu, 08 Oct 2020 10:48:17 -0400 diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 9a5f2d246e..34a9d20465 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -48,6 +48,7 @@ import CmdLine.Action import Control.Concurrent.Async import System.Posix.Types +import Data.IORef data AnnexedFileSeeker = AnnexedFileSeeker { startAction :: SeekInput -> RawFilePath -> Key -> CommandStart @@ -416,18 +417,24 @@ seekFilteredKeys seeker listfs = do seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> WorkTreeItems -> Annex ([(SeekInput, a)], IO Bool) seekHelper c ww a (WorkTreeItems l) = do os <- seekOptions ww - inRepo $ \g -> combinelists <$> forM (segmentXargsOrdered l) - (runSegmentPaths' mk c (\fs -> a os fs g) . map toRawFilePath) + v <- liftIO $ newIORef [] + r <- inRepo $ \g -> concat . concat <$> forM (segmentXargsOrdered l) + (runSegmentPaths' mk c (\fs -> go v os fs g) . map toRawFilePath) + return (r, cleanupall v) where mk (Just i) f = (SeekInput [fromRawFilePath i], f) -- This is not accurate, but it only happens when there are a -- great many input WorkTreeItems. mk Nothing f = (SeekInput [fromRawFilePath (c f)], f) - combinelists v = - let r = concat $ concat $ map fst v - cleanup = and <$> sequence (map snd v) - in (r, cleanup) + go v os fs g = do + (l, cleanup) <- a os fs g + liftIO $ modifyIORef' v (cleanup:) + return l + + cleanupall v = do + cleanups <- readIORef v + and <$> sequence cleanups seekHelper _ _ _ NoWorkTreeItems = return ([], pure True) data WarnUnmatchWhen = WarnUnmatchLsFiles | WarnUnmatchWorkTreeItems diff --git a/Utility/Path.hs b/Utility/Path.hs index 570445076f..6f38b07c13 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -235,15 +235,11 @@ segmentPaths' f c (i:is) new = - than it would be to run the action separately with each path. In - the case of git file list commands, that assumption tends to hold. -} -runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO ([a], v)) -> [RawFilePath] -> IO ([[a]], v) -runSegmentPaths c a paths = do - (l, cleanup) <- a paths - return (segmentPaths c paths l, cleanup) +runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[a]] +runSegmentPaths c a paths = segmentPaths c paths <$> a paths -runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO ([a], v)) -> [RawFilePath] -> IO ([[r]], v) -runSegmentPaths' si c a paths = do - (l, cleanup) <- a paths - return (segmentPaths' si c paths l, cleanup) +runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[r]] +runSegmentPaths' si c a paths = segmentPaths' si c paths <$> a paths {- Converts paths in the home directory to use ~/ -} relHome :: FilePath -> IO String diff --git a/doc/todo/memory_use_increase/comment_8_244acef6b20ac8fc281372f0898a49be._comment b/doc/todo/memory_use_increase/comment_8_244acef6b20ac8fc281372f0898a49be._comment new file mode 100644 index 0000000000..693852e049 --- /dev/null +++ b/doc/todo/memory_use_increase/comment_8_244acef6b20ac8fc281372f0898a49be._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 8""" + date="2020-10-13T19:52:50Z" + content=""" +[[!commit f624876dc21fed141eee368c86f4b209852ab91c]] is where seekHelper +went bad. Fixed that. + +git-annex is still allocating 120 mb in this situation, though the +profile now shows a max memory use of 4 mb. Still don't understand that, so +this remains open. +"""]]