From b3256946457ec8a2da056573bf49593b225adbd8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 11 Mar 2012 18:04:58 -0400 Subject: [PATCH] getKeysPresent is now fully lazy .. Allowing it to be used by things in constant space! Random statistics: git annex status has gone from taking 239 mb of memory and 26 seconds in a repo, to 8 mb and 13 seconds. The trick here is the unsafeInterleaveIO, and the form of the function's recursion, which I cribbed heavily from System.IO.HVFS.Utils.recurseDirStat. The difference is, this one goes to a limited depth and avoids statting everything. --- Annex/Content.hs | 28 +++++++++++++++------------- debian/changelog | 2 +- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index bf5a6c3a7e..ccaff5c564 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -27,6 +27,7 @@ module Annex.Content ( import Control.Exception (bracket_) import System.Posix.Types +import System.IO.Unsafe (unsafeInterleaveIO) import Common.Annex import Logs.Location @@ -290,19 +291,20 @@ moveBad key = do {- List of keys whose content exists in .git/annex/objects/ -} getKeysPresent :: Annex [Key] -getKeysPresent = getKeysPresent' =<< fromRepo gitAnnexObjectDir -getKeysPresent' :: FilePath -> Annex [Key] -getKeysPresent' dir = do - exists <- liftIO $ doesDirectoryExist dir - if not exists - then return [] - else liftIO $ do - -- 2 levels of hashing - levela <- dirContents dir - levelb <- mapM dirContents levela - contents <- unsafeInterleaveIO $ mapM dirContents (concat levelb) - let files = concat contents - return $ mapMaybe (fileKey . takeFileName) files +getKeysPresent = liftIO . traverse (2 :: Int) =<< fromRepo gitAnnexObjectDir + where + traverse depth dir = do + contents <- catchDefaultIO (dirContents dir) [] + if depth == 0 + then continue (mapMaybe (fileKey . takeFileName) contents) [] + else do + let deeper = traverse (depth - 1) + continue [] (map deeper contents) + continue keys [] = return keys + continue keys (a:as) = do + {- Force lazy traversal with unsafeInterleaveIO. -} + morekeys <- unsafeInterleaveIO a + continue (morekeys++keys) as {- Things to do to record changes to content when shutting down. - diff --git a/debian/changelog b/debian/changelog index 6da54056c7..120513806e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,7 +6,7 @@ git-annex (3.20120310) UNRELEASED; urgency=low * unused: Reduce memory usage significantly. Still not constant space, but now only needs to store the set of file contents that are present in the annex in memory. - * status: Fixed to run in nearly constant space. + * status: Fixed to run in constant space. -- Joey Hess Sat, 10 Mar 2012 14:03:22 -0400