getKeysPresent is now fully lazy
.. Allowing it to be used by things in constant space! Random statistics: git annex status has gone from taking 239 mb of memory and 26 seconds in a repo, to 8 mb and 13 seconds. The trick here is the unsafeInterleaveIO, and the form of the function's recursion, which I cribbed heavily from System.IO.HVFS.Utils.recurseDirStat. The difference is, this one goes to a limited depth and avoids statting everything.
This commit is contained in:
parent
ff3644ad38
commit
b325694645
2 changed files with 16 additions and 14 deletions
|
@ -27,6 +27,7 @@ module Annex.Content (
|
||||||
|
|
||||||
import Control.Exception (bracket_)
|
import Control.Exception (bracket_)
|
||||||
import System.Posix.Types
|
import System.Posix.Types
|
||||||
|
import System.IO.Unsafe (unsafeInterleaveIO)
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
|
@ -290,19 +291,20 @@ moveBad key = do
|
||||||
|
|
||||||
{- List of keys whose content exists in .git/annex/objects/ -}
|
{- List of keys whose content exists in .git/annex/objects/ -}
|
||||||
getKeysPresent :: Annex [Key]
|
getKeysPresent :: Annex [Key]
|
||||||
getKeysPresent = getKeysPresent' =<< fromRepo gitAnnexObjectDir
|
getKeysPresent = liftIO . traverse (2 :: Int) =<< fromRepo gitAnnexObjectDir
|
||||||
getKeysPresent' :: FilePath -> Annex [Key]
|
where
|
||||||
getKeysPresent' dir = do
|
traverse depth dir = do
|
||||||
exists <- liftIO $ doesDirectoryExist dir
|
contents <- catchDefaultIO (dirContents dir) []
|
||||||
if not exists
|
if depth == 0
|
||||||
then return []
|
then continue (mapMaybe (fileKey . takeFileName) contents) []
|
||||||
else liftIO $ do
|
else do
|
||||||
-- 2 levels of hashing
|
let deeper = traverse (depth - 1)
|
||||||
levela <- dirContents dir
|
continue [] (map deeper contents)
|
||||||
levelb <- mapM dirContents levela
|
continue keys [] = return keys
|
||||||
contents <- unsafeInterleaveIO $ mapM dirContents (concat levelb)
|
continue keys (a:as) = do
|
||||||
let files = concat contents
|
{- Force lazy traversal with unsafeInterleaveIO. -}
|
||||||
return $ mapMaybe (fileKey . takeFileName) files
|
morekeys <- unsafeInterleaveIO a
|
||||||
|
continue (morekeys++keys) as
|
||||||
|
|
||||||
{- Things to do to record changes to content when shutting down.
|
{- Things to do to record changes to content when shutting down.
|
||||||
-
|
-
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -6,7 +6,7 @@ git-annex (3.20120310) UNRELEASED; urgency=low
|
||||||
* unused: Reduce memory usage significantly. Still not constant
|
* unused: Reduce memory usage significantly. Still not constant
|
||||||
space, but now only needs to store the set of file contents that
|
space, but now only needs to store the set of file contents that
|
||||||
are present in the annex in memory.
|
are present in the annex in memory.
|
||||||
* status: Fixed to run in nearly constant space.
|
* status: Fixed to run in constant space.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Sat, 10 Mar 2012 14:03:22 -0400
|
-- Joey Hess <joeyh@debian.org> Sat, 10 Mar 2012 14:03:22 -0400
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue