getKeysPresent is now fully lazy
.. Allowing it to be used by things in constant space! Random statistics: git annex status has gone from taking 239 mb of memory and 26 seconds in a repo, to 8 mb and 13 seconds. The trick here is the unsafeInterleaveIO, and the form of the function's recursion, which I cribbed heavily from System.IO.HVFS.Utils.recurseDirStat. The difference is, this one goes to a limited depth and avoids statting everything.
This commit is contained in:
parent
ff3644ad38
commit
b325694645
2 changed files with 16 additions and 14 deletions
|
@ -27,6 +27,7 @@ module Annex.Content (
|
|||
|
||||
import Control.Exception (bracket_)
|
||||
import System.Posix.Types
|
||||
import System.IO.Unsafe (unsafeInterleaveIO)
|
||||
|
||||
import Common.Annex
|
||||
import Logs.Location
|
||||
|
@ -290,19 +291,20 @@ moveBad key = do
|
|||
|
||||
{- List of keys whose content exists in .git/annex/objects/ -}
|
||||
getKeysPresent :: Annex [Key]
|
||||
getKeysPresent = getKeysPresent' =<< fromRepo gitAnnexObjectDir
|
||||
getKeysPresent' :: FilePath -> Annex [Key]
|
||||
getKeysPresent' dir = do
|
||||
exists <- liftIO $ doesDirectoryExist dir
|
||||
if not exists
|
||||
then return []
|
||||
else liftIO $ do
|
||||
-- 2 levels of hashing
|
||||
levela <- dirContents dir
|
||||
levelb <- mapM dirContents levela
|
||||
contents <- unsafeInterleaveIO $ mapM dirContents (concat levelb)
|
||||
let files = concat contents
|
||||
return $ mapMaybe (fileKey . takeFileName) files
|
||||
getKeysPresent = liftIO . traverse (2 :: Int) =<< fromRepo gitAnnexObjectDir
|
||||
where
|
||||
traverse depth dir = do
|
||||
contents <- catchDefaultIO (dirContents dir) []
|
||||
if depth == 0
|
||||
then continue (mapMaybe (fileKey . takeFileName) contents) []
|
||||
else do
|
||||
let deeper = traverse (depth - 1)
|
||||
continue [] (map deeper contents)
|
||||
continue keys [] = return keys
|
||||
continue keys (a:as) = do
|
||||
{- Force lazy traversal with unsafeInterleaveIO. -}
|
||||
morekeys <- unsafeInterleaveIO a
|
||||
continue (morekeys++keys) as
|
||||
|
||||
{- Things to do to record changes to content when shutting down.
|
||||
-
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -6,7 +6,7 @@ git-annex (3.20120310) UNRELEASED; urgency=low
|
|||
* unused: Reduce memory usage significantly. Still not constant
|
||||
space, but now only needs to store the set of file contents that
|
||||
are present in the annex in memory.
|
||||
* status: Fixed to run in nearly constant space.
|
||||
* status: Fixed to run in constant space.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Sat, 10 Mar 2012 14:03:22 -0400
|
||||
|
||||
|
|
Loading…
Reference in a new issue