status: Fixed to run in nearly constant space.
Before, it leaked space due to caching lists of keys. Now all necessary data about keys is calculated as they stream in. The "nearly constant" is due to getKeysPresent, which builds up a lot of [] thunks as it traverses .git/annex/objects/. Will deal with it later.
This commit is contained in:
parent
b086e32c63
commit
ff3644ad38
4 changed files with 67 additions and 49 deletions
|
@ -300,7 +300,7 @@ getKeysPresent' dir = do
|
|||
-- 2 levels of hashing
|
||||
levela <- dirContents dir
|
||||
levelb <- mapM dirContents levela
|
||||
contents <- mapM dirContents (concat levelb)
|
||||
contents <- unsafeInterleaveIO $ mapM dirContents (concat levelb)
|
||||
let files = concat contents
|
||||
return $ mapMaybe (fileKey . takeFileName) files
|
||||
|
||||
|
|
|
@ -5,12 +5,12 @@
|
|||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
|
||||
module Command.Status where
|
||||
|
||||
import Control.Monad.State.Strict
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
import Data.Set (Set)
|
||||
import Text.JSON
|
||||
|
||||
import Common.Annex
|
||||
|
@ -32,10 +32,18 @@ import Remote
|
|||
-- a named computation that produces a statistic
|
||||
type Stat = StatState (Maybe (String, StatState String))
|
||||
|
||||
-- cached info that multiple Stats may need
|
||||
-- data about a set of keys
|
||||
data KeyData = KeyData
|
||||
{ countKeys :: Integer
|
||||
, sizeKeys :: Integer
|
||||
, unknownSizeKeys :: Integer
|
||||
, backendsKeys :: M.Map String Integer
|
||||
}
|
||||
|
||||
-- cached info that multiple Stats use
|
||||
data StatInfo = StatInfo
|
||||
{ keysPresentCache :: Maybe (Set Key)
|
||||
, keysReferencedCache :: Maybe (Set Key)
|
||||
{ presentData :: Maybe KeyData
|
||||
, referencedData :: Maybe KeyData
|
||||
}
|
||||
|
||||
-- a state monad for running Stats in
|
||||
|
@ -122,19 +130,19 @@ remote_list level desc = stat n $ nojson $ lift $ do
|
|||
|
||||
local_annex_size :: Stat
|
||||
local_annex_size = stat "local annex size" $ json id $
|
||||
keySizeSum <$> cachedKeysPresent
|
||||
showSizeKeys <$> cachedPresentData
|
||||
|
||||
local_annex_keys :: Stat
|
||||
local_annex_keys = stat "local annex keys" $ json show $
|
||||
S.size <$> cachedKeysPresent
|
||||
countKeys <$> cachedPresentData
|
||||
|
||||
known_annex_size :: Stat
|
||||
known_annex_size = stat "known annex size" $ json id $
|
||||
keySizeSum <$> cachedKeysReferenced
|
||||
showSizeKeys <$> cachedReferencedData
|
||||
|
||||
known_annex_keys :: Stat
|
||||
known_annex_keys = stat "known annex keys" $ json show $
|
||||
S.size <$> cachedKeysReferenced
|
||||
countKeys <$> cachedReferencedData
|
||||
|
||||
tmp_size :: Stat
|
||||
tmp_size = staleSize "temporary directory size" gitAnnexTmpDir
|
||||
|
@ -144,46 +152,62 @@ bad_data_size = staleSize "bad keys size" gitAnnexBadDir
|
|||
|
||||
backend_usage :: Stat
|
||||
backend_usage = stat "backend usage" $ nojson $
|
||||
calc <$> cachedKeysReferenced <*> cachedKeysPresent
|
||||
calc
|
||||
<$> (backendsKeys <$> cachedReferencedData)
|
||||
<*> (backendsKeys <$> cachedPresentData)
|
||||
where
|
||||
calc a b = pp "" $ reverse . sort $ map swap $ splits $ S.toList $ S.union a b
|
||||
splits :: [Key] -> [(String, Integer)]
|
||||
splits ks = M.toList $ M.fromListWith (+) $ map tcount ks
|
||||
tcount k = (keyBackendName k, 1)
|
||||
swap (a, b) = (b, a)
|
||||
calc a b = pp "" $ reverse . sort $ map swap $ M.toList $ M.unionWith (+) a b
|
||||
pp c [] = c
|
||||
pp c ((n, b):xs) = "\n\t" ++ b ++ ": " ++ show n ++ pp c xs
|
||||
swap (a, b) = (b, a)
|
||||
|
||||
cachedKeysPresent :: StatState (Set Key)
|
||||
cachedKeysPresent = do
|
||||
cachedPresentData :: StatState KeyData
|
||||
cachedPresentData = do
|
||||
s <- get
|
||||
case keysPresentCache s of
|
||||
case presentData s of
|
||||
Just v -> return v
|
||||
Nothing -> do
|
||||
keys <- S.fromList <$> lift getKeysPresent
|
||||
put s { keysPresentCache = Just keys }
|
||||
return keys
|
||||
v <- foldKeys <$> lift getKeysPresent
|
||||
put s { presentData = Just v }
|
||||
return v
|
||||
|
||||
cachedKeysReferenced :: StatState (Set Key)
|
||||
cachedKeysReferenced = do
|
||||
cachedReferencedData :: StatState KeyData
|
||||
cachedReferencedData = do
|
||||
s <- get
|
||||
case keysReferencedCache s of
|
||||
case referencedData s of
|
||||
Just v -> return v
|
||||
Nothing -> do
|
||||
keys <- S.fromList <$> lift Command.Unused.getKeysReferenced
|
||||
put s { keysReferencedCache = Just keys }
|
||||
return keys
|
||||
!v <- lift $ Command.Unused.withKeysReferenced
|
||||
emptyKeyData addKey
|
||||
put s { referencedData = Just v }
|
||||
return v
|
||||
|
||||
keySizeSum :: Set Key -> String
|
||||
keySizeSum s = total ++ missingnote
|
||||
emptyKeyData :: KeyData
|
||||
emptyKeyData = KeyData 0 0 0 M.empty
|
||||
|
||||
foldKeys :: [Key] -> KeyData
|
||||
foldKeys = foldl' (flip addKey) emptyKeyData
|
||||
|
||||
addKey :: Key -> KeyData -> KeyData
|
||||
addKey key (KeyData count size unknownsize backends) =
|
||||
KeyData count' size' unknownsize' backends'
|
||||
where
|
||||
knownsizes = mapMaybe keySize $ S.toList s
|
||||
total = roughSize storageUnits False $ sum knownsizes
|
||||
missing = S.size s - genericLength knownsizes
|
||||
{- All calculations strict to avoid thunks when repeatedly
|
||||
- applied to many keys. -}
|
||||
!count' = count + 1
|
||||
!backends' = M.insertWith' (+) (keyBackendName key) 1 backends
|
||||
!size' = maybe size (+ size) ks
|
||||
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
|
||||
ks = keySize key
|
||||
|
||||
showSizeKeys :: KeyData -> String
|
||||
showSizeKeys d = total ++ missingnote
|
||||
where
|
||||
total = roughSize storageUnits False $ sizeKeys d
|
||||
missingnote
|
||||
| missing == 0 = ""
|
||||
| unknownSizeKeys d == 0 = ""
|
||||
| otherwise = aside $
|
||||
"+ " ++ show missing ++
|
||||
"+ " ++ show (unknownSizeKeys d) ++
|
||||
" keys of unknown size"
|
||||
|
||||
staleSize :: String -> (Git.Repo -> FilePath) -> Stat
|
||||
|
@ -192,7 +216,7 @@ staleSize label dirspec = do
|
|||
if null keys
|
||||
then nostat
|
||||
else stat label $ json (++ aside "clean up with git-annex unused") $
|
||||
return $ keySizeSum $ S.fromList keys
|
||||
return $ showSizeKeys $ foldKeys keys
|
||||
|
||||
aside :: String -> String
|
||||
aside s = " (" ++ s ++ ")"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2010-2011 Joey Hess <joey@kitenet.net>
|
||||
- Copyright 2010-2012 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -171,7 +171,7 @@ excludeReferenced l = do
|
|||
go s (r:rs)
|
||||
| s == S.empty = return [] -- optimisation
|
||||
| otherwise = do
|
||||
!s' <- withKeysReferencedInGit r s S.delete
|
||||
s' <- withKeysReferencedInGit r s S.delete
|
||||
go s' rs
|
||||
|
||||
{- Finds items in the first, smaller list, that are not
|
||||
|
@ -186,21 +186,14 @@ exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
|
|||
where
|
||||
remove a b = foldl (flip S.delete) b a
|
||||
|
||||
{- List of keys referenced by symlinks in the git repo. -}
|
||||
getKeysReferenced :: Annex [Key]
|
||||
getKeysReferenced = do
|
||||
top <- fromRepo Git.workTree
|
||||
files <- inRepo $ LsFiles.inRepo [top]
|
||||
keypairs <- mapM Backend.lookupFile files
|
||||
return $ map fst $ catMaybes keypairs
|
||||
|
||||
{- Given an initial value, mutates it using an action for each
|
||||
- key referenced by symlinks in the git repo. -}
|
||||
withKeysReferenced :: v -> (Key -> v -> v) -> Annex v
|
||||
withKeysReferenced initial a = do
|
||||
top <- fromRepo Git.workTree
|
||||
go initial =<< inRepo (LsFiles.inRepo [top])
|
||||
withKeysReferenced initial a = go initial =<< files
|
||||
where
|
||||
files = do
|
||||
top <- fromRepo Git.workTree
|
||||
inRepo $ LsFiles.inRepo [top]
|
||||
go v [] = return v
|
||||
go v (f:fs) = do
|
||||
x <- Backend.lookupFile f
|
||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -6,6 +6,7 @@ git-annex (3.20120310) UNRELEASED; urgency=low
|
|||
* unused: Reduce memory usage significantly. Still not constant
|
||||
space, but now only needs to store the set of file contents that
|
||||
are present in the annex in memory.
|
||||
* status: Fixed to run in nearly constant space.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Sat, 10 Mar 2012 14:03:22 -0400
|
||||
|
||||
|
|
Loading…
Reference in a new issue