status: Massively sped up; remove --fast mode.
Using Sets is the right thing; they have constant size lookup like my SizeList, and logn insertation, which beats nub to death. Runs faster than --fast mode did before, and gives accurate counts. 13 seconds total runtime with a warm cache in a repository with 40 thousand keys.
This commit is contained in:
parent
cabbefd9d2
commit
9f5c7a246b
3 changed files with 39 additions and 65 deletions
|
@ -13,8 +13,9 @@ import Data.Maybe
|
||||||
import System.IO
|
import System.IO
|
||||||
import Data.List
|
import Data.List
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.Set as S
|
||||||
|
import Data.Set (Set)
|
||||||
|
|
||||||
import qualified Annex
|
|
||||||
import qualified Types.Backend as B
|
import qualified Types.Backend as B
|
||||||
import qualified Types.Remote as R
|
import qualified Types.Remote as R
|
||||||
import qualified Remote
|
import qualified Remote
|
||||||
|
@ -23,33 +24,23 @@ import qualified Git
|
||||||
import Command
|
import Command
|
||||||
import Types
|
import Types
|
||||||
import Utility.DataUnits
|
import Utility.DataUnits
|
||||||
import Utility.Conditional
|
|
||||||
import Content
|
import Content
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Locations
|
import Locations
|
||||||
import Backend
|
import Backend
|
||||||
import Messages
|
|
||||||
|
|
||||||
-- a named computation that produces a statistic
|
-- a named computation that produces a statistic
|
||||||
type Stat = StatState (Maybe (String, Bool, StatState String))
|
type Stat = StatState (Maybe (String, StatState String))
|
||||||
|
|
||||||
-- cached info that multiple Stats may need
|
-- cached info that multiple Stats may need
|
||||||
data StatInfo = StatInfo
|
data StatInfo = StatInfo
|
||||||
{ keysPresentCache :: Maybe (SizeList Key)
|
{ keysPresentCache :: Maybe (Set Key)
|
||||||
, keysReferencedCache :: Maybe (SizeList Key)
|
, keysReferencedCache :: Maybe (Set Key)
|
||||||
}
|
}
|
||||||
|
|
||||||
-- a state monad for running Stats in
|
-- a state monad for running Stats in
|
||||||
type StatState = StateT StatInfo Annex
|
type StatState = StateT StatInfo Annex
|
||||||
|
|
||||||
-- a list with a known length
|
|
||||||
-- (Integer is used for the length to avoid
|
|
||||||
-- blowing up if someone annexed billions of files..)
|
|
||||||
type SizeList a = ([a], Integer)
|
|
||||||
|
|
||||||
sizeList :: [a] -> SizeList a
|
|
||||||
sizeList l = (l, genericLength l)
|
|
||||||
|
|
||||||
command :: [Command]
|
command :: [Command]
|
||||||
command = [repoCommand "status" paramNothing seek
|
command = [repoCommand "status" paramNothing seek
|
||||||
"shows status information about the annex"]
|
"shows status information about the annex"]
|
||||||
|
@ -76,15 +67,10 @@ stats =
|
||||||
start :: CommandStart
|
start :: CommandStart
|
||||||
start = do
|
start = do
|
||||||
evalStateT (mapM_ showStat stats) (StatInfo Nothing Nothing)
|
evalStateT (mapM_ showStat stats) (StatInfo Nothing Nothing)
|
||||||
fastmode_note
|
|
||||||
stop
|
stop
|
||||||
|
|
||||||
fastmode_note :: Annex ()
|
stat :: String -> StatState String -> Stat
|
||||||
fastmode_note = whenM (Annex.getState Annex.fast) $
|
stat desc a = return $ Just (desc, a)
|
||||||
showLongNote "(*) approximate due to fast mode"
|
|
||||||
|
|
||||||
stat :: String -> Bool -> StatState String -> Stat
|
|
||||||
stat desc approx a = return $ Just (desc, approx, a)
|
|
||||||
|
|
||||||
nostat :: Stat
|
nostat :: Stat
|
||||||
nostat = return Nothing
|
nostat = return Nothing
|
||||||
|
@ -92,37 +78,35 @@ nostat = return Nothing
|
||||||
showStat :: Stat -> StatState ()
|
showStat :: Stat -> StatState ()
|
||||||
showStat s = calc =<< s
|
showStat s = calc =<< s
|
||||||
where
|
where
|
||||||
calc (Just (desc, approx, a)) = do
|
calc (Just (desc, a)) = do
|
||||||
fast <- lift $ Annex.getState Annex.fast
|
liftIO $ putStr $ desc ++ ": "
|
||||||
let star = if fast && approx then "(*)" else ""
|
|
||||||
liftIO $ putStr $ desc ++ star ++ ": "
|
|
||||||
liftIO $ hFlush stdout
|
liftIO $ hFlush stdout
|
||||||
liftIO . putStrLn =<< a
|
liftIO . putStrLn =<< a
|
||||||
calc Nothing = return ()
|
calc Nothing = return ()
|
||||||
|
|
||||||
supported_backends :: Stat
|
supported_backends :: Stat
|
||||||
supported_backends = stat "supported backends" False $
|
supported_backends = stat "supported backends" $
|
||||||
return $ unwords $ map B.name Backend.list
|
return $ unwords $ map B.name Backend.list
|
||||||
|
|
||||||
supported_remote_types :: Stat
|
supported_remote_types :: Stat
|
||||||
supported_remote_types = stat "supported remote types" False $
|
supported_remote_types = stat "supported remote types" $
|
||||||
return $ unwords $ map R.typename Remote.remoteTypes
|
return $ unwords $ map R.typename Remote.remoteTypes
|
||||||
|
|
||||||
local_annex_size :: Stat
|
local_annex_size :: Stat
|
||||||
local_annex_size = stat "local annex size" False $
|
local_annex_size = stat "local annex size" $
|
||||||
cachedKeysPresent >>= keySizeSum
|
cachedKeysPresent >>= keySizeSum
|
||||||
|
|
||||||
total_annex_size :: Stat
|
total_annex_size :: Stat
|
||||||
total_annex_size = stat "total annex size" True $
|
total_annex_size = stat "total annex size" $
|
||||||
cachedKeysReferenced >>= keySizeSum
|
cachedKeysReferenced >>= keySizeSum
|
||||||
|
|
||||||
local_annex_keys :: Stat
|
local_annex_keys :: Stat
|
||||||
local_annex_keys = stat "local annex keys" False $
|
local_annex_keys = stat "local annex keys" $
|
||||||
show . snd <$> cachedKeysPresent
|
show . S.size <$> cachedKeysPresent
|
||||||
|
|
||||||
total_annex_keys :: Stat
|
total_annex_keys :: Stat
|
||||||
total_annex_keys = stat "total annex keys" True $
|
total_annex_keys = stat "total annex keys" $
|
||||||
show . snd <$> cachedKeysReferenced
|
show . S.size <$> cachedKeysReferenced
|
||||||
|
|
||||||
tmp_size :: Stat
|
tmp_size :: Stat
|
||||||
tmp_size = staleSize "temporary directory size" gitAnnexTmpDir
|
tmp_size = staleSize "temporary directory size" gitAnnexTmpDir
|
||||||
|
@ -131,9 +115,9 @@ bad_data_size :: Stat
|
||||||
bad_data_size = staleSize "bad keys size" gitAnnexBadDir
|
bad_data_size = staleSize "bad keys size" gitAnnexBadDir
|
||||||
|
|
||||||
backend_usage :: Stat
|
backend_usage :: Stat
|
||||||
backend_usage = stat "backend usage" True $ usage <$> cachedKeysReferenced
|
backend_usage = stat "backend usage" $ usage <$> cachedKeysReferenced
|
||||||
where
|
where
|
||||||
usage (ks, _) = pp "" $ sort $ map swap $ splits ks
|
usage ks = pp "" $ sort $ map swap $ splits $ S.toList ks
|
||||||
splits :: [Key] -> [(String, Integer)]
|
splits :: [Key] -> [(String, Integer)]
|
||||||
splits ks = M.toList $ M.fromListWith (+) $ map tcount ks
|
splits ks = M.toList $ M.fromListWith (+) $ map tcount ks
|
||||||
tcount k = (keyBackendName k, 1)
|
tcount k = (keyBackendName k, 1)
|
||||||
|
@ -141,48 +125,44 @@ backend_usage = stat "backend usage" True $ usage <$> cachedKeysReferenced
|
||||||
pp c [] = c
|
pp c [] = c
|
||||||
pp c ((n, b):xs) = "\n\t" ++ b ++ ": " ++ show n ++ pp c xs
|
pp c ((n, b):xs) = "\n\t" ++ b ++ ": " ++ show n ++ pp c xs
|
||||||
|
|
||||||
cachedKeysPresent :: StatState (SizeList Key)
|
cachedKeysPresent :: StatState (Set Key)
|
||||||
cachedKeysPresent = do
|
cachedKeysPresent = do
|
||||||
s <- get
|
s <- get
|
||||||
case keysPresentCache s of
|
case keysPresentCache s of
|
||||||
Just v -> return v
|
Just v -> return v
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
keys <- lift getKeysPresent
|
keys <- S.fromList <$> lift getKeysPresent
|
||||||
let v = sizeList keys
|
put s { keysPresentCache = Just keys }
|
||||||
put s { keysPresentCache = Just v }
|
return keys
|
||||||
return v
|
|
||||||
|
|
||||||
cachedKeysReferenced :: StatState (SizeList Key)
|
cachedKeysReferenced :: StatState (Set Key)
|
||||||
cachedKeysReferenced = do
|
cachedKeysReferenced = do
|
||||||
s <- get
|
s <- get
|
||||||
case keysReferencedCache s of
|
case keysReferencedCache s of
|
||||||
Just v -> return v
|
Just v -> return v
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
-- A given key may be referenced repeatedly,
|
keys <- S.fromList <$> lift Command.Unused.getKeysReferenced
|
||||||
-- so nub is needed for accuracy, but is slow.
|
put s { keysReferencedCache = Just keys }
|
||||||
keys <- lift Command.Unused.getKeysReferenced
|
return keys
|
||||||
fast <- lift $ Annex.getState Annex.fast
|
|
||||||
let v = sizeList $ if fast then keys else nub keys
|
|
||||||
put s { keysReferencedCache = Just v }
|
|
||||||
return v
|
|
||||||
|
|
||||||
keySizeSum :: SizeList Key -> StatState String
|
keySizeSum :: Set Key -> StatState String
|
||||||
keySizeSum (keys, len) = do
|
keySizeSum s = do
|
||||||
let knownsizes = mapMaybe keySize keys
|
let (sizes, unknownsizes) = S.partition isJust $ S.map keySize s
|
||||||
let total = roughSize storageUnits False $ sum knownsizes
|
let total = roughSize storageUnits False $
|
||||||
let missing = len - genericLength knownsizes
|
fromJust $ S.fold (liftM2 (+)) (Just 0) sizes
|
||||||
|
let num = S.size unknownsizes
|
||||||
return $ total ++
|
return $ total ++
|
||||||
if missing > 0
|
if num == 0
|
||||||
then aside $ "but " ++ show missing ++ " keys have unknown size"
|
then ""
|
||||||
else ""
|
else aside $ "but " ++ show num ++ " keys have unknown size"
|
||||||
|
|
||||||
staleSize :: String -> (Git.Repo -> FilePath) -> Stat
|
staleSize :: String -> (Git.Repo -> FilePath) -> Stat
|
||||||
staleSize label dirspec = do
|
staleSize label dirspec = do
|
||||||
keys <- lift (Command.Unused.staleKeys dirspec)
|
keys <- lift (Command.Unused.staleKeys dirspec)
|
||||||
if null keys
|
if null keys
|
||||||
then nostat
|
then nostat
|
||||||
else stat label False $ do
|
else stat label $ do
|
||||||
s <- keySizeSum $ sizeList keys
|
s <- keySizeSum $ S.fromList keys
|
||||||
return $ s ++ aside "clean up with git-annex unused"
|
return $ s ++ aside "clean up with git-annex unused"
|
||||||
|
|
||||||
aside :: String -> String
|
aside :: String -> String
|
||||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -12,8 +12,7 @@ git-annex (3.20110916) UNRELEASED; urgency=low
|
||||||
match the specified conditions.
|
match the specified conditions.
|
||||||
* Note that this is a behavior change for git-annex find! Old behavior
|
* Note that this is a behavior change for git-annex find! Old behavior
|
||||||
can be gotten by using: git-annex find --in .
|
can be gotten by using: git-annex find --in .
|
||||||
* status: In --fast mode, all status info is displayed now; but some
|
* status: Massively sped up; remove --fast mode.
|
||||||
of it is only approximate, and is marked as such.
|
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Sun, 18 Sep 2011 18:25:51 -0400
|
-- Joey Hess <joeyh@debian.org> Sun, 18 Sep 2011 18:25:51 -0400
|
||||||
|
|
||||||
|
|
|
@ -238,11 +238,6 @@ subdirectories).
|
||||||
Displays some statistics and other information, including how much data
|
Displays some statistics and other information, including how much data
|
||||||
is in the annex.
|
is in the annex.
|
||||||
|
|
||||||
Some of the statistics can take a while to generate, and those
|
|
||||||
come last. You can ctrl-c this command once it's displayed the
|
|
||||||
information you wanted to see. Or, use --fast to produce statistics
|
|
||||||
more quickly, but possibly less accurately.
|
|
||||||
|
|
||||||
* map
|
* map
|
||||||
|
|
||||||
Helps you keep track of your repositories, and the connections between them,
|
Helps you keep track of your repositories, and the connections between them,
|
||||||
|
|
Loading…
Reference in a new issue