finish bloom filters

Add tuning, docs, etc.

Not sure if status is the right place to remote size.. perhaps unused
should report the size and also warn if it sees more keys than the bloom
filter allows?
This commit is contained in:
Joey Hess 2012-03-12 16:18:14 -04:00
parent faf3a94fa7
commit 25809ce2e0
7 changed files with 64 additions and 8 deletions

View file

@ -76,6 +76,7 @@ slow_stats =
, local_annex_size
, known_annex_keys
, known_annex_size
, bloom_info
, backend_usage
]
@ -127,7 +128,7 @@ remote_list level desc = stat n $ nojson $ lift $ do
return $ if null s then "0" else show (length rs) ++ "\n" ++ beginning s
where
n = desc ++ " repositories"
local_annex_size :: Stat
local_annex_size = stat "local annex size" $ json id $
showSizeKeys <$> cachedPresentData
@ -136,6 +137,22 @@ local_annex_keys :: Stat
local_annex_keys = stat "local annex keys" $ json show $
countKeys <$> cachedPresentData
bloom_info :: Stat
bloom_info = stat "bloom filter size" $ json id $ do
localkeys <- countKeys <$> cachedPresentData
capacity <- fromIntegral <$> lift Command.Unused.bloomCapacity
let note = aside $
if localkeys >= capacity
then "appears too small for this repository; adjust annex.bloomcapacity"
else "has room for " ++ show (capacity - localkeys) ++ " more local annex keys"
-- Two bloom filters are used at the same time, so double the size
-- of one.
size <- roughSize memoryUnits True . (* 2) . fromIntegral . fst <$>
lift Command.Unused.bloomBitsHashes
return $ size ++ note
known_annex_size :: Stat
known_annex_size = stat "known annex size" $ json id $
showSizeKeys <$> cachedReferencedData