try at using bloom filters
leaks memory
This commit is contained in:
parent
83bbb3bc93
commit
160715166b
1 changed files with 18 additions and 1 deletions
|
@ -12,6 +12,9 @@ module Command.Unused where
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import qualified Data.Text.Lazy as L
|
import qualified Data.Text.Lazy as L
|
||||||
import qualified Data.Text.Lazy.Encoding as L
|
import qualified Data.Text.Lazy.Encoding as L
|
||||||
|
import Data.BloomFilter
|
||||||
|
import Data.BloomFilter.Easy
|
||||||
|
import Data.BloomFilter.Hash
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import Command
|
import Command
|
||||||
|
@ -53,6 +56,18 @@ start = do
|
||||||
showStart "unused" name
|
showStart "unused" name
|
||||||
next action
|
next action
|
||||||
|
|
||||||
|
-- | Build a bloom filter containing the given keys.
--
-- Keys are inserted by their 'show' rendering, so membership tests against
-- the resulting filter must also use @show key@.
genBloomFilter :: [Key] -> Annex (Bloom String)
genBloomFilter ks = return $ fromListB (cheapHashes numhashes) numbits $ map show ks
  where
    -- A bloom filter capable of holding one million keys with a
    -- false positive rate of 0.1% uses 16 mb of memory.
    -- (0.1% is 0.001; the value passed here must stay in sync with
    -- this comment.)
    -- TODO: make this configurable, for the really large repos,
    -- or really low false positive rates.
    (numbits, numhashes) = suggestSizing 1000000 0.001
|
||||||
|
|
||||||
|
-- | Keep only the keys whose 'show' rendering is reported as absent from
-- the bloom filter.
bloomFilter :: Bloom String -> [Key] -> [Key]
bloomFilter seen = filter unseen
  where
    -- True when the filter reports the key's rendering as not present.
    unseen key = show key `notElemB` seen
|
||||||
|
|
||||||
checkUnused :: CommandPerform
|
checkUnused :: CommandPerform
|
||||||
checkUnused = chain 0
|
checkUnused = chain 0
|
||||||
[ check "" unusedMsg $ findunused =<< Annex.getState Annex.fast
|
[ check "" unusedMsg $ findunused =<< Annex.getState Annex.fast
|
||||||
|
@ -65,7 +80,9 @@ checkUnused = chain 0
|
||||||
return []
|
return []
|
||||||
findunused False = do
|
findunused False = do
|
||||||
showAction "checking for unused data"
|
showAction "checking for unused data"
|
||||||
excludeReferenced =<< getKeysPresent
|
b <- genBloomFilter =<< withKeysReferenced [] (:)
|
||||||
|
bloomFilter b <$> getKeysPresent
|
||||||
|
-- TODO: check branches
|
||||||
chain _ [] = next $ return True
|
chain _ [] = next $ return True
|
||||||
chain v (a:as) = do
|
chain v (a:as) = do
|
||||||
v' <- a v
|
v' <- a v
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue