use bloom filter in second pass of sync --all --content

This is needed because when preferred content matches on files,
the second pass would otherwise want to drop all keys. Using a bloom filter
avoids this, and in the case of a false positive, a key will be left
undropped that preferred content would allow dropping. Chances of that
happening are a mere 1 in 1 million.
This commit is contained in:
Joey Hess 2015-06-16 18:38:12 -04:00
parent a0a8127956
commit adba0595bd
4 changed files with 46 additions and 21 deletions

View file

@ -40,11 +40,11 @@ bloomBitsHashes = do
- Once the action completes, the mutable filter is frozen
- for later use.
-}
genBloomFilter :: Hashable v => ((v -> Annex ()) -> Annex b) -> Annex (Bloom v)
genBloomFilter :: Hashable v => ((v -> Annex ()) -> Annex ()) -> Annex (Bloom v)
genBloomFilter populate = do
(numbits, numhashes) <- bloomBitsHashes
bloom <- lift $ newMB (cheapHashes numhashes) numbits
_ <- populate $ \v -> lift $ insertMB bloom v
populate $ \v -> lift $ insertMB bloom v
lift $ unsafeFreezeMB bloom
where
lift = liftIO . stToIO