3290a09a70
Converted warning and similar to use StringContainingQuotedPath. Most warnings are static strings, some do refer to filepaths that need to be quoted, and others don't need quoting. Note that, since quote filters out control characters of even UnquotedString, this makes all warnings safe, even when an attacker sneaks in a control character in some other way. When json is being output, no quoting is done, since json gets its own quoting. This does, as a side effect, make warning messages in json output not be indented. The indentation is only needed to offset warning messages underneath the display of the file they apply to, so that's ok. Sponsored-by: Brett Eisenberg on Patreon
54 lines
1.6 KiB
Haskell
54 lines
1.6 KiB
Haskell
{- git-annex bloom filter
 -
 - Copyright 2010-2015 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
|
|
|
|
module Annex.BloomFilter where
|
|
|
|
import Annex.Common
|
|
import qualified Annex
|
|
import Utility.Bloom
|
|
|
|
import Control.Monad.ST
|
|
|
|
{- Number of keys the bloom filter is sized to hold.
 -
 - Taken from annexBloomCapacity in the git config when it is set;
 - otherwise defaults to 500000.
 -
 - A bloom filter capable of holding half a million keys with a
 - false positive rate of 1 in 10000000 uses around 16 mb of memory,
 - so will easily fit on even my lowest memory systems.
 -}
bloomCapacity :: Annex Int
bloomCapacity = fmap configured Annex.getGitConfig
  where
    configured = fromMaybe 500000 . annexBloomCapacity
|
|
{- Accuracy of the bloom filter, expressed as N in a false positive
 - rate of 1 in N.
 -
 - Taken from annexBloomAccuracy in the git config when it is set;
 - otherwise defaults to 10000000.
 -}
bloomAccuracy :: Annex Int
bloomAccuracy = fmap configured Annex.getGitConfig
  where
    configured = fromMaybe 10000000 . annexBloomAccuracy
|
|
{- Works out the number of bits and the number of hashes to use for
 - the bloom filter, from the configured capacity and accuracy.
 -
 - The accuracy N is passed to safeSuggestSizing as the rate 1/N.
 - When safeSuggestSizing rejects the values, a warning is displayed
 - and a fixed sizing is used instead.
 -}
bloomBitsHashes :: Annex (Int, Int)
bloomBitsHashes = do
    capacity <- bloomCapacity
    accuracy <- bloomAccuracy
    let falsePositiveRate = 1 / fromIntegral accuracy
    either fallback return (safeSuggestSizing capacity falsePositiveRate)
  where
    fallback e = do
        warning (UnquotedString ("bloomfilter " ++ e ++ "; falling back to sane value"))
        -- precalculated value for 500000 (1/10000000)
        return (16777216, 23)
|
|
|
|
{- Builds a bloom filter by running an action that populates it.
 -
 - A mutable filter is allocated using the sizing from bloomBitsHashes.
 - The populate action is handed a callback; every value passed to that
 - callback is inserted into the mutable filter.
 -
 - After populate returns, the mutable filter is frozen with
 - unsafeFreezeMB and the frozen filter is returned for later use.
 - NOTE(review): presumably the callback must not be used once populate
 - has returned, since the filter is frozen unsafely -- confirm.
 -}
genBloomFilter :: Hashable v => ((v -> Annex ()) -> Annex ()) -> Annex (Bloom v)
genBloomFilter populate = do
    (numbits, numhashes) <- bloomBitsHashes
    mutable <- liftST (newMB (cheapHashes numhashes) numbits)
    populate (liftST . insertMB mutable)
    liftST (unsafeFreezeMB mutable)
  where
    -- Runs an ST action in IO, lifted into the calling monad.
    liftST action = liftIO (stToIO action)
|
|
|
|
{- Keeps only the values from the list for which notElemB holds
 - against the bloom filter, preserving their order.
 -}
bloomFilter :: [v] -> Bloom v -> [v]
bloomFilter l bloom = [ v | v <- l, v `notElemB` bloom ]
|