git-annex/Annex/BloomFilter.hs
Joey Hess 3290a09a70
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.

Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.

When json is being output, no quoting is done, since json gets its own
quoting.

This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.

Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 15:55:44 -04:00

54 lines
1.6 KiB
Haskell

{- git-annex bloom filter
-
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.BloomFilter where
import Annex.Common
import qualified Annex
import Utility.Bloom
import Control.Monad.ST
{- A bloom filter capable of holding half a million keys with a
- false positive rate of 1 in 10000000 uses around 16 mb of memory,
- so will easily fit on even my lowest memory systems.
-}
bloomCapacity :: Annex Int
bloomCapacity = fromMaybe 500000 . annexBloomCapacity <$> Annex.getGitConfig
bloomAccuracy :: Annex Int
bloomAccuracy = fromMaybe 10000000 . annexBloomAccuracy <$> Annex.getGitConfig
bloomBitsHashes :: Annex (Int, Int)
bloomBitsHashes = do
capacity <- bloomCapacity
accuracy <- bloomAccuracy
case safeSuggestSizing capacity (1 / fromIntegral accuracy) of
Left e -> do
warning $ UnquotedString $
"bloomfilter " ++ e ++ "; falling back to sane value"
-- precaulculated value for 500000 (1/10000000)
return (16777216,23)
Right v -> return v
{- Creates a bloom filter, and runs an action to populate it.
-
- The action is passed a callback that it can use to feed values into the
- bloom filter.
-
- Once the action completes, the mutable filter is frozen
- for later use.
-}
genBloomFilter :: Hashable v => ((v -> Annex ()) -> Annex ()) -> Annex (Bloom v)
genBloomFilter populate = do
(numbits, numhashes) <- bloomBitsHashes
bloom <- lift $ newMB (cheapHashes numhashes) numbits
populate $ \v -> lift $ insertMB bloom v
lift $ unsafeFreezeMB bloom
where
lift = liftIO . stToIO
bloomFilter :: [v] -> Bloom v -> [v]
bloomFilter l bloom = filter (\v -> v `notElemB` bloom) l