-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Pure and impure Bloom Filter implementations. -- -- Pure and impure Bloom Filter implementations. @package bloomfilter @version 1.0 -- | Fast hashing of Haskell values. The hash functions used are Bob -- Jenkins's public domain functions, which combine high performance with -- excellent mixing properties. For more details, see -- http://burtleburtle.net/bob/hash/. -- -- In addition to the usual one input, one output hash functions, -- this module provides multi-output hash functions, suitable for use in -- applications that need multiple hashes, such as Bloom filtering. module Data.BloomFilter.Hash class Hashable a hashIO :: (Hashable a) => a -> CInt -> IO CInt hashIO2 :: (Hashable a) => a -> CInt -> CInt -> IO (CInt, CInt) -- | Compute a hash. hash :: (Hashable a) => a -> Word32 -- | Compute a list of hashes. The value to hash may be inspected as many -- times as there are hashes requested. hashes :: (Hashable a) => Int -> a -> [Word32] -- | Compute a list of hashes relatively cheaply. The value to hash is -- inspected at most twice, regardless of the number of hashes requested. -- -- We use a variant of Kirsch and Mitzenmacher's technique from "Less -- Hashing, Same Performance: Building a Better Bloom Filter", -- http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf. -- -- Where Kirsch and Mitzenmacher multiply the second hash by a -- coefficient, we shift right by the coefficient. This offers better -- performance (as a shift is much cheaper than a multiply), and the low -- order bits of the final hash stay well mixed. cheapHashes :: (Hashable a) => Int -> a -> [Word32] -- | Compute a hash of a Storable instance. hashOne :: (Storable a) => a -> CInt -> IO CInt -- | Compute two hashes of a Storable instance. hashTwo :: (Storable a) => a -> CInt -> CInt -> IO (CInt, CInt) -- | Compute a hash of a list of Storable instances. hashList :: (Storable a) => [a] -> CInt -> IO CInt -- | Compute two hashes of a list of Storable instances. hashList2 :: (Storable a) => [a] -> CInt -> CInt -> IO (CInt, CInt) instance (Storable a) => Hashable [a] instance (Hashable a, Hashable b, Hashable c, Hashable d, Hashable e) => Hashable (a, b, c, d, e) instance (Hashable a, Hashable b, Hashable c, Hashable d) => Hashable (a, b, c, d) instance (Hashable a, Hashable b, Hashable c) => Hashable (a, b, c) instance (Hashable a, Hashable b) => Hashable (a, b) instance (Hashable a, Hashable b) => Hashable (Either a b) instance (Hashable a) => Hashable (Maybe a) instance Hashable ByteString instance Hashable ByteString instance Hashable Word64 instance Hashable Word32 instance Hashable Word16 instance Hashable Word8 instance Hashable Int64 instance Hashable Int32 instance Hashable Int16 instance Hashable Int8 instance Hashable Double instance Hashable Float instance Hashable Int instance Hashable Char instance Hashable Ordering instance Hashable Bool instance Hashable Integer instance Hashable () -- | A fast, space efficient Bloom filter implementation. A Bloom filter is -- a set-like data structure that provides a probabilistic membership -- test. -- -- -- -- This module provides low-level control. For an easier to use -- interface, see the Data.BloomFilter.Easy module. module Data.BloomFilter -- | A hash value is 32 bits wide. This limits the maximum size of a filter -- to about four billion elements, or 512 megabytes of memory. type Hash = Word32 -- | An immutable Bloom filter, suitable for querying from pure code. data Bloom a -- | A mutable Bloom filter, for use within the ST monad. data MBloom s a -- | Build an immutable Bloom filter from a seed value. The seeding -- function populates the filter as follows. -- -- unfoldB :: (a -> [Hash]) -> Int -> (b -> Maybe (a, b)) -> b -> Bloom a -- | Create an immutable Bloom filter, populating it from a list of values. -- -- Here is an example that uses the cheapHashes function from -- the Data.BloomFilter.Hash module to create a hash function that -- returns three hashes. -- --
--   import Data.BloomFilter.Hash (cheapHashes)
--   
--   filt = fromListB (cheapHashes 3) 1024 ["foo", "bar", "quux"]
--   
fromListB :: (a -> [Hash]) -> Int -> [a] -> Bloom a -- | Create an immutable Bloom filter, using the given setup function which -- executes in the ST monad. -- -- Example: -- --
--   import Data.BloomFilter.Hash (cheapHashes)
--   
--   filter = createB (cheapHashes 3) 1024 $ mf -> do
--              insertMB mf "foo"
--              insertMB mf "bar"
--   
-- -- Note that the result of the setup function is not used. createB :: (a -> [Hash]) -> Int -> (forall s. MBloom s a -> ST s z) -> Bloom a -- | Return the size of an immutable Bloom filter, in bits. lengthB :: Bloom a -> Int -- | Query an immutable Bloom filter for membership. If the value is -- present, return True. If the value is not present, there is -- still some possibility that True will be returned. elemB :: a -> Bloom a -> Bool -- | Create a new mutable Bloom filter. For efficiency, the number of bits -- used may be larger than the number requested. It is always rounded up -- to the nearest higher power of two. -- -- For a safer creation interface, use createB. To convert a -- mutable filter to an immutable filter for use in pure code, use -- unsafeFreezeMB. newMB :: (a -> [Hash]) -> Int -> ST s (MBloom s a) -- | Create an immutable Bloom filter from a mutable one. The mutable -- filter must not be modified afterwards, or a runtime crash may -- occur. For a safer creation interface, use createB. unsafeFreezeMB :: MBloom s a -> ST s (Bloom a) -- | Copy an immutable Bloom filter to create a mutable one. There is no -- non-copying equivalent. thawMB :: Bloom a -> ST s (MBloom s a) -- | Return the size of a mutable Bloom filter, in bits. lengthMB :: MBloom s a -> Int -- | Query a mutable Bloom filter for membership. If the value is present, -- return True. If the value is not present, there is -- still some possibility that True will be returned. elemMB :: a -> MBloom s a -> ST s Bool -- | Insert a value into a mutable Bloom filter. Afterwards, a membership -- query for the same value is guaranteed to return True. insertMB :: MBloom s a -> a -> ST s () bitArrayB :: Bloom a -> (UArray Int Hash) bitArrayMB :: MBloom s a -> (STUArray s Int Hash) instance Show (Bloom a) instance Show (MBloom s a) -- | An easy-to-use Bloom filter interface. module Data.BloomFilter.Easy -- | An immutable Bloom filter, suitable for querying from pure code. data Bloom a -- | Create a Bloom filter with the given false positive rate and members. -- The hash functions used are computed by the cheapHashes -- function from the Data.BloomFilter.Hash module. easyList :: (Hashable a) => Double -> [a] -> Bloom a -- | Query an immutable Bloom filter for membership. If the value is -- present, return True. If the value is not present, there is -- still some possibility that True will be returned. elemB :: a -> Bloom a -> Bool -- | Return the size of an immutable Bloom filter, in bits. lengthB :: Bloom a -> Int -- | Suggest a good combination of filter size and number of hash functions -- for a Bloom filter, based on its expected maximum capacity and a -- desired false positive rate. -- -- The false positive rate is the rate at which queries against the -- filter should return True when an element is not actually -- present. It should be a fraction between 0 and 1, so a 1% false -- positive rate is represented by 0.01. suggestSizing :: Int -> Double -> (Int, Int)