-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Haskell Implementation of Cuckoo Filters -- -- Haskell implementation of Cuckoo filters as described in -- -- B. Fan, D.G. Anderson, M. Kaminsky, M.D. Mitzenmacher. Cuckoo -- Filter: Practically Better Than Bloom. In Proc. CoNEXT, 2014. -- -- Cuckoo filters are a data structure for probabilistic set membership. -- They support insertion, deletion, and membership queries for set -- elements. -- -- Membership queries may return false positive results. But queries -- don't return false negative results. -- -- Unlike Bloom filters, Cuckoo filters maintain an upper bound on the -- false positive rate that is independent of the load of the filter. -- However, insertion of new elements in the filter can fail. For typical -- configurations this probability is very small for load factors smaller -- than 90 percent. @package cuckoo @version 0.2.2 -- | Internal Utilities. No guarantee is made about the stability of these -- functions. Changes to these function won't be announced in the -- CHANGELOG and are not reflected in the package version. module Data.Cuckoo.Internal -- | Reify type level Nat into Int value. w :: forall (n :: Nat). KnownNat n => Int -- | An shorter alias for fromIntegral. int :: Integral a => Num b => a -> b -- | fit a b computes how many bs are needed to fit -- a, i.e. <math>. -- -- For instance, -- --
--   >>> fit 7 3
--   3
--   
-- --
--   >>> fit 6 3
--   2
--   
fit :: Real a => Real b => Integral c => a -> b -> c -- | fit a b computes how many bs are needed to fit -- a, i.e. <math>. -- -- For instance, -- --
--   >>> intFit 7 3
--   3
--   
-- --
--   >>> intFit 6 3
--   2
--   
intFit :: Integral a => Integral b => a -> b -> a -- | nextPowerOfTwo a computes the smallest power of two that is -- larger or equal than a. nextPowerOfTwo :: Real a => Integral b => a -> b -- | nextPowerOfTwo a computes the smallest power of two that is -- larger or equal than a. intNextPowerOfTwo :: Int -> Int -- | Write a Word64 value into a Word32 aligned -- MutableByteArray set :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> Word64 -> m () -- | Get a Word64 from a Word32 aligned -- MutableByteArray. get :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> m Word64 -- | Computes a Sip hash for a value that has an Storable instance. -- -- The first argument is a salt value that is used to derive the key for -- the hash computation. sip :: Storable a => Int -> a -> Word64 -- | Computes a 64 bit Fnv1a hash for a value that has an Storable -- instance. -- -- The first argument is use as a salt. fnv1a :: Storable a => Int -> a -> Word64 -- | Computes a 64 bit Fnv1a hash for a value that is an instance of -- ByteArrayAccess. -- -- The first argument is use as a salt. fnv1a_bytes :: ByteArrayAccess a => Int -> a -> Word64 -- | Computes a Sip hash for a value that is an instance of -- ByteArrayAccess. -- -- The first argument is a salt value that is used to derive the key for -- the hash computation. sip_bytes :: ByteArrayAccess a => Int -> a -> Word64 -- | An version of a Sip hash that is used internally. In order to avoid -- dependencies between different hash computations, it shouldn't be used -- in the implementation of instances of CuckooFilterHash. sip2 :: Storable a => Int -> a -> Word64 -- | Haskell implementation of Cuckoo filters as described in -- -- B. Fan, D.G. Anderson, M. Kaminsky, M.D. Mitzenmacher. Cuckoo -- Filter: Practically Better Than Bloom. In Proc. CoNEXT, 2014. -- -- Cuckoo filters are a data structure for probabilistic set membership. -- They support insertion, deletion, and membership queries for set -- elements. -- -- Membership queries may return false positive results. But queries -- don't return false negative results. -- -- Unlike Bloom filters, Cuckoo filters maintain an upper bound on the -- false positive rate that is independent of the load of the filter. -- However, insertion of new elements in the filter can fail. For typical -- configurations this probability is very small for load factors smaller -- than 90 percent. -- --

Example

-- --
--   {-# LANGUAGE DataKinds #-}
--   {-# LANGUAGE TypeApplications #-}
--   {-# LANGUAGE TypeFamilies #-}
--   {-# OPTIONS_GHC -fno-warn-orphans #-}
--   
--   import Control.Monad (filterM)
--   import Data.Cuckoo
--   import Data.List ((\\))
--   
--   -- Define CuckooFilterHash instance (this uses the default implementation)
--   instance CuckooFilterHash Int
--   
--   main :: IO ()
--   main = do
--       -- Create Filter for a minimum of 500000 entries
--       f <- newCuckooFilter @4 @8 @Int 0 500000
--   
--       -- Insert 450000 items
--       failed <- filterM (fmap not . insert f) [0..500000-1]
--   
--       -- Query inserted items
--       missing <- filterM (fmap not . member f) [0..500000-1]
--   
--       -- Test for false positives
--       false <- filterM (member f) [500000..1000000 - 1]
--   
--       -- Report results
--       putStrLn $ "failed inserts: " <> show (length failed)
--       putStrLn $ "false positives: " <> show (length false)
--       putStrLn $ "false positive rate (%): " <> show @Double (fromIntegral (length false) * 100 / 500000)
--       putStrLn $ "missing (must be 0): " <> show (length $ missing \\ failed)
--   
--       -- Filter properties
--       putStrLn $ "capacity: " <> show (capacityInItems f)
--       putStrLn $ "size in allocated bytes: " <> show (sizeInAllocatedBytes f)
--   
--       -- computing the following is a bit slow
--       c <- itemCount f
--       putStrLn $ "item count: " <> show c
--       lf <- loadFactor f
--       putStrLn $ "load factor (%): " <> show lf
--       putStrLn $ "bits per item: " <> show @Double (fromIntegral (sizeInAllocatedBytes f) * 8 / fromIntegral c)
--   
module Data.Cuckoo -- | Salt for hash computations. newtype Salt Salt :: Int -> Salt -- | Choosing good hash functions is imperative for a good performance of a -- cuckoo filter. The hash functions must be -- -- -- -- The default implementations use sip hash for cuckooHash and -- fnv1a (64 bit) for cuckooFingerprint and require an -- instance of Storable. -- --
--   >>> instance CuckooFilterHash Int
--   
-- -- The following example uses the hash functions that are provided in -- this module to define an instance for ByteString: -- --
--   >>> import qualified Data.ByteString as B
--   
--   >>> :{
--   instance CuckooFilterHash B.ByteString where
--       cuckooHash (Salt s) a = fnv1a_bytes s a
--       cuckooFingerprint (Salt s) a = sip_bytes s a
--       {-# INLINE cuckooHash #-}
--       {-# INLINE cuckooFingerprint #-}
--   :}
--   
class CuckooFilterHash a -- | This function must provide good entropy on the lower <math> bits -- of the result, where <math> is the number of buckets. cuckooHash :: CuckooFilterHash a => Salt -> a -> Word64 -- | This function must provide good entropy on the lower bits of the size -- of a fingerprint. cuckooFingerprint :: CuckooFilterHash a => Salt -> a -> Word64 -- | This function must provide good entropy on the lower <math> bits -- of the result, where <math> is the number of buckets. cuckooHash :: (CuckooFilterHash a, Storable a) => Salt -> a -> Word64 -- | This function must provide good entropy on the lower bits of the size -- of a fingerprint. cuckooFingerprint :: (CuckooFilterHash a, Storable a) => Salt -> a -> Word64 -- | Computes a Sip hash for a value that has an Storable instance. -- -- The first argument is a salt value that is used to derive the key for -- the hash computation. sip :: Storable a => Int -> a -> Word64 -- | Computes a Sip hash for a value that is an instance of -- ByteArrayAccess. -- -- The first argument is a salt value that is used to derive the key for -- the hash computation. sip_bytes :: ByteArrayAccess a => Int -> a -> Word64 -- | Computes a 64 bit Fnv1a hash for a value that has an Storable -- instance. -- -- The first argument is use as a salt. fnv1a :: Storable a => Int -> a -> Word64 -- | Computes a 64 bit Fnv1a hash for a value that is an instance of -- ByteArrayAccess. -- -- The first argument is use as a salt. fnv1a_bytes :: ByteArrayAccess a => Int -> a -> Word64 -- | Cuckoo Filter with -- -- -- -- The following constraints apply -- -- -- -- The implementation is not thread safe. For concurrent use the filter -- must be wrapped in a read-write lock. data CuckooFilter s (b :: Nat) (f :: Nat) (a :: Type) -- | Cuckoo filter that can be used in the IO monad. type CuckooFilterIO b f a = CuckooFilter RealWorld b f a -- | Create a new Cuckoo filter that has at least the given capacity. -- -- Enabling the TypeApplications language extension provides a -- convenient way for passing the type parameters to the function. -- --
--   >>> :set -XTypeApplications -XDataKinds -XTypeFamilies
--   
--   >>> newCuckooFilter @4 @10 @Int 0 1000
--   
-- -- The type parameters are -- -- -- -- The following constraints apply: -- -- -- -- The false positive rate depends mostly on the value of f. It -- is bounded from above by <math>. In most cases 4 is a -- good choice for b. -- -- Actual performance depends on the choice of good hash functions that -- provide high uniformity on the lower bits. -- -- The actual capacity may be much larger than what is requested, because -- the actual bucket count is a power of two. -- --
--   >>> f <- newCuckooFilter @4 @10 @Int 0 600
--   
--   >>> capacityInItems f
--   1024
--   
--   >>> sizeInAllocatedBytes f
--   1284
--   
newCuckooFilter :: forall b f a m. KnownNat b => KnownNat f => PrimMonad m => Salt -> Natural -> m (CuckooFilter (PrimState m) b f a) -- | Insert an item into the filter and return whether the operation was -- successful. If insertion fails, the filter is unchanged. An item can -- be inserted more than once. The return value indicates whether -- insertion was successful. The operation can fail when the filter -- doesn't have enough space for the item. -- -- This function is not thread safe. No concurrent writes or reads should -- occur while this function is executed. If this is needed a lock must -- be used. -- -- This function is not exception safe. The filter must not be used any -- more after an asynchronous exception has been thrown during the -- computation of this function. If this function is used in the presence -- of asynchronous exceptions it should be apprioriately masked. -- --
--   >>> f <- newCuckooFilter @4 @10 @Int 0 1000
--   
--   >>> insert f 0
--   True
--   
--   >>> insert f 0
--   True
--   
--   >>> itemCount f
--   2
--   
insert :: forall b f a m. KnownNat f => KnownNat b => PrimMonad m => CuckooFilterHash a => CuckooFilter (PrimState m) b f a -> a -> m Bool -- | Test whether an item is in the set that is represented by the Cuckoo -- filter. -- -- A negative result means that the item is definitively not in the set. -- A positive result means that the item is most likely in the set. The -- rate of false positives is bounded from above by <math> where -- b is the number of items per bucket and f is the -- size of a fingerprint in bits. -- --
--   >>> f <- newCuckooFilter @4 @10 @Int 0 1000
--   
--   >>> insert f 0
--   True
--   
--   >>> member f 0
--   True
--   
--   >>> member f 1
--   False
--   
member :: CuckooFilterHash a => PrimMonad m => KnownNat f => KnownNat b => CuckooFilter (PrimState m) b f a -> a -> m Bool -- | Delete an items from the filter. An item that was inserted more than -- once can also be deleted more than once. -- -- IMPORTANT An item must only be deleted if it was successfully -- added to the filter before (and hasn't been deleted since then). -- -- Deleting an item that isn't in the filter can result in the filter -- returning false negative results. -- -- This function is not thread safe. No concurrent writes must occur -- while this function is executed. If this is needed a lock must be -- used. Concurrent reads are fine. -- --
--   >>> f <- newCuckooFilter @4 @10 @Int 0 1000
--   
--   >>> insert f 0
--   True
--   
--   >>> insert f 0
--   True
--   
--   >>> itemCount f
--   2
--   
--   >>> delete f 0
--   True
--   
--   >>> itemCount f
--   1
--   
--   >>> member f 0
--   True
--   
--   >>> delete f 0
--   True
--   
--   >>> itemCount f
--   0
--   
--   >>> member f 0
--   False
--   
delete :: CuckooFilterHash a => PrimMonad m => KnownNat f => KnownNat b => CuckooFilter (PrimState m) b f a -> a -> m Bool -- | The total number of bytes allocated for storing items in the filter. sizeInAllocatedBytes :: forall b f a s. KnownNat f => KnownNat b => CuckooFilter s b f a -> Int -- | Total number of items that the filter can hold. In practice a load -- factor of ~95% of this number can be reached. capacityInItems :: forall b f a s. KnownNat b => CuckooFilter s b f a -> Int -- | Number of items currently stored in the filter. -- -- Note that computing this number is expensive <math>. itemCount :: forall b f a m. PrimMonad m => KnownNat b => KnownNat f => CuckooFilter (PrimState m) b f a -> m Int -- | The current load factor of the filter in percent. -- --
--   loadFactor f = 100 * itemCount f / capacityInItems
--   
-- -- Note that computing this number is expensive <math>. loadFactor :: forall b f a m. PrimMonad m => KnownNat b => KnownNat f => CuckooFilter (PrimState m) b f a -> m Double -- | Show the contents of the filter as a list of buckets with values show -- in hex. Used for debugging purposes. showFilter :: forall b f a. KnownNat f => KnownNat b => CuckooFilter RealWorld b f a -> IO [[String]] -- | Returns the different hashes that are associated with an item in the -- filter. Used for debugging purposes. itemHashes :: forall b f a s. KnownNat f => CuckooFilterHash a => CuckooFilter s b f a -> a -> (Int, Int, Word64) instance GHC.Num.Num Data.Cuckoo.Salt instance GHC.Real.Real Data.Cuckoo.Salt instance GHC.Real.Integral Data.Cuckoo.Salt instance GHC.Enum.Enum Data.Cuckoo.Salt instance GHC.Classes.Ord Data.Cuckoo.Salt instance GHC.Classes.Eq Data.Cuckoo.Salt instance GHC.Show.Show Data.Cuckoo.Salt instance GHC.Classes.Ord (Data.Cuckoo.Fingerprint f) instance GHC.Classes.Eq (Data.Cuckoo.Fingerprint f) instance GHC.Show.Show (Data.Cuckoo.Fingerprint f) instance GHC.Num.Num Data.Cuckoo.Bucket instance GHC.Real.Real Data.Cuckoo.Bucket instance GHC.Real.Integral Data.Cuckoo.Bucket instance GHC.Enum.Enum Data.Cuckoo.Bucket instance GHC.Classes.Ord Data.Cuckoo.Bucket instance GHC.Classes.Eq Data.Cuckoo.Bucket instance GHC.Show.Show Data.Cuckoo.Bucket instance GHC.Num.Num Data.Cuckoo.Slot instance GHC.Real.Real Data.Cuckoo.Slot instance GHC.Real.Integral Data.Cuckoo.Slot instance GHC.Enum.Enum Data.Cuckoo.Slot instance GHC.Classes.Ord Data.Cuckoo.Slot instance GHC.Classes.Eq Data.Cuckoo.Slot instance GHC.Show.Show Data.Cuckoo.Slot