module Data.Vector.Bloom
( Bloom(Bloom)
, entries
, hashes
, width
, bloom
, modify
, insert
, elem
, union
, intersection
, freeze, thaw
, unsafeFreeze, unsafeThaw
) where
import Control.Monad hiding (forM_)
import Control.Monad.Primitive
import Control.Monad.ST
import Data.Bits
import Data.Data
import qualified Data.Foldable as F
import Data.Hashable
import Data.Semigroup
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Bloom.Mutable as MB
import Data.Vector.Bloom.Mutable (MBloom(MBloom))
import Data.Vector.Bloom.Util
import Data.Word
import Prelude hiding (elem)
data Bloom
= Bloom
{ hashes :: !Int
, _mask :: !Int
, _bits :: !(U.Vector Word64)
}
deriving (Eq,Ord,Show,Read,Typeable,Data)
bloom :: (F.Foldable f, Hashable a) => Int -> Int -> f a -> Bloom
bloom k m fa = runST $ do
mb <- MB.mbloom k m
F.forM_ fa $ \a -> MB.insert a mb
freeze mb
entries :: Bloom -> Int
entries (Bloom _ _ v) = U.foldl' (\r a -> r + popCount a) 0 v
union :: Bloom -> Bloom -> Bloom
union (Bloom k1 m v1) (Bloom k2 n v2) = Bloom (min k1 k2) (max m n) v3 where
v3 = U.generate (U.length v1 `max` U.length v2) $ \i -> U.unsafeIndex v1 (i .&. m) .|. U.unsafeIndex v2 (i .&. n)
intersection :: Bloom -> Bloom -> Bloom
intersection (Bloom k1 m v1) (Bloom k2 n v2) = Bloom (min k1 k2) (max m n) v3 where
v3 = U.generate (U.length v1 `max` U.length v2) $ \i -> U.unsafeIndex v1 (i .&. m) .&. U.unsafeIndex v2 (i .&. n)
elem :: Hashable a => a -> Bloom -> Bool
elem a (Bloom k m v)
| m > 32767, h:hs <- rehash k a, p <- unsafeShiftL h 15 =
all (\i -> let im = (p+(i.&.32767)).&.m in testBit (U.unsafeIndex v (unsafeShiftR im 6)) (i .&. 63)) hs
| otherwise =
all (\i -> let im = i.&.m in testBit (U.unsafeIndex v (unsafeShiftR im 6)) (im .&. 63)) (rehash k a)
insert :: Hashable a => a -> Bloom -> Bloom
insert a b = modify (MB.insert a) b
modify :: (forall s. MBloom s -> ST s ()) -> Bloom -> Bloom
modify f (Bloom a m v) = Bloom a m (U.modify (f . MBloom a m) v)
width :: Bloom -> Int
width (Bloom _ m _) = m + 1
instance Semigroup Bloom where
(<>) = union
freeze :: PrimMonad m => MBloom (PrimState m) -> m Bloom
freeze (MBloom k m bs) = Bloom k m `liftM` U.freeze bs
thaw :: PrimMonad m => Bloom -> m (MBloom (PrimState m))
thaw (Bloom k m bs) = MBloom k m `liftM` U.thaw bs
unsafeFreeze :: PrimMonad m => MBloom (PrimState m) -> m Bloom
unsafeFreeze (MBloom k m bs) = Bloom k m `liftM` U.unsafeFreeze bs
unsafeThaw :: PrimMonad m => Bloom -> m (MBloom (PrimState m))
unsafeThaw (Bloom k m bs) = MBloom k m `liftM` U.unsafeThaw bs