-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Approximate cardinality estimation using constant space -- -- HyperLogLog++ with MinHash for efficient cardinality and intersection -- estimation using constant space. -- -- See original AdRoll paper for details: -- http://tech.adroll.com/media/hllminhash.pdf @package hyperloglogplus @version 0.1.0.0 module Data.HyperLogLogPlus.Config -- | Min HLL precision minP :: Integer -- | Max HLL precision maxP :: Integer -- | Number of buckets for given HLL precision numBuckets :: Integer -> Int -- | Returns an estimate of an integral term in the computation of the HLL -- size. alpha :: Int -> Double -- | These are the thresholds of cardinality that represent a transition -- from LINEARCOUNTING to the bias-corrected estimates thresholds :: Vector Double -- | These data are empirical estimates of given cardinalities at -- particular precisions, used for bias estimation. -- -- These data come from http://goo.gl/iU8Ig rawEstimateData :: Vector (Vector Double) -- | These data are empirical estimates of bias at particular precisions, -- and corresponding to rawEstimateData. By interpolation, it is possible -- to get other bias estimates based on these empirical data. -- -- These data come from http://goo.gl/iU8Ig biasData :: Vector (Vector Double) module Data.HyperLogLogPlus.Type -- | HyperLogLog++ cardinality estimation paired with MinHash for -- intersection estimation -- -- -- -- Create new counter: -- --
--   >>> :set -XDataKinds
--   
--   >>> :load Data.HyperLogLogPlus
--   
--   >>> type HLL = HyperLogLogPlus 12 8192
--   
--   >>> mempty :: HLL
--   HyperLogLogPlus [ p = 12 k = 8192 ] [ minSet size = 0 ]
--   
-- -- HyperLogLogPlus and MinHash precisions are specified in a type. HLL -- precision p should be between 4 and 18, starting from 10 for -- good accuracy. -- -- MinHash precision k ideally should be greater than or equal to 8192 -- for decent intersection estimation. -- -- Estimating number of unique items: -- --
--   >>> size (foldr insert mempty [1 .. 75000] :: HLL)
--   75090
--   
-- -- Combine multiple counters: -- --
--   >>> size $ (foldr insert mempty [1 .. 5000] ::  HLL) <> (foldr insert mempty [3000 .. 10000] :: HLL)
--   10044
--   
-- -- Compute estimated set intersection: -- --
--   >>> intersection $ [(foldr insert mempty [1 .. 15000] ::  HLL), (foldr insert mempty [12000 .. 20000] :: HLL)]
--   3100
--   
data HyperLogLogPlus (p :: Nat) (k :: Nat) -- | Insert hashable value insert :: (KnownNat p, KnownNat k, Hashable64 a) => a -> HyperLogLogPlus p k -> HyperLogLogPlus p k -- | Insert already hashed value insertHash :: (KnownNat p, KnownNat k) => Hash64 -> HyperLogLogPlus p k -> HyperLogLogPlus p k -- | Compute estimated size of HyperLogLogPlus. If number of inserted -- values is smaller than MinHash precision this will return exact value size :: (KnownNat p, KnownNat k) => HyperLogLogPlus p k -> Word64 -- | Returns an estimate of the size of the intersection of the given -- HyperLogLogPlus objects intersection :: (KnownNat p, KnownNat k) => [HyperLogLogPlus p k] -> Word64 -- | Cast HyperLogLogPlus to new precision levels -- --
    --
  1. New HLL precision should be less than or equal to the old one
  2. --
  3. New MinHash precision has to be less than or equal to the old one, or it -- can be larger, but only if the number of inserted hashes in the old structure -- is smaller than the old precision (size limit)
  4. --
cast :: (KnownNat p1, KnownNat k1, KnownNat p2, KnownNat k2, 4 <= p2, p2 <= 18) => HyperLogLogPlus p1 k1 -> Maybe (HyperLogLogPlus p2 k2) instance GHC.Classes.Eq (Data.HyperLogLogPlus.Type.HyperLogLogPlus p k) instance GHC.TypeLits.KnownNat k => Data.Semigroup.Semigroup (Data.HyperLogLogPlus.Type.HyperLogLogPlus p k) instance (GHC.TypeLits.KnownNat p, GHC.TypeLits.KnownNat k, 4 GHC.TypeLits.<= p, p GHC.TypeLits.<= 18) => GHC.Base.Monoid (Data.HyperLogLogPlus.Type.HyperLogLogPlus p k) instance (GHC.TypeLits.KnownNat p, GHC.TypeLits.KnownNat k) => GHC.Show.Show (Data.HyperLogLogPlus.Type.HyperLogLogPlus p k) module Data.HyperLogLogPlus -- | HyperLogLog++ cardinality estimation paired with MinHash for -- intersection estimation -- -- -- -- Create new counter: -- --
--   >>> :set -XDataKinds
--   
--   >>> :load Data.HyperLogLogPlus
--   
--   >>> type HLL = HyperLogLogPlus 12 8192
--   
--   >>> mempty :: HLL
--   HyperLogLogPlus [ p = 12 k = 8192 ] [ minSet size = 0 ]
--   
-- -- HyperLogLogPlus and MinHash precisions are specified in a type. HLL -- precision p should be between 4 and 18, starting from 10 for -- good accuracy. -- -- MinHash precision k ideally should be greater than or equal to 8192 -- for decent intersection estimation. -- -- Estimating number of unique items: -- --
--   >>> size (foldr insert mempty [1 .. 75000] :: HLL)
--   75090
--   
-- -- Combine multiple counters: -- --
--   >>> size $ (foldr insert mempty [1 .. 5000] ::  HLL) <> (foldr insert mempty [3000 .. 10000] :: HLL)
--   10044
--   
-- -- Compute estimated set intersection: -- --
--   >>> intersection $ [(foldr insert mempty [1 .. 15000] ::  HLL), (foldr insert mempty [12000 .. 20000] :: HLL)]
--   3100
--   
data HyperLogLogPlus (p :: Nat) (k :: Nat) -- | Compute estimated size of HyperLogLogPlus. If number of inserted -- values is smaller than MinHash precision this will return exact value size :: (KnownNat p, KnownNat k) => HyperLogLogPlus p k -> Word64 -- | Insert hashable value insert :: (KnownNat p, KnownNat k, Hashable64 a) => a -> HyperLogLogPlus p k -> HyperLogLogPlus p k -- | Insert already hashed value insertHash :: (KnownNat p, KnownNat k) => Hash64 -> HyperLogLogPlus p k -> HyperLogLogPlus p k -- | Returns an estimate of the size of the intersection of the given -- HyperLogLogPlus objects intersection :: (KnownNat p, KnownNat k) => [HyperLogLogPlus p k] -> Word64 -- | Cast HyperLogLogPlus to new precision levels -- --
    --
  1. New HLL precision should be less than or equal to the old one
  2. --
  3. New MinHash precision has to be less than or equal to the old one, or it -- can be larger, but only if the number of inserted hashes in the old structure -- is smaller than the old precision (size limit)
  4. --
cast :: (KnownNat p1, KnownNat k1, KnownNat p2, KnownNat k2, 4 <= p2, p2 <= 18) => HyperLogLogPlus p1 k1 -> Maybe (HyperLogLogPlus p2 k2)