-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Approximate cardinality estimation using constant space -- -- HyperLogLog++ with MinHash for efficient cardinality and intersection -- estimation using constant space. -- -- See original AdRoll paper for details: -- http://tech.adroll.com/media/hllminhash.pdf @package hyperloglogplus @version 0.1.0.0 module Data.HyperLogLogPlus.Config -- | Min HLL precision minP :: Integer -- | Max HLL precision maxP :: Integer -- | Number of buckets for given HLL precision numBuckets :: Integer -> Int -- | Returns an estimate of an integral term in the computation of the HLL -- size. alpha :: Int -> Double -- | These are the thresholds of cardinality that represent a transition -- from LINEARCOUNTING to the bias-corrected estimates thresholds :: Vector Double -- | These data are empirical estimates of given cardinalities at -- particular precisions, used for bias estimation. -- -- These data come from http://goo.gl/iU8Ig rawEstimateData :: Vector (Vector Double) -- | These data are empirical estimates of bias at particular precisions, -- and corresponding to rawEstimateData. By interpolation, it is possible -- to get other bias estimates based on these empirical data. -- -- These data come from http://goo.gl/iU8Ig biasData :: Vector (Vector Double) module Data.HyperLogLogPlus.Type -- | HyperLogLog++ cardinality estimation paired with MinHash for -- intersection estimation -- --
-- >>> :set -XDataKinds -- -- >>> :load Data.HyperLogLogPlus -- -- >>> type HLL = HyperLogLogPlus 12 8192 -- -- >>> mempty :: HLL -- HyperLogLogPlus [ p = 12 k = 8192 ] [ minSet size = 0 ] ---- -- HyperLogLogPlus and MinHash precisions are specified in a type. HLL -- precision p should be between 4 and 18, starting from 10 for -- good accuracy. -- -- MinHash precision k ideally should be greater than or equal to 8192 -- for decent intersection estimation. -- -- Estimating number of unique items: -- --
-- >>> size (foldr insert mempty [1 .. 75000] :: HLL) -- 75090 ---- -- Combine multiple counters: -- --
-- >>> size $ (foldr insert mempty [1 .. 5000] :: HLL) <> (foldr insert mempty [3000 .. 10000] :: HLL) -- 10044 ---- -- Compute estimated set intersection: -- --
-- >>> intersection $ [(foldr insert mempty [1 .. 15000] :: HLL), (foldr insert mempty [12000 .. 20000] :: HLL)] -- 3100 --data HyperLogLogPlus (p :: Nat) (k :: Nat) -- | Insert a hashable value insert :: (KnownNat p, KnownNat k, Hashable64 a) => a -> HyperLogLogPlus p k -> HyperLogLogPlus p k -- | Insert an already hashed value insertHash :: (KnownNat p, KnownNat k) => Hash64 -> HyperLogLogPlus p k -> HyperLogLogPlus p k -- | Compute the estimated size of HyperLogLogPlus. If the number of inserted -- values is smaller than the MinHash precision, this returns the exact value size :: (KnownNat p, KnownNat k) => HyperLogLogPlus p k -> Word64 -- | Returns an estimate of the size of the intersection of the given -- HyperLogLogPlus objects intersection :: (KnownNat p, KnownNat k) => [HyperLogLogPlus p k] -> Word64 -- | Cast HyperLogLogPlus to new precision levels -- --
-- >>> :set -XDataKinds -- -- >>> :load Data.HyperLogLogPlus -- -- >>> type HLL = HyperLogLogPlus 12 8192 -- -- >>> mempty :: HLL -- HyperLogLogPlus [ p = 12 k = 8192 ] [ minSet size = 0 ] ---- -- HyperLogLogPlus and MinHash precisions are specified in a type. HLL -- precision p should be between 4 and 18, starting from 10 for -- good accuracy. -- -- MinHash precision k ideally should be greater than or equal to 8192 -- for decent intersection estimation. -- -- Estimating number of unique items: -- --
-- >>> size (foldr insert mempty [1 .. 75000] :: HLL) -- 75090 ---- -- Combine multiple counters: -- --
-- >>> size $ (foldr insert mempty [1 .. 5000] :: HLL) <> (foldr insert mempty [3000 .. 10000] :: HLL) -- 10044 ---- -- Compute estimated set intersection: -- --
-- >>> intersection $ [(foldr insert mempty [1 .. 15000] :: HLL), (foldr insert mempty [12000 .. 20000] :: HLL)] -- 3100 --data HyperLogLogPlus (p :: Nat) (k :: Nat) -- | Compute the estimated size of HyperLogLogPlus. If the number of inserted -- values is smaller than the MinHash precision, this returns the exact value size :: (KnownNat p, KnownNat k) => HyperLogLogPlus p k -> Word64 -- | Insert a hashable value insert :: (KnownNat p, KnownNat k, Hashable64 a) => a -> HyperLogLogPlus p k -> HyperLogLogPlus p k -- | Insert an already hashed value insertHash :: (KnownNat p, KnownNat k) => Hash64 -> HyperLogLogPlus p k -> HyperLogLogPlus p k -- | Returns an estimate of the size of the intersection of the given -- HyperLogLogPlus objects intersection :: (KnownNat p, KnownNat k) => [HyperLogLogPlus p k] -> Word64 -- | Cast HyperLogLogPlus to new precision levels -- --