-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | An approximate streaming (constant space) unique object counter -- -- This package provides an approximate streaming (constant space) unique -- object counter. -- -- See the original paper for details: -- http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf -- -- Notably it can be used to approximate a set of several billion -- elements with 1-2% inaccuracy in around 1.5k of memory. @package hyperloglog @version 0.5 module Data.HyperLogLog.Config numBuckets :: Integer -> Int smallRange :: Integer -> Double interRange :: Double rawFact :: Integer -> Double alpha :: Integer -> Double bucketMask :: Integer -> Word32 type Rank = Int8 calcBucket :: Integer -> Word32 -> Int calcRank :: Integer -> Word32 -> Int8 lim32 :: Double -- | This package provides an approximate streaming (constant space) unique -- object counter. -- -- See the original paper for details: -- http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf module Data.HyperLogLog.Type data DefaultSipKey type DefaultHyperLogLog = HyperLogLog DefaultSipKey -- | SigHash Key data SipKey -- | Promote a SipKey to the type level for use as part of a -- HyperLogLog type. reifySipKey :: Word64 -> Word64 -> (forall (s :: Type). Reifies s SipKey => Proxy s -> r) -> r -- | Initialize a new counter: -- --
-- >>> runHyperLogLog (mempty :: DefaultHyperLogLog 3) == V.fromList [0,0,0,0,0,0,0,0] -- True ---- -- Please note how you specify a counter size with the n -- invocation. Sizes of up to 16 are valid, with 7 being a likely good -- minimum for decent accuracy. -- -- Let's count a list of unique items and get the latest estimate: -- --
-- >>> size (foldr insert mempty [1..10] :: DefaultHyperLogLog 4)
-- Approximate {_confidence = 0.9972, _lo = 2, _estimate = 9, _hi = 17}
--
--
-- Note how insert can be used to add new observations to the
-- approximate counter.
--
-- The s type parameter configures the SipKey that is
-- passed to the hash function when inserting a new value. Note
-- that if cryptographic security is a primary consideration, it is
-- recommended that you create HyperLogLog values using
-- generateHyperLogLog so that the SipKey is randomly
-- generated using system entropy. In contrast, the HyperLogLog
-- data constructor and the mempty method allow constructing
-- HyperLogLog values with fixed SipKeys, which can result
-- in exponentially inaccurate estimates if exploited by an adversary.
-- (See https://eprint.iacr.org/2021/1139.)
newtype HyperLogLog s p
-- | Construct a HyperLogLog value directly from a Vector.
--
-- Note that using this data constructor directly permits the s
-- type parameter to be a fixed SipKey, which can have
-- cryptographic security implications. See the Haddocks for
-- HyperLogLog for more details.
HyperLogLog :: Vector Rank -> HyperLogLog s p
[runHyperLogLog] :: HyperLogLog s p -> Vector Rank
-- | Generate a fresh HyperLogLog value using a randomly generated
-- SipKey:
--
-- -- >>> generateHyperLogLog $ \(m :: HyperLogLog s 3) -> pure (runHyperLogLog m == V.fromList [0,0,0,0,0,0,0,0]) -- True ---- -- The SipKey is generated using system entropy, so if -- cryptographic security is a primary consideration, use this function -- to create a HyperLogLog value instead of manually building one -- (e.g., by using the HyperLogLog data constructor or by using -- mempty). generateHyperLogLog :: Reifies p Integer => (forall (s :: Type). HyperLogLog s p -> IO r) -> IO r class HasHyperLogLog a s p | a -> s p hyperLogLog :: HasHyperLogLog a s p => Lens' a (HyperLogLog s p) -- | Approximate size of our set size :: Reifies p Integer => HyperLogLog s p -> Approximate Int64 insert :: forall s p a. (Reifies s SipKey, Reifies p Integer, Serial a) => a -> HyperLogLog s p -> HyperLogLog s p -- | Insert a value that has already been hashed by whatever user defined -- hash function you want. insertHash :: Reifies p Integer => Word32 -> HyperLogLog s p -> HyperLogLog s p intersectionSize :: Reifies p Integer => [HyperLogLog s p] -> Approximate Int64 cast :: forall p q s. (Reifies p Integer, Reifies q Integer) => HyperLogLog s p -> Maybe (HyperLogLog s q) -- | If the two types p and q reify the same -- configuration, and if the two types r and s reify -- the same SipKey, then we can coerce between -- HyperLogLog r p and HyperLogLog s q. -- We do this by building a hole in the nominal role for the -- configuration parameter. coerceConfig :: forall p q r s. (Reifies p Integer, Reifies q Integer, Reifies r SipKey, Reifies s SipKey) => Maybe (Coercion (HyperLogLog r p) (HyperLogLog s q)) instance forall k1 (s :: k1) k2 (p :: k2). Control.DeepSeq.NFData (Data.HyperLogLog.Type.HyperLogLog s p) instance forall k1 (s :: k1) k2 (p :: k2). GHC.Generics.Generic (Data.HyperLogLog.Type.HyperLogLog s p) instance forall k1 (s :: k1) k2 (p :: k2). GHC.Show.Show (Data.HyperLogLog.Type.HyperLogLog s p) instance forall k1 (s :: k1) k2 (p :: k2). GHC.Classes.Eq (Data.HyperLogLog.Type.HyperLogLog s p) instance forall k1 k2 (s :: k1) (p :: k2). Data.HyperLogLog.Type.HasHyperLogLog (Data.HyperLogLog.Type.HyperLogLog s p) s p instance Data.Reflection.Reifies Data.HyperLogLog.Type.DefaultSipKey Crypto.MAC.SipHash.SipKey instance forall k1 k2 (s :: k1) (p :: k2). Data.Serialize.Serialize (Data.HyperLogLog.Type.HyperLogLog s p) instance forall k1 k2 (s :: k1) (p :: k2). Data.Bytes.Serial.Serial (Data.HyperLogLog.Type.HyperLogLog s p) instance forall k1 k2 (s :: k1) (p :: k2). Data.Binary.Class.Binary (Data.HyperLogLog.Type.HyperLogLog s p) instance forall k1 k2 (s :: k1) (p :: k2). GHC.Base.Semigroup (Data.HyperLogLog.Type.HyperLogLog s p) instance forall k1 k2 (p :: k1) (s :: k2). Data.Reflection.Reifies p GHC.Num.Integer.Integer => GHC.Base.Monoid (Data.HyperLogLog.Type.HyperLogLog s p) -- | See the original paper for details: -- http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf module Data.HyperLogLog -- | Initialize a new counter: -- --
-- >>> runHyperLogLog (mempty :: DefaultHyperLogLog 3) == V.fromList [0,0,0,0,0,0,0,0] -- True ---- -- Please note how you specify a counter size with the n -- invocation. Sizes of up to 16 are valid, with 7 being a likely good -- minimum for decent accuracy. -- -- Let's count a list of unique items and get the latest estimate: -- --
-- >>> size (foldr insert mempty [1..10] :: DefaultHyperLogLog 4)
-- Approximate {_confidence = 0.9972, _lo = 2, _estimate = 9, _hi = 17}
--
--
-- Note how insert can be used to add new observations to the
-- approximate counter.
--
-- The s type parameter configures the SipKey that is
-- passed to the hash function when inserting a new value. Note
-- that if cryptographic security is a primary consideration, it is
-- recommended that you create HyperLogLog values using
-- generateHyperLogLog so that the SipKey is randomly
-- generated using system entropy. In contrast, the HyperLogLog
-- data constructor and the mempty method allow constructing
-- HyperLogLog values with fixed SipKeys, which can result
-- in exponentially inaccurate estimates if exploited by an adversary.
-- (See https://eprint.iacr.org/2021/1139.)
data HyperLogLog s p
class HasHyperLogLog a s p | a -> s p
hyperLogLog :: HasHyperLogLog a s p => Lens' a (HyperLogLog s p)
-- | Approximate size of our set
size :: Reifies p Integer => HyperLogLog s p -> Approximate Int64
intersectionSize :: Reifies p Integer => [HyperLogLog s p] -> Approximate Int64
insert :: forall s p a. (Reifies s SipKey, Reifies p Integer, Serial a) => a -> HyperLogLog s p -> HyperLogLog s p
-- | Insert a value that has already been hashed by whatever user defined
-- hash function you want.
insertHash :: Reifies p Integer => Word32 -> HyperLogLog s p -> HyperLogLog s p
cast :: forall p q s. (Reifies p Integer, Reifies q Integer) => HyperLogLog s p -> Maybe (HyperLogLog s q)
-- | If the two types p and q reify the same
-- configuration, and if the two types r and s reify
-- the same SipKey, then we can coerce between
-- HyperLogLog r p and HyperLogLog s q.
-- We do this by building a hole in the nominal role for the
-- configuration parameter.
coerceConfig :: forall p q r s. (Reifies p Integer, Reifies q Integer, Reifies r SipKey, Reifies s SipKey) => Maybe (Coercion (HyperLogLog r p) (HyperLogLog s q))