#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 706
#endif
#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 707
#endif
module Data.HyperLogLog.Type
(
HyperLogLog(..)
, HasHyperLogLog(..)
, size
, insert
, insertHash
, intersectionSize
, cast
) where
#if __GLASGOW_HASKELL__ < 710
import Control.Applicative
#endif
import Control.Lens
import Control.Monad
import Crypto.MAC.SipHash
import Data.Approximate.Type
import Data.Bits
import Data.Bits.Extras
import Data.Bytes.Put (runPutS)
import Data.Bytes.Serial
import Data.HyperLogLog.Config
import Data.Proxy
import Data.Semigroup
import Data.Serialize
import qualified Data.Vector.Unboxed as V
import qualified Data.Vector.Unboxed.Mutable as MV
import Data.Word
import Generics.Deriving hiding (D, to)
import GHC.Int
newtype HyperLogLog p = HyperLogLog { runHyperLogLog :: V.Vector Rank }
deriving (Eq, Show, Generic)
#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 707
type role HyperLogLog nominal
#endif
instance Serialize (HyperLogLog p)
class HasHyperLogLog a p | a -> p where
hyperLogLog :: Lens' a (HyperLogLog p)
instance HasHyperLogLog (HyperLogLog p) p where
hyperLogLog = id
_HyperLogLog :: Iso' (HyperLogLog p) (V.Vector Rank)
_HyperLogLog = iso runHyperLogLog HyperLogLog
instance ReifiesConfig p => HasConfig (HyperLogLog p) where
config = to reflectConfig
instance Semigroup (HyperLogLog p) where
HyperLogLog a <> HyperLogLog b = HyperLogLog (V.zipWith max a b)
instance ReifiesConfig p => Monoid (HyperLogLog p) where
mempty = HyperLogLog $ V.replicate (reflectConfig (Proxy :: Proxy p) ^. numBuckets) 0
mappend = (<>)
sipKey :: SipKey
sipKey = SipKey 4 7
siphash :: Serial a => a -> Word64
siphash a = h
where !bs = runPutS (serialize a)
(SipHash !h) = hash sipKey bs
insert :: (ReifiesConfig s, Serial a) => a -> HyperLogLog s -> HyperLogLog s
insert = insertHash . w32 . siphash
insertHash :: ReifiesConfig s => Word32 -> HyperLogLog s -> HyperLogLog s
insertHash h m@(HyperLogLog v) = HyperLogLog $ V.modify (\x -> do
old <- MV.read x bk
when (rnk > old) $ MV.write x bk rnk
) v where
!bk = calcBucket m h
!rnk = calcRank m h
size :: ReifiesConfig p => HyperLogLog p -> Approximate Int64
size m@(HyperLogLog bs) = Approximate 0.9972 l expected h where
m' = fromIntegral (m^.numBuckets)
numZeros = fromIntegral . V.length . V.filter (== 0) $ bs
res = case raw < m^.smallRange of
True | numZeros > 0 -> m' * log (m' / numZeros)
| otherwise -> raw
False | raw <= m^.interRange -> raw
| otherwise -> 1 * lim32 * log (1 raw / lim32)
raw = m^.rawFact * (1 / sm)
sm = V.sum $ V.map (\x -> 1 / (2 ^^ x)) bs
expected = round res
sd = err (m^.numBits)
err n = 1.04 / sqrt (fromInteger (bit n))
l = floor $ max (res*(13*sd)) 0
h = ceiling $ res*(1+3*sd)
intersectionSize :: ReifiesConfig p => [HyperLogLog p] -> Approximate Int64
intersectionSize [] = 0
intersectionSize (x:xs) = withMin 0 $ size x + intersectionSize xs intersectionSize (mappend x <$> xs)
cast :: forall p q. (ReifiesConfig p, ReifiesConfig q) => HyperLogLog p -> Maybe (HyperLogLog q)
cast old
| newBuckets <= oldBuckets = Just $ over _HyperLogLog ?? mempty $ V.modify $ \m ->
V.forM_ (V.indexed $ old^._HyperLogLog) $ \ (i,o) -> do
let j = mod i newBuckets
a <- MV.read m j
MV.write m j (max o a)
| otherwise = Nothing
where
newConfig = reflectConfig (Proxy :: Proxy q)
newBuckets = newConfig^.numBuckets
oldBuckets = old^.numBuckets