{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE UndecidableInstances #-} -- | Histogram metrics allow you to measure not just easy things like the min, mean, max, and standard deviation of values, but also quantiles like the median or 95th percentile. -- -- Traditionally, the way the median (or any other quantile) is calculated is to take the entire data set, sort it, and take the value in the middle (or 1% from the end, for the 99th percentile). This works for small data sets, or batch processing systems, but not for high-throughput, low-latency services. -- -- The solution for this is to sample the data as it goes through. By maintaining a small, manageable reservoir which is statistically representative of the data stream as a whole, we can quickly and easily calculate quantiles which are valid approximations of the actual quantiles. This technique is called reservoir sampling. module Data.Metrics.Histogram ( Histogram, histogram, exponentiallyDecayingHistogram, uniformHistogram, uniformSampler, module Data.Metrics.Types ) where import Control.Monad.Base import Control.Monad.Primitive import qualified Data.Metrics.Histogram.Internal as P import Data.Metrics.Internal import Data.Metrics.Types import Data.Metrics.Reservoir (Reservoir) import Data.Metrics.Reservoir.Uniform (unsafeReservoir) import Data.Metrics.Reservoir.ExponentiallyDecaying (reservoir) import Data.Primitive.MutVar import Data.Time.Clock import Data.Time.Clock.POSIX import System.Random.MWC -- | A measure of the distribution of values in a stream of data. data Histogram m = Histogram { fromHistogram :: !(MV m P.Histogram) , histogramGetSeconds :: !(m NominalDiffTime) } instance (MonadBase b m, PrimMonad b) => Clear b m (Histogram b) where clear h = liftBase $ do t <- histogramGetSeconds h updateRef (fromHistogram h) $ P.clear t {-# INLINEABLE clear #-} instance (MonadBase b m, PrimMonad b) => Update b m (Histogram b) Double where update h x = liftBase $ do t <- histogramGetSeconds h updateRef (fromHistogram h) $ P.update x t {-# INLINEABLE update #-} instance (MonadBase b m, PrimMonad b) => Count b m (Histogram b) where count h = liftBase $ fmap P.count $ readMutVar (fromHistogram h) {-# INLINEABLE count #-} instance (MonadBase b m, PrimMonad b) => Statistics b m (Histogram b) where mean h = liftBase $ applyWithRef (fromHistogram h) P.mean {-# INLINEABLE mean #-} stddev h = liftBase $ applyWithRef (fromHistogram h) P.stddev {-# INLINEABLE stddev #-} variance h = liftBase $ applyWithRef (fromHistogram h) P.variance {-# INLINEABLE variance #-} maxVal h = liftBase $ fmap P.maxVal $ readMutVar (fromHistogram h) {-# INLINEABLE maxVal #-} minVal h = liftBase $ fmap P.minVal $ readMutVar (fromHistogram h) {-# INLINEABLE minVal #-} instance (MonadBase b m, PrimMonad b) => TakeSnapshot b m (Histogram b) where snapshot h = liftBase $ applyWithRef (fromHistogram h) P.snapshot {-# INLINEABLE snapshot #-} -- | Create a histogram using a custom time data supplier function and a custom reservoir. histogram :: (MonadBase b m, PrimMonad b) => b NominalDiffTime -> Reservoir -> m (Histogram b) histogram t r = do v <- liftBase $ newMutVar $ P.histogram r return $ Histogram v t -- | A histogram that gives all entries an equal likelihood of being evicted. -- -- Probably not what you want for most time-series data. uniformHistogram :: MonadBase IO m => Seed -> m (Histogram IO) uniformHistogram s = liftBase $ histogram getPOSIXTime $ unsafeReservoir s 1028 -- | The recommended histogram type. It provides a fast histogram that -- probabilistically evicts older entries using a weighting system. This -- ensures that snapshots remain relatively fresh. exponentiallyDecayingHistogram :: MonadBase IO m => m (Histogram IO) exponentiallyDecayingHistogram = liftBase $ do t <- getPOSIXTime s <- createSystemRandom >>= save histogram getPOSIXTime $ reservoir 0.015 1028 t s uniformSampler :: Seed -> P.Histogram uniformSampler s = P.histogram (unsafeReservoir s 1028)