{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}

-- | Histogram metrics allow you to measure not just easy things like the min,
-- mean, max, and standard deviation of values, but also quantiles like the
-- median or 95th percentile.
--
-- Traditionally, the way the median (or any other quantile) is calculated is
-- to take the entire data set, sort it, and take the value in the middle (or
-- 1% from the end, for the 99th percentile). This works for small data sets,
-- or batch processing systems, but not for high-throughput, low-latency
-- services.
--
-- The solution for this is to sample the data as it goes through. By
-- maintaining a small, manageable reservoir which is statistically
-- representative of the data stream as a whole, we can quickly and easily
-- calculate quantiles which are valid approximations of the actual quantiles.
-- This technique is called reservoir sampling.
module Data.Metrics.Histogram
  ( Histogram
  , histogram
  , exponentiallyDecayingHistogram
  , uniformHistogram
  , module Data.Metrics.Types
  ) where

import Control.Monad.Primitive
import qualified Data.Metrics.Histogram.Internal as P
import Data.Metrics.Internal
import Data.Metrics.Types
import Data.Metrics.Reservoir (Reservoir)
import Data.Metrics.Reservoir.Uniform (unsafeReservoir)
import Data.Metrics.Reservoir.ExponentiallyDecaying (reservoir)
import Data.Primitive.MutVar
import Data.Time.Clock
import Data.Time.Clock.POSIX
import System.Random.MWC

-- | A measure of the distribution of values in a stream of data.
--
-- This is a mutable wrapper: the pure histogram state from
-- "Data.Metrics.Histogram.Internal" lives in a mutable variable, paired with
-- the clock action used to timestamp modifications.
data Histogram m = Histogram
  { fromHistogram :: MV m P.Histogram
    -- ^ Mutable reference holding the current pure histogram state.
  , histogramGetSeconds :: m NominalDiffTime
    -- ^ Action that yields the current time. It is run once per 'update' and
    -- once per 'clear', and the result is passed to the pure implementation.
  }

-- | Resets the histogram, passing the current time to the pure 'P.clear'.
instance PrimMonad m => Clear m (Histogram m) where
  clear h = do
    -- Sample the clock first; the pure reset takes the timestamp as input.
    t <- histogramGetSeconds h
    updateRef (fromHistogram h) $ P.clear t

-- | Records a new 'Double' sample, stamped with the current time.
instance PrimMonad m => Update m (Histogram m) Double where
  update h x = do
    -- Sample the clock first; the pure update takes the timestamp as input.
    t <- histogramGetSeconds h
    updateRef (fromHistogram h) $ P.update x t

-- | Reads the count from the current histogram state.
instance PrimMonad m => Count m (Histogram m) where
  count h = P.count <$> readMutVar (fromHistogram h)

-- | Summary statistics computed from the current histogram state.
instance PrimMonad m => Statistics m (Histogram m) where
  mean h = applyWithRef (fromHistogram h) P.mean
  stddev h = applyWithRef (fromHistogram h) P.stddev
  variance h = applyWithRef (fromHistogram h) P.variance
  -- The extrema are read straight from the mutable variable rather than going
  -- through 'applyWithRef'; that distinction is kept exactly as it was.
  maxVal h = P.maxVal <$> readMutVar (fromHistogram h)
  minVal h = P.minVal <$> readMutVar (fromHistogram h)

-- | Produces a snapshot from the current histogram state via 'P.snapshot'.
instance PrimMonad m => TakeSnapshot m (Histogram m) where
  snapshot h = applyWithRef (fromHistogram h) P.snapshot

-- | Create a histogram using a custom time data supplier function and a
-- custom reservoir.
histogram
  :: PrimMonad m
  => m NominalDiffTime -- ^ Clock action consulted on every 'update' and 'clear'.
  -> Reservoir         -- ^ Reservoir that backs the new histogram.
  -> m (Histogram m)
histogram t r = do
  v <- newMutVar $ P.histogram r
  return $ Histogram v t

-- | A histogram that gives all entries an equal likelihood of being evicted.
--
-- Probably not what you want for most time-series data.
--
-- Time is taken from 'getPOSIXTime'. The supplied 'Seed' is handed to
-- 'unsafeReservoir' together with the literal @1028@.
uniformHistogram :: Seed -> IO (Histogram IO)
uniformHistogram s =
  -- NOTE(review): 1028 is presumably the reservoir size -- confirm against
  -- Data.Metrics.Reservoir.Uniform.
  histogram getPOSIXTime $ unsafeReservoir s 1028

-- | The recommended histogram type. It provides a fast histogram that
-- probabilistically evicts older entries using a weighting system. This
-- ensures that snapshots remain relatively fresh.
--
-- The reservoir is initialised with the creation time and a seed saved from a
-- freshly created system random generator; subsequent timestamps come from
-- 'getPOSIXTime'.
exponentiallyDecayingHistogram :: IO (Histogram IO)
exponentiallyDecayingHistogram = do
  t <- getPOSIXTime
  s <- createSystemRandom >>= save
  -- NOTE(review): 0.015 and 1028 are presumably the decay factor and the
  -- reservoir size -- confirm against
  -- Data.Metrics.Reservoir.ExponentiallyDecaying.
  histogram getPOSIXTime $ reservoir 0.015 1028 t s

-- | Pure histogram over a uniform reservoir built from the given 'Seed' and
-- the same @1028@ literal used by 'uniformHistogram'.
--
-- Not exported, and not referenced by any other definition visible in this
-- module.
uniformSampler :: Seed -> P.Histogram
uniformSampler s = P.histogram (unsafeReservoir s 1028)

-- | Not-a-number, obtained as the floating-point result of @0 / 0@.
--
-- Not exported, and not referenced by any other definition visible in this
-- module.
nan :: Double
nan = 0 / 0