module Data.TDigest.Postprocess (
    -- * Histogram
    I.HasHistogram (..),
    I.HistBin (..),
    -- * Quantiles
    median,
    quantile,
    -- * Mean & variance
    --
    -- | As we have "full" histogram, we can calculate other statistical
    -- variables.
    mean,
    variance,
    stddev,
    -- * CDF
    cdf,
    icdf,
    -- * Affine
    I.Affine (..)
    ) where

import qualified Data.List.NonEmpty as NE
import           Prelude ()
import           Prelude.Compat

import qualified Data.TDigest.Postprocess.Internal as I

-- | Median, i.e. @'quantile' 0.5@.
median :: I.HasHistogram a f => a -> f Double
median :: a -> f Double
median = Double -> a -> f Double
forall a (f :: * -> *). HasHistogram a f => Double -> a -> f Double
quantile Double
0.5

-- | Calculate quantile of a specific value.
quantile :: I.HasHistogram a f => Double -> a -> f Double
quantile :: Double -> a -> f Double
quantile Double
q a
x = Double -> Double -> NonEmpty HistBin -> Double
I.quantile Double
q (a -> Double
forall a (f :: * -> *). HasHistogram a f => a -> Double
I.totalWeight a
x) (NonEmpty HistBin -> Double) -> f (NonEmpty HistBin) -> f Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> a -> f (NonEmpty HistBin)
forall a (f :: * -> *).
HasHistogram a f =>
a -> f (NonEmpty HistBin)
I.histogram a
x

-- | Mean.
--
-- >>> mean (Tree.tdigest [1..100] :: Tree.TDigest 10)
-- Just 50.5
--
-- /Note:/ if you only need the mean, calculate it directly.
--
mean :: I.HasHistogram a f => a -> f Double
mean :: a -> f Double
mean a
x = NonEmpty HistBin -> Double
I.mean (NonEmpty HistBin -> Double) -> f (NonEmpty HistBin) -> f Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> a -> f (NonEmpty HistBin)
forall a (f :: * -> *).
HasHistogram a f =>
a -> f (NonEmpty HistBin)
I.histogram a
x

-- | Variance.
--
variance :: I.HasHistogram a f => a -> f Double
variance :: a -> f Double
variance a
x = NonEmpty HistBin -> Double
I.variance (NonEmpty HistBin -> Double) -> f (NonEmpty HistBin) -> f Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> a -> f (NonEmpty HistBin)
forall a (f :: * -> *).
HasHistogram a f =>
a -> f (NonEmpty HistBin)
I.histogram a
x

-- | Standard deviation, square root of variance.
stddev :: I.HasHistogram a f => a -> f Double
stddev :: a -> f Double
stddev = (Double -> Double) -> f Double -> f Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Double -> Double
forall a. Floating a => a -> a
sqrt (f Double -> f Double) -> (a -> f Double) -> a -> f Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> f Double
forall a (f :: * -> *). HasHistogram a f => a -> f Double
variance

-- | Cumulative distribution function.
--
-- /Note:/ if this is the only thing you need, it's more efficient to count
-- this directly.
cdf :: I.HasHistogram a f => Double -> a -> Double
cdf :: Double -> a -> Double
cdf Double
q a
x = Double
-> (NonEmpty HistBin -> Double) -> f (NonEmpty HistBin) -> Double
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
I.affine Double
1 (Double -> Double -> [HistBin] -> Double
I.cdf Double
q (a -> Double
forall a (f :: * -> *). HasHistogram a f => a -> Double
I.totalWeight a
x) ([HistBin] -> Double)
-> (NonEmpty HistBin -> [HistBin]) -> NonEmpty HistBin -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty HistBin -> [HistBin]
forall a. NonEmpty a -> [a]
NE.toList) (f (NonEmpty HistBin) -> Double) -> f (NonEmpty HistBin) -> Double
forall a b. (a -> b) -> a -> b
$ a -> f (NonEmpty HistBin)
forall a (f :: * -> *).
HasHistogram a f =>
a -> f (NonEmpty HistBin)
I.histogram a
x

-- | An alias for 'quantile'.
icdf :: I.HasHistogram a f => Double -> a -> f Double
icdf :: Double -> a -> f Double
icdf = Double -> a -> f Double
forall a (f :: * -> *). HasHistogram a f => Double -> a -> f Double
quantile

-- $setup
-- >>> :set -XDataKinds
-- >>> import qualified Data.TDigest.Tree as Tree