{-# LANGUAGE FlexibleInstances      #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GADTs                  #-}
-- | 'TDigest' postprocessing functions.
--
-- These are re-exported from "Data.TDigest" module.
--
module Data.TDigest.Postprocess.Internal (
    -- * Histogram
    HasHistogram (..),
    HistBin (..),
    histogramFromCentroids,
    -- * Quantiles
    quantile,
    -- * Mean & variance
    --
    -- | As we have "full" histogram, we can calculate other statistical
    -- variables.
    mean,
    variance,
    -- * CDF
    cdf,
    -- * Debug
    validateHistogram,
    -- * Affine - internal
    Affine (..),
    ) where

import Data.Foldable           (toList, traverse_)
import Data.Functor.Compose    (Compose (..))
import Data.Functor.Identity   (Identity (..))
import Data.List.NonEmpty      (NonEmpty (..), nonEmpty)
import Data.Proxy              (Proxy (..))
import Data.Semigroup          (Semigroup (..))
import Data.Semigroup.Foldable (foldMap1)
import Prelude ()
import Prelude.Compat

import qualified Data.List.NonEmpty as NE

import Data.TDigest.Internal

-------------------------------------------------------------------------------
-- Histogram
-------------------------------------------------------------------------------

-- | Histogram bin
data HistBin = HistBin
    { HistBin -> Mean
hbMin       :: !Mean    -- ^ lower bound
    , HistBin -> Mean
hbMax       :: !Mean    -- ^ upper bound
    , HistBin -> Mean
hbValue     :: !Mean    -- ^ original value: @(mi + ma) / 2@
    , HistBin -> Mean
hbWeight    :: !Weight  -- ^ weight ("area" of the bar)
    , HistBin -> Mean
hbCumWeight :: !Weight  -- ^ weight from the right, excludes this bin
    }
  deriving (Int -> HistBin -> ShowS
[HistBin] -> ShowS
HistBin -> String
(Int -> HistBin -> ShowS)
-> (HistBin -> String) -> ([HistBin] -> ShowS) -> Show HistBin
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [HistBin] -> ShowS
$cshowList :: [HistBin] -> ShowS
show :: HistBin -> String
$cshow :: HistBin -> String
showsPrec :: Int -> HistBin -> ShowS
$cshowsPrec :: Int -> HistBin -> ShowS
Show)

-- | Types from which we can extract histogram.
class Affine f => HasHistogram a f | a -> f where
    histogram   :: a -> f (NonEmpty HistBin)
    totalWeight :: a -> Weight

instance (HistBin ~ e) => HasHistogram (NonEmpty HistBin) Identity where
    histogram :: NonEmpty HistBin -> Identity (NonEmpty HistBin)
histogram = NonEmpty HistBin -> Identity (NonEmpty HistBin)
forall a. a -> Identity a
Identity
    totalWeight :: NonEmpty HistBin -> Mean
totalWeight = HistBin -> Mean
tw (HistBin -> Mean)
-> (NonEmpty HistBin -> HistBin) -> NonEmpty HistBin -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty HistBin -> HistBin
forall a. NonEmpty a -> a
NE.last where
        tw :: HistBin -> Mean
tw HistBin
hb =  HistBin -> Mean
hbWeight HistBin
hb Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ HistBin -> Mean
hbCumWeight HistBin
hb

instance (HistBin ~ e) => HasHistogram [HistBin] Maybe where
    histogram :: [HistBin] -> Maybe (NonEmpty HistBin)
histogram = [HistBin] -> Maybe (NonEmpty HistBin)
forall a. [a] -> Maybe (NonEmpty a)
nonEmpty
    totalWeight :: [HistBin] -> Mean
totalWeight = Mean
-> (NonEmpty HistBin -> Mean) -> Maybe (NonEmpty HistBin) -> Mean
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine Mean
0 NonEmpty HistBin -> Mean
forall a (f :: * -> *). HasHistogram a f => a -> Mean
totalWeight (Maybe (NonEmpty HistBin) -> Mean)
-> ([HistBin] -> Maybe (NonEmpty HistBin)) -> [HistBin] -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [HistBin] -> Maybe (NonEmpty HistBin)
forall a (f :: * -> *).
HasHistogram a f =>
a -> f (NonEmpty HistBin)
histogram

-- | Histogram from centroids
histogramFromCentroids :: NonEmpty Centroid -> NonEmpty HistBin
histogramFromCentroids :: NonEmpty Centroid -> NonEmpty HistBin
histogramFromCentroids = NonEmpty Centroid -> NonEmpty HistBin
make
  where
    make :: NonEmpty Centroid -> NonEmpty HistBin
    -- one
    make :: NonEmpty Centroid -> NonEmpty HistBin
make ((Mean
x, Mean
w) :| []) = Mean -> Mean -> Mean -> Mean -> Mean -> HistBin
HistBin Mean
x Mean
x Mean
x Mean
w Mean
0 HistBin -> [HistBin] -> NonEmpty HistBin
forall a. a -> [a] -> NonEmpty a
:| []
    -- first
    make (c1 :: Centroid
c1@(Mean
x1, Mean
w1) :| rest :: [Centroid]
rest@((Mean
x2, Mean
_) : [Centroid]
_))
        = Mean -> Mean -> Mean -> Mean -> Mean -> HistBin
HistBin Mean
x1 (Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
mid Mean
x1 Mean
x2) Mean
x1 Mean
w1 Mean
0 HistBin -> [HistBin] -> NonEmpty HistBin
forall a. a -> [a] -> NonEmpty a
:| Centroid -> Mean -> [Centroid] -> [HistBin]
iter Centroid
c1 Mean
w1 [Centroid]
rest

    -- zero
    iter :: (Mean, Weight) -> Weight -> [(Mean, Weight)] -> [HistBin]
    iter :: Centroid -> Mean -> [Centroid] -> [HistBin]
iter Centroid
_ Mean
_ [] = []
    -- middle
    iter (Mean
x0, Mean
_) Mean
t (c1 :: Centroid
c1@(Mean
x1, Mean
w1) : rest :: [Centroid]
rest@((Mean
x2, Mean
_) : [Centroid]
_))
        = Mean -> Mean -> Mean -> Mean -> Mean -> HistBin
HistBin (Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
mid Mean
x0 Mean
x1) (Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
mid Mean
x1 Mean
x2) Mean
x1 Mean
w1 Mean
tHistBin -> [HistBin] -> [HistBin]
forall a. a -> [a] -> [a]
: Centroid -> Mean -> [Centroid] -> [HistBin]
iter Centroid
c1 (Mean
t Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w1) [Centroid]
rest
    -- last
    iter (Mean
x0, Mean
_) Mean
t [(Mean
x1, Mean
w1)]
        = [Mean -> Mean -> Mean -> Mean -> Mean -> HistBin
HistBin (Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
mid Mean
x0 Mean
x1) Mean
x1 Mean
x1 Mean
w1 Mean
t]

    mid :: a -> a -> a
mid a
a a
b = (a
a a -> a -> a
forall a. Num a => a -> a -> a
+ a
b) a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
2

-------------------------------------------------------------------------------
-- Quantile
-------------------------------------------------------------------------------

-- | Quantile from the histogram.
quantile :: Double -> Weight -> NonEmpty HistBin -> Double
quantile :: Mean -> Mean -> NonEmpty HistBin -> Mean
quantile Mean
q Mean
tw = [HistBin] -> Mean
iter ([HistBin] -> Mean)
-> (NonEmpty HistBin -> [HistBin]) -> NonEmpty HistBin -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty HistBin -> [HistBin]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
  where
    q' :: Mean
q' = Mean
q Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
tw

    iter :: [HistBin] -> Mean
iter []                          = String -> Mean
forall a. HasCallStack => String -> a
error String
"quantile: empty NonEmpty"
    iter [HistBin Mean
a Mean
b Mean
_ Mean
w Mean
t]           = Mean
a Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ (Mean
b Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
a) Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
q' Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
t) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
w
    iter (HistBin Mean
a Mean
b Mean
_ Mean
w Mean
t : [HistBin]
rest)
        | {- t < q' && -} Mean
q' Mean -> Mean -> Bool
forall a. Ord a => a -> a -> Bool
< Mean
t Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w = Mean
a Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ (Mean
b Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
a) Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
q' Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
t) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
w
        | Bool
otherwise                  = [HistBin] -> Mean
iter [HistBin]
rest

-------------------------------------------------------------------------------
-- Mean
-------------------------------------------------------------------------------

-- | Mean from the histogram.
mean :: NonEmpty HistBin -> Double
mean :: NonEmpty HistBin -> Mean
mean = Mean' -> Mean
getMean (Mean' -> Mean)
-> (NonEmpty HistBin -> Mean') -> NonEmpty HistBin -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (HistBin -> Mean') -> NonEmpty HistBin -> Mean'
forall (t :: * -> *) m a.
(Foldable1 t, Semigroup m) =>
(a -> m) -> t a -> m
foldMap1 HistBin -> Mean'
toMean
  where
    toMean :: HistBin -> Mean'
toMean (HistBin Mean
_ Mean
_ Mean
x Mean
w Mean
_) = Mean -> Mean -> Mean'
Mean Mean
w Mean
x

data Mean' = Mean !Double !Double

getMean :: Mean' -> Double
getMean :: Mean' -> Mean
getMean (Mean Mean
_ Mean
x) = Mean
x

instance Semigroup Mean' where
    Mean Mean
w1 Mean
x1 <> :: Mean' -> Mean' -> Mean'
<> Mean Mean
w2 Mean
x2 = Mean -> Mean -> Mean'
Mean Mean
w Mean
x
      where
        w :: Mean
w = Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w2
        x :: Mean
x = (Mean
x1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
x2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
w2) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
w

-- | Variance from the histogram.
variance :: NonEmpty HistBin -> Double
variance :: NonEmpty HistBin -> Mean
variance = Variance -> Mean
getVariance (Variance -> Mean)
-> (NonEmpty HistBin -> Variance) -> NonEmpty HistBin -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (HistBin -> Variance) -> NonEmpty HistBin -> Variance
forall (t :: * -> *) m a.
(Foldable1 t, Semigroup m) =>
(a -> m) -> t a -> m
foldMap1 HistBin -> Variance
toVariance
  where
    toVariance :: HistBin -> Variance
toVariance (HistBin Mean
_ Mean
_ Mean
x Mean
w Mean
_) = Mean -> Mean -> Mean -> Variance
Variance Mean
w Mean
x Mean
0

data Variance = Variance !Double !Double !Double

getVariance :: Variance -> Double
getVariance :: Variance -> Mean
getVariance (Variance Mean
w Mean
_ Mean
d) = Mean
d Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ (Mean
w Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
1)

-- See: https://izbicki.me/blog/gausian-distributions-are-monoids
instance Semigroup Variance where
    Variance Mean
w1 Mean
x1 Mean
d1 <> :: Variance -> Variance -> Variance
<> Variance Mean
w2 Mean
x2 Mean
d2 = Mean -> Mean -> Mean -> Variance
Variance Mean
w Mean
x Mean
d
      where
        w :: Mean
w = Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w2
        x :: Mean
x = (Mean
x1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
x2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
w2) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
w
        d :: Mean
d = Mean
d1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
d2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
x1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
x1) Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
x2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
x2) Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
w Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
x Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
x

-------------------------------------------------------------------------------
-- CDF - cumulative distribution function
-------------------------------------------------------------------------------

-- | Cumulative distribution function.
cdf :: Double
    -> Double  -- ^ total weight
    -> [HistBin] -> Double
cdf :: Mean -> Mean -> [HistBin] -> Mean
cdf Mean
x Mean
n = [HistBin] -> Mean
iter
  where
    iter :: [HistBin] -> Mean
iter [] = Mean
1
    iter (HistBin Mean
a Mean
b Mean
_ Mean
w Mean
t : [HistBin]
rest)
        | Mean
x Mean -> Mean -> Bool
forall a. Ord a => a -> a -> Bool
< Mean
a     = Mean
0
        | Mean
x Mean -> Mean -> Bool
forall a. Ord a => a -> a -> Bool
< Mean
b     = (Mean
t Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
x Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
a) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ (Mean
b Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
a)) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
n
        | Bool
otherwise = [HistBin] -> Mean
iter [HistBin]
rest

-------------------------------------------------------------------------------
-- Debug
-------------------------------------------------------------------------------

-- | Validate that list of 'HistBin' is a valid "histogram".
validateHistogram :: Foldable f => f HistBin -> Either String (f HistBin)
validateHistogram :: f HistBin -> Either String (f HistBin)
validateHistogram f HistBin
bs = ((HistBin, HistBin) -> Either String ())
-> [(HistBin, HistBin)] -> Either String ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ (HistBin, HistBin) -> Either String ()
validPair ([HistBin] -> [(HistBin, HistBin)]
forall b. [b] -> [(b, b)]
pairs ([HistBin] -> [(HistBin, HistBin)])
-> [HistBin] -> [(HistBin, HistBin)]
forall a b. (a -> b) -> a -> b
$ f HistBin -> [HistBin]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList f HistBin
bs) Either String ()
-> Either String (f HistBin) -> Either String (f HistBin)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> f HistBin -> Either String (f HistBin)
forall (f :: * -> *) a. Applicative f => a -> f a
pure f HistBin
bs
  where
    validPair :: (HistBin, HistBin) -> Either String ()
validPair (lb :: HistBin
lb@(HistBin Mean
_ Mean
lmax Mean
_ Mean
lwt Mean
lcw), rb :: HistBin
rb@(HistBin Mean
rmin Mean
_ Mean
_ Mean
_ Mean
rcw)) = do
        Bool -> String -> Either String ()
check (Mean
lmax Mean -> Mean -> Bool
forall a. Eq a => a -> a -> Bool
== Mean
rmin)     String
"gap between bins"
        Bool -> String -> Either String ()
check (Mean
lcw Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
lwt Mean -> Mean -> Bool
forall a. Eq a => a -> a -> Bool
== Mean
rcw) String
"mismatch in weight cumulation"
      where
        check :: Bool -> String -> Either String ()
check Bool
False String
err = String -> Either String ()
forall a b. a -> Either a b
Left (String -> Either String ()) -> String -> Either String ()
forall a b. (a -> b) -> a -> b
$ String
err String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" " String -> ShowS
forall a. [a] -> [a] -> [a]
++ (HistBin, HistBin) -> String
forall a. Show a => a -> String
show (HistBin
lb, HistBin
rb)
        check Bool
True  String
_   = () -> Either String ()
forall a b. b -> Either a b
Right ()
    pairs :: [b] -> [(b, b)]
pairs [b]
xs = [b] -> [b] -> [(b, b)]
forall a b. [a] -> [b] -> [(a, b)]
zip [b]
xs ([b] -> [(b, b)]) -> [b] -> [(b, b)]
forall a b. (a -> b) -> a -> b
$ [b] -> [b]
forall a. [a] -> [a]
tail [b]
xs

-------------------------------------------------------------------------------
-- Affine
-------------------------------------------------------------------------------

-- | Affine containers, i.e. containing at most 1 element
--
-- This class doesn't have 'traverse' analogie
-- as it would require using 'Pointed' which is disputed type class.
--
-- > traverseAff :: Pointed f => (a -> f b) -> t a -> f (t b)
--
class Traversable t => Affine t where
    -- | Like `foldMap`
    affine :: b -> (a -> b) -> t a -> b
    affine b
x a -> b
f = b -> t b -> b
forall (t :: * -> *) a. Affine t => a -> t a -> a
fromAffine b
x (t b -> b) -> (t a -> t b) -> t a -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> b) -> t a -> t b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f

    fromAffine :: a -> t a -> a
    fromAffine a
x = a -> (a -> a) -> t a -> a
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine a
x a -> a
forall a. a -> a
id

    {-# MINIMAL fromAffine | affine #-}

instance Affine Identity    where fromAffine :: a -> Identity a -> a
fromAffine a
_ = Identity a -> a
forall a. Identity a -> a
runIdentity
instance Affine Maybe       where affine :: b -> (a -> b) -> Maybe a -> b
affine = b -> (a -> b) -> Maybe a -> b
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
instance Affine Proxy       where affine :: b -> (a -> b) -> Proxy a -> b
affine b
x a -> b
_ Proxy a
_ = b
x

-- | Composition of 'Affine' containers is 'Affine'
instance (Affine f, Affine g) => Affine (Compose f g) where
    affine :: b -> (a -> b) -> Compose f g a -> b
affine b
x a -> b
f (Compose f (g a)
c) = b -> (g a -> b) -> f (g a) -> b
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine b
x (b -> (a -> b) -> g a -> b
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine b
x a -> b
f) f (g a)
c