{-# LANGUAGE DataKinds #-}
{-# LANGUAGE RebindableSyntax #-}

-- | Mealy quantile statistics.
module Data.Mealy.Quantiles
  ( median,
    quantiles,
    digitize,
    signalize,
  )
where

import Control.Monad.ST
import Data.Mealy
import Data.Ord
import Data.TDigest hiding (median)
import Data.TDigest.Internal
import Data.TDigest.Tree.Internal (TDigest (..), absMaxSize, emptyTDigest, insertCentroid, relMaxSize, size, toMVector)
import Data.Vector.Algorithms.Heap qualified as VHeap
import Data.Vector.Unboxed qualified as VU
import NumHask.Prelude hiding (fold)

data OnlineTDigest = OnlineTDigest
  { OnlineTDigest -> TDigest 25
td :: TDigest 25,
    OnlineTDigest -> Int
tdN :: Int,
    OnlineTDigest -> Weight
tdRate :: Double
  }
  deriving (Int -> OnlineTDigest -> ShowS
[OnlineTDigest] -> ShowS
OnlineTDigest -> String
(Int -> OnlineTDigest -> ShowS)
-> (OnlineTDigest -> String)
-> ([OnlineTDigest] -> ShowS)
-> Show OnlineTDigest
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> OnlineTDigest -> ShowS
showsPrec :: Int -> OnlineTDigest -> ShowS
$cshow :: OnlineTDigest -> String
show :: OnlineTDigest -> String
$cshowList :: [OnlineTDigest] -> ShowS
showList :: [OnlineTDigest] -> ShowS
Show)

emptyOnlineTDigest :: Double -> OnlineTDigest
emptyOnlineTDigest :: Weight -> OnlineTDigest
emptyOnlineTDigest = TDigest 25 -> Int -> Weight -> OnlineTDigest
OnlineTDigest (TDigest n
forall {n :: Nat}. TDigest n
emptyTDigest :: TDigest n) Int
0

-- | Mealy quantiles based on the tdigest library
quantiles :: Double -> [Double] -> Mealy Double [Double]
quantiles :: Weight -> [Weight] -> Mealy Weight [Weight]
quantiles Weight
r [Weight]
qs = (Weight -> OnlineTDigest)
-> (OnlineTDigest -> Weight -> OnlineTDigest)
-> (OnlineTDigest -> [Weight])
-> Mealy Weight [Weight]
forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Weight -> OnlineTDigest
inject OnlineTDigest -> Weight -> OnlineTDigest
step OnlineTDigest -> [Weight]
extract
  where
    step :: OnlineTDigest -> Weight -> OnlineTDigest
step OnlineTDigest
x Weight
a = Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a OnlineTDigest
x
    inject :: Weight -> OnlineTDigest
inject Weight
a = Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a (Weight -> OnlineTDigest
emptyOnlineTDigest Weight
r)
    extract :: OnlineTDigest -> [Weight]
extract OnlineTDigest
x = Weight -> Maybe Weight -> Weight
forall a. a -> Maybe a -> a
fromMaybe (Weight
0 Weight -> Weight -> Weight
forall a. Divisive a => a -> a -> a
/ Weight
0) (Maybe Weight -> Weight)
-> (Weight -> Maybe Weight) -> Weight -> Weight
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Weight -> TDigest 25 -> Maybe Weight
forall (comp :: Nat). Weight -> TDigest comp -> Maybe Weight
`quantile` TDigest 25
t) (Weight -> Weight) -> [Weight] -> [Weight]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Weight]
qs
      where
        (OnlineTDigest TDigest 25
t Int
_ Weight
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x

-- | Mealy median using 'tdigest'
--
-- The tdigest algorithm works best at extremes and can be unreliable in the centre.
median :: Double -> Mealy Double Double
median :: Weight -> Mealy Weight Weight
median Weight
r = (Weight -> OnlineTDigest)
-> (OnlineTDigest -> Weight -> OnlineTDigest)
-> (OnlineTDigest -> Weight)
-> Mealy Weight Weight
forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Weight -> OnlineTDigest
inject OnlineTDigest -> Weight -> OnlineTDigest
step OnlineTDigest -> Weight
extract
  where
    step :: OnlineTDigest -> Weight -> OnlineTDigest
step OnlineTDigest
x Weight
a = Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a OnlineTDigest
x
    inject :: Weight -> OnlineTDigest
inject Weight
a = Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a (Weight -> OnlineTDigest
emptyOnlineTDigest Weight
r)
    extract :: OnlineTDigest -> Weight
extract OnlineTDigest
x = Weight -> Maybe Weight -> Weight
forall a. a -> Maybe a -> a
fromMaybe (Weight
0 Weight -> Weight -> Weight
forall a. Divisive a => a -> a -> a
/ Weight
0) (Weight -> TDigest 25 -> Maybe Weight
forall (comp :: Nat). Weight -> TDigest comp -> Maybe Weight
quantile Weight
0.5 TDigest 25
t)
      where
        (OnlineTDigest TDigest 25
t Int
_ Weight
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x

onlineInsert' :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' :: Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert' Weight
x (OnlineTDigest TDigest 25
td' Int
n Weight
r) =
  TDigest 25 -> Int -> Weight -> OnlineTDigest
OnlineTDigest
    (Centroid -> TDigest 25 -> TDigest 25
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid (Weight
x, Weight
r Weight -> Integer -> Weight
forall b a.
(Ord b, Divisive a, Subtractive b, Integral b) =>
a -> b -> a
^^ (-(Int -> Integer
forall a b. FromIntegral a b => b -> a
fromIntegral (Int -> Integer) -> Int -> Integer
forall a b. (a -> b) -> a -> b
$ Int
n Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
1) :: Integer)) TDigest 25
td')
    (Int
n Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
1)
    Weight
r

onlineInsert :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert :: Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
x OnlineTDigest
otd = OnlineTDigest -> OnlineTDigest
onlineCompress (Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert' Weight
x OnlineTDigest
otd)

onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
Nil Int
_ Weight
_) = OnlineTDigest
otd
onlineCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
t Int
_ Weight
_)
  | TDigest 25 -> Int
forall (comp :: Nat). TDigest comp -> Int
Data.TDigest.Tree.Internal.size TDigest 25
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
relMaxSize Int -> Int -> Int
forall a. Multiplicative a => a -> a -> a
* Int
compression
      Bool -> Bool -> Bool
&& TDigest 25 -> Int
forall (comp :: Nat). TDigest comp -> Int
Data.TDigest.Tree.Internal.size TDigest 25
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
absMaxSize =
      OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
otd
  | Bool
otherwise = OnlineTDigest
otd
  where
    compression :: Int
compression = Int
25

onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
Nil Int
_ Weight
_) = OnlineTDigest
otd
onlineForceCompress (OnlineTDigest TDigest 25
t Int
n Weight
r) = TDigest 25 -> Int -> Weight -> OnlineTDigest
OnlineTDigest TDigest 25
t' Int
0 Weight
r
  where
    t' :: TDigest 25
t' =
      (TDigest 25 -> Centroid -> TDigest 25)
-> TDigest 25 -> [Centroid] -> TDigest 25
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Centroid -> TDigest 25 -> TDigest 25)
-> TDigest 25 -> Centroid -> TDigest 25
forall a b c. (a -> b -> c) -> b -> a -> c
flip Centroid -> TDigest 25 -> TDigest 25
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest 25
forall {n :: Nat}. TDigest n
emptyTDigest ([Centroid] -> TDigest 25) -> [Centroid] -> TDigest 25
forall a b. (a -> b) -> a -> b
$
        (\(Weight
m, Weight
w) -> (Weight
m, Weight
w Weight -> Weight -> Weight
forall a. Multiplicative a => a -> a -> a
* (Weight
r Weight -> Int -> Weight
forall b a.
(Ord b, Divisive a, Subtractive b, Integral b) =>
a -> b -> a
^^ Int
n))) (Centroid -> Centroid)
-> ((Centroid, Weight) -> Centroid)
-> (Centroid, Weight)
-> Centroid
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Centroid, Weight) -> Centroid
forall a b. (a, b) -> a
fst ((Centroid, Weight) -> Centroid)
-> [(Centroid, Weight)] -> [Centroid]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Vector (Centroid, Weight) -> [(Centroid, Weight)]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector (Centroid, Weight)
centroids
    -- Centroids are shuffled based on space
    centroids :: VU.Vector (Centroid, Double)
    centroids :: Vector (Centroid, Weight)
centroids =
      (forall s. ST s (Vector (Centroid, Weight)))
-> Vector (Centroid, Weight)
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector (Centroid, Weight)))
 -> Vector (Centroid, Weight))
-> (forall s. ST s (Vector (Centroid, Weight)))
-> Vector (Centroid, Weight)
forall a b. (a -> b) -> a -> b
$ do
        MVector s (Centroid, Weight)
v <- TDigest 25 -> ST s (MVector s (Centroid, Weight))
forall (comp :: Nat) s.
KnownNat comp =>
TDigest comp -> ST s (MVector s (Centroid, Weight))
toMVector TDigest 25
t
        -- sort by cumulative weight
        Comparison (Centroid, Weight)
-> MVector (PrimState (ST s)) (Centroid, Weight) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VHeap.sortBy (((Centroid, Weight) -> Weight) -> Comparison (Centroid, Weight)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Centroid, Weight) -> Weight
forall a b. (a, b) -> b
snd) MVector s (Centroid, Weight)
MVector (PrimState (ST s)) (Centroid, Weight)
v
        MVector (PrimState (ST s)) (Centroid, Weight)
-> ST s (Vector (Centroid, Weight))
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.unsafeFreeze MVector s (Centroid, Weight)
MVector (PrimState (ST s)) (Centroid, Weight)
v

-- | A mealy that computes the running quantile bucket. For example,
-- in a scan, @digitize 0.9 [0.5]@ returns:
--
-- * 0 if the current value is less than the current mealy median.
--
-- * 1 if the current value is greater than the current mealy median.
digitize :: Double -> [Double] -> Mealy Double Int
digitize :: Weight -> [Weight] -> Mealy Weight Int
digitize Weight
r [Weight]
qs = (Weight -> (OnlineTDigest, Weight))
-> ((OnlineTDigest, Weight) -> Weight -> (OnlineTDigest, Weight))
-> ((OnlineTDigest, Weight) -> Int)
-> Mealy Weight Int
forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Weight -> (OnlineTDigest, Weight)
inject (OnlineTDigest, Weight) -> Weight -> (OnlineTDigest, Weight)
forall {b}. (OnlineTDigest, b) -> Weight -> (OnlineTDigest, Weight)
step (OnlineTDigest, Weight) -> Int
forall {a}.
(Additive a, FromInteger a) =>
(OnlineTDigest, Weight) -> a
extract
  where
    step :: (OnlineTDigest, b) -> Weight -> (OnlineTDigest, Weight)
step (OnlineTDigest
x, b
_) Weight
a = (Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a OnlineTDigest
x, Weight
a)
    inject :: Weight -> (OnlineTDigest, Weight)
inject Weight
a = (Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a (Weight -> OnlineTDigest
emptyOnlineTDigest Weight
r), Weight
a)
    extract :: (OnlineTDigest, Weight) -> a
extract (OnlineTDigest
x, Weight
l) = [Weight] -> Weight -> a
forall {a} {a}. (Additive a, Ord a, FromInteger a) => [a] -> a -> a
bucket' [Weight]
qs' Weight
l
      where
        qs' :: [Weight]
qs' = Weight -> Maybe Weight -> Weight
forall a. a -> Maybe a -> a
fromMaybe (Weight
0 Weight -> Weight -> Weight
forall a. Divisive a => a -> a -> a
/ Weight
0) (Maybe Weight -> Weight)
-> (Weight -> Maybe Weight) -> Weight -> Weight
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Weight -> TDigest 25 -> Maybe Weight
forall (comp :: Nat). Weight -> TDigest comp -> Maybe Weight
`quantile` TDigest 25
t) (Weight -> Weight) -> [Weight] -> [Weight]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Weight]
qs
        (OnlineTDigest TDigest 25
t Int
_ Weight
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x
        bucket' :: [a] -> a -> a
bucket' [a]
xs a
l' =
          Mealy a a -> [a] -> a
forall a b. Mealy a b -> [a] -> b
fold ((a -> a) -> (a -> a -> a) -> (a -> a) -> Mealy a a
forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M a -> a
forall a. a -> a
forall {k} (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id a -> a -> a
forall a. Additive a => a -> a -> a
(+) a -> a
forall a. a -> a
forall {k} (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id) ([a] -> a) -> [a] -> a
forall a b. (a -> b) -> a -> b
$
            ( \a
x' ->
                if a
x' a -> a -> Bool
forall a. Ord a => a -> a -> Bool
> a
l'
                  then a
0
                  else a
1
            )
              (a -> a) -> [a] -> [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [a]
xs

-- | transform an input to a [0,1] signal, via digitalization.
signalize :: Double -> [Double] -> Mealy Double Double
signalize :: Weight -> [Weight] -> Mealy Weight Weight
signalize Weight
r [Weight]
qs' =
  (\Int
x -> Int -> Weight
forall a b. FromIntegral a b => b -> a
fromIntegral Int
x Weight -> Weight -> Weight
forall a. Divisive a => a -> a -> a
/ Int -> Weight
forall a b. FromIntegral a b => b -> a
fromIntegral ([Weight] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Weight]
qs' Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
1)) (Int -> Weight) -> Mealy Weight Int -> Mealy Weight Weight
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Weight -> [Weight] -> Mealy Weight Int
digitize Weight
r [Weight]
qs'