{-# LANGUAGE DataKinds #-}
{-# LANGUAGE RebindableSyntax #-}

-- | Mealy quantile statistics.
module Data.Mealy.Quantiles
  ( median,
    quantiles,
    digitize,
    signalize,
  )
where

import Control.Monad.ST
import Data.Mealy
import Data.Ord
import Data.TDigest hiding (median)
import Data.TDigest.Internal
import Data.TDigest.Tree.Internal (TDigest (..), absMaxSize, emptyTDigest, insertCentroid, relMaxSize, size, toMVector)
import Data.Vector.Algorithms.Heap qualified as VHeap
import Data.Vector.Unboxed qualified as VU
import NumHask.Prelude hiding (fold)

data OnlineTDigest = OnlineTDigest
  { OnlineTDigest -> TDigest 25
td :: TDigest 25,
    OnlineTDigest -> Int
tdN :: Int,
    OnlineTDigest -> Weight
tdRate :: Double
  }
  deriving (Int -> OnlineTDigest -> ShowS
[OnlineTDigest] -> ShowS
OnlineTDigest -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [OnlineTDigest] -> ShowS
$cshowList :: [OnlineTDigest] -> ShowS
show :: OnlineTDigest -> String
$cshow :: OnlineTDigest -> String
showsPrec :: Int -> OnlineTDigest -> ShowS
$cshowsPrec :: Int -> OnlineTDigest -> ShowS
Show)

emptyOnlineTDigest :: Double -> OnlineTDigest
emptyOnlineTDigest :: Weight -> OnlineTDigest
emptyOnlineTDigest = TDigest 25 -> Int -> Weight -> OnlineTDigest
OnlineTDigest (forall (comp :: Nat). TDigest comp
emptyTDigest :: TDigest n) Int
0

-- | Mealy quantiles based on the tdigest library
quantiles :: Double -> [Double] -> Mealy Double [Double]
quantiles :: Weight -> [Weight] -> Mealy Weight [Weight]
quantiles Weight
r [Weight]
qs = forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Weight -> OnlineTDigest
inject OnlineTDigest -> Weight -> OnlineTDigest
step OnlineTDigest -> [Weight]
extract
  where
    step :: OnlineTDigest -> Weight -> OnlineTDigest
step OnlineTDigest
x Weight
a = Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a OnlineTDigest
x
    inject :: Weight -> OnlineTDigest
inject Weight
a = Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a (Weight -> OnlineTDigest
emptyOnlineTDigest Weight
r)
    extract :: OnlineTDigest -> [Weight]
extract OnlineTDigest
x = forall a. a -> Maybe a -> a
fromMaybe (Weight
0 forall a. Divisive a => a -> a -> a
/ Weight
0) forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (forall (comp :: Nat). Weight -> TDigest comp -> Maybe Weight
`quantile` TDigest 25
t) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Weight]
qs
      where
        (OnlineTDigest TDigest 25
t Int
_ Weight
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x

-- | Mealy median using 'tdigest'
--
-- The tdigest algorithm works best at extremes and can be unreliable in the centre.
median :: Double -> Mealy Double Double
median :: Weight -> Mealy Weight Weight
median Weight
r = forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Weight -> OnlineTDigest
inject OnlineTDigest -> Weight -> OnlineTDigest
step OnlineTDigest -> Weight
extract
  where
    step :: OnlineTDigest -> Weight -> OnlineTDigest
step OnlineTDigest
x Weight
a = Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a OnlineTDigest
x
    inject :: Weight -> OnlineTDigest
inject Weight
a = Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a (Weight -> OnlineTDigest
emptyOnlineTDigest Weight
r)
    extract :: OnlineTDigest -> Weight
extract OnlineTDigest
x = forall a. a -> Maybe a -> a
fromMaybe (Weight
0 forall a. Divisive a => a -> a -> a
/ Weight
0) (forall (comp :: Nat). Weight -> TDigest comp -> Maybe Weight
quantile Weight
0.5 TDigest 25
t)
      where
        (OnlineTDigest TDigest 25
t Int
_ Weight
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x

onlineInsert' :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' :: Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert' Weight
x (OnlineTDigest TDigest 25
td' Int
n Weight
r) =
  TDigest 25 -> Int -> Weight -> OnlineTDigest
OnlineTDigest
    (forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid (Weight
x, Weight
r forall b a.
(Ord b, Divisive a, Subtractive b, Integral b) =>
a -> b -> a
^^ (-(forall a b. FromIntegral a b => b -> a
fromIntegral forall a b. (a -> b) -> a -> b
$ Int
n forall a. Additive a => a -> a -> a
+ Int
1) :: Integer)) TDigest 25
td')
    (Int
n forall a. Additive a => a -> a -> a
+ Int
1)
    Weight
r

onlineInsert :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert :: Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
x OnlineTDigest
otd = OnlineTDigest -> OnlineTDigest
onlineCompress (Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert' Weight
x OnlineTDigest
otd)

onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
Nil Int
_ Weight
_) = OnlineTDigest
otd
onlineCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
t Int
_ Weight
_)
  | forall (comp :: Nat). TDigest comp -> Int
Data.TDigest.Tree.Internal.size TDigest 25
t forall a. Ord a => a -> a -> Bool
> Int
relMaxSize forall a. Multiplicative a => a -> a -> a
* Int
compression
      Bool -> Bool -> Bool
&& forall (comp :: Nat). TDigest comp -> Int
Data.TDigest.Tree.Internal.size TDigest 25
t forall a. Ord a => a -> a -> Bool
> Int
absMaxSize =
      OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
otd
  | Bool
otherwise = OnlineTDigest
otd
  where
    compression :: Int
compression = Int
25

onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
Nil Int
_ Weight
_) = OnlineTDigest
otd
onlineForceCompress (OnlineTDigest TDigest 25
t Int
n Weight
r) = TDigest 25 -> Int -> Weight -> OnlineTDigest
OnlineTDigest TDigest 25
t' Int
0 Weight
r
  where
    t' :: TDigest 25
t' =
      forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) forall (comp :: Nat). TDigest comp
emptyTDigest forall a b. (a -> b) -> a -> b
$
        (\(Weight
m, Weight
w) -> (Weight
m, Weight
w forall a. Multiplicative a => a -> a -> a
* (Weight
r forall b a.
(Ord b, Divisive a, Subtractive b, Integral b) =>
a -> b -> a
^^ Int
n))) forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Unbox a => Vector a -> [a]
VU.toList Vector (Centroid, Weight)
centroids
    -- Centroids are shuffled based on space
    centroids :: VU.Vector (Centroid, Double)
    centroids :: Vector (Centroid, Weight)
centroids =
      forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
        MVector s (Centroid, Weight)
v <- forall (comp :: Nat) s.
KnownNat comp =>
TDigest comp -> ST s (MVector s (Centroid, Weight))
toMVector TDigest 25
t
        -- sort by cumulative weight
        forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VHeap.sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> b
snd) MVector s (Centroid, Weight)
v
        forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.unsafeFreeze MVector s (Centroid, Weight)
v

-- | A mealy that computes the running quantile bucket. For example,
-- in a scan, @digitize 0.9 [0.5]@ returns:
--
-- * 0 if the current value is less than the current mealy median.
--
-- * 1 if the current value is greater than the current mealy median.
digitize :: Double -> [Double] -> Mealy Double Int
digitize :: Weight -> [Weight] -> Mealy Weight Int
digitize Weight
r [Weight]
qs = forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Weight -> (OnlineTDigest, Weight)
inject forall {b}. (OnlineTDigest, b) -> Weight -> (OnlineTDigest, Weight)
step forall {a}.
(Additive a, FromInteger a) =>
(OnlineTDigest, Weight) -> a
extract
  where
    step :: (OnlineTDigest, b) -> Weight -> (OnlineTDigest, Weight)
step (OnlineTDigest
x, b
_) Weight
a = (Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a OnlineTDigest
x, Weight
a)
    inject :: Weight -> (OnlineTDigest, Weight)
inject Weight
a = (Weight -> OnlineTDigest -> OnlineTDigest
onlineInsert Weight
a (Weight -> OnlineTDigest
emptyOnlineTDigest Weight
r), Weight
a)
    extract :: (OnlineTDigest, Weight) -> a
extract (OnlineTDigest
x, Weight
l) = forall {a} {a}. (Additive a, Ord a, FromInteger a) => [a] -> a -> a
bucket' [Weight]
qs' Weight
l
      where
        qs' :: [Weight]
qs' = forall a. a -> Maybe a -> a
fromMaybe (Weight
0 forall a. Divisive a => a -> a -> a
/ Weight
0) forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (forall (comp :: Nat). Weight -> TDigest comp -> Maybe Weight
`quantile` TDigest 25
t) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Weight]
qs
        (OnlineTDigest TDigest 25
t Int
_ Weight
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x
        bucket' :: [a] -> a -> a
bucket' [a]
xs a
l' =
          forall a b. Mealy a b -> [a] -> b
fold (forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M forall {k} (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id forall a. Additive a => a -> a -> a
(+) forall {k} (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id) forall a b. (a -> b) -> a -> b
$
            ( \a
x' ->
                if a
x' forall a. Ord a => a -> a -> Bool
> a
l'
                  then a
0
                  else a
1
            )
              forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [a]
xs

-- | transform an input to a [0,1] signal, via digitalization.
signalize :: Double -> [Double] -> Mealy Double Double
signalize :: Weight -> [Weight] -> Mealy Weight Weight
signalize Weight
r [Weight]
qs' =
  (\Int
x -> forall a b. FromIntegral a b => b -> a
fromIntegral Int
x forall a. Divisive a => a -> a -> a
/ forall a b. FromIntegral a b => b -> a
fromIntegral (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Weight]
qs' forall a. Additive a => a -> a -> a
+ Int
1)) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Weight -> [Weight] -> Mealy Weight Int
digitize Weight
r [Weight]
qs'