{-# LANGUAGE DataKinds #-}
{-# LANGUAGE RebindableSyntax #-}
{-# LANGUAGE StrictData #-}

module Data.Mealy.Quantiles
  ( median,
    quantiles,
    digitize,
  )
where

import Data.Mealy
import Data.Ord
import Data.TDigest hiding (median)
import Data.TDigest.Internal
import Data.TDigest.Tree.Internal (TDigest (..), absMaxSize, emptyTDigest, insertCentroid, relMaxSize, size, toMVector)
import qualified Data.Vector.Algorithms.Heap as VHeap
import qualified Data.Vector.Unboxed as VU
import NumHask.Prelude hiding (fold)

data OnlineTDigest = OnlineTDigest
  { OnlineTDigest -> TDigest 25
td :: TDigest 25,
    OnlineTDigest -> Int
tdN :: Int,
    OnlineTDigest -> Double
tdRate :: Double
  }
  deriving (Int -> OnlineTDigest -> ShowS
[OnlineTDigest] -> ShowS
OnlineTDigest -> String
(Int -> OnlineTDigest -> ShowS)
-> (OnlineTDigest -> String)
-> ([OnlineTDigest] -> ShowS)
-> Show OnlineTDigest
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [OnlineTDigest] -> ShowS
$cshowList :: [OnlineTDigest] -> ShowS
show :: OnlineTDigest -> String
$cshow :: OnlineTDigest -> String
showsPrec :: Int -> OnlineTDigest -> ShowS
$cshowsPrec :: Int -> OnlineTDigest -> ShowS
Show)

emptyOnlineTDigest :: Double -> OnlineTDigest
emptyOnlineTDigest :: Double -> OnlineTDigest
emptyOnlineTDigest = TDigest 25 -> Int -> Double -> OnlineTDigest
OnlineTDigest (forall (comp :: Nat). TDigest comp
emptyTDigest :: TDigest n) Int
0

-- | decaying quantiles based on the tdigest library
quantiles :: Double -> [Double] -> Mealy Double [Double]
quantiles :: Double -> [Double] -> Mealy Double [Double]
quantiles Double
r [Double]
qs = (Double -> OnlineTDigest)
-> (OnlineTDigest -> Double -> OnlineTDigest)
-> (OnlineTDigest -> [Double])
-> Mealy Double [Double]
forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Double -> OnlineTDigest
inject OnlineTDigest -> Double -> OnlineTDigest
step OnlineTDigest -> [Double]
extract
  where
    step :: OnlineTDigest -> Double -> OnlineTDigest
step OnlineTDigest
x Double
a = Double -> OnlineTDigest -> OnlineTDigest
onlineInsert Double
a OnlineTDigest
x
    inject :: Double -> OnlineTDigest
inject Double
a = Double -> OnlineTDigest -> OnlineTDigest
onlineInsert Double
a (Double -> OnlineTDigest
emptyOnlineTDigest Double
r)
    extract :: OnlineTDigest -> [Double]
extract OnlineTDigest
x = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe (Double
0 Double -> Double -> Double
forall a. Divisive a => a -> a -> a
/ Double
0) (Maybe Double -> Double)
-> (Double -> Maybe Double) -> Double -> Double
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Double -> TDigest 25 -> Maybe Double
forall (comp :: Nat). Double -> TDigest comp -> Maybe Double
`quantile` TDigest 25
t) (Double -> Double) -> [Double] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Double]
qs
      where
        (OnlineTDigest TDigest 25
t Int
_ Double
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x

median :: Double -> Mealy Double Double
median :: Double -> Mealy Double Double
median Double
r = (Double -> OnlineTDigest)
-> (OnlineTDigest -> Double -> OnlineTDigest)
-> (OnlineTDigest -> Double)
-> Mealy Double Double
forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Double -> OnlineTDigest
inject OnlineTDigest -> Double -> OnlineTDigest
step OnlineTDigest -> Double
extract
  where
    step :: OnlineTDigest -> Double -> OnlineTDigest
step OnlineTDigest
x Double
a = Double -> OnlineTDigest -> OnlineTDigest
onlineInsert Double
a OnlineTDigest
x
    inject :: Double -> OnlineTDigest
inject Double
a = Double -> OnlineTDigest -> OnlineTDigest
onlineInsert Double
a (Double -> OnlineTDigest
emptyOnlineTDigest Double
r)
    extract :: OnlineTDigest -> Double
extract OnlineTDigest
x = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe (Double
0 Double -> Double -> Double
forall a. Divisive a => a -> a -> a
/ Double
0) (Double -> TDigest 25 -> Maybe Double
forall (comp :: Nat). Double -> TDigest comp -> Maybe Double
quantile Double
0.5 TDigest 25
t)
      where
        (OnlineTDigest TDigest 25
t Int
_ Double
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x

onlineInsert' :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' Double
x (OnlineTDigest TDigest 25
td' Int
n Double
r) =
  TDigest 25 -> Int -> Double -> OnlineTDigest
OnlineTDigest
    (Centroid -> TDigest 25 -> TDigest 25
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid (Double
x, Double
r Double -> Integer -> Double
forall b a.
(Ord b, Divisive a, Subtractive b, Integral b) =>
a -> b -> a
^^ (- (Int -> Integer
forall a b. FromIntegral a b => b -> a
fromIntegral (Int -> Integer) -> Int -> Integer
forall a b. (a -> b) -> a -> b
$ Int
n Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
1) :: Integer)) TDigest 25
td')
    (Int
n Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
1)
    Double
r

onlineInsert :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert Double
x OnlineTDigest
otd = OnlineTDigest -> OnlineTDigest
onlineCompress (Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' Double
x OnlineTDigest
otd)

onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
Nil Int
_ Double
_) = OnlineTDigest
otd
onlineCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
t Int
_ Double
_)
  | TDigest 25 -> Int
forall (comp :: Nat). TDigest comp -> Int
Data.TDigest.Tree.Internal.size TDigest 25
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
relMaxSize Int -> Int -> Int
forall a. Multiplicative a => a -> a -> a
* Int
compression
      Bool -> Bool -> Bool
&& TDigest 25 -> Int
forall (comp :: Nat). TDigest comp -> Int
Data.TDigest.Tree.Internal.size TDigest 25
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
absMaxSize =
    OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
otd
  | Bool
otherwise = OnlineTDigest
otd
  where
    compression :: Int
compression = Int
25

onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress otd :: OnlineTDigest
otd@(OnlineTDigest TDigest 25
Nil Int
_ Double
_) = OnlineTDigest
otd
onlineForceCompress (OnlineTDigest TDigest 25
t Int
n Double
r) = TDigest 25 -> Int -> Double -> OnlineTDigest
OnlineTDigest TDigest 25
t' Int
0 Double
r
  where
    t' :: TDigest 25
t' =
      (TDigest 25 -> Centroid -> TDigest 25)
-> TDigest 25 -> [Centroid] -> TDigest 25
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Centroid -> TDigest 25 -> TDigest 25)
-> TDigest 25 -> Centroid -> TDigest 25
forall a b c. (a -> b -> c) -> b -> a -> c
flip Centroid -> TDigest 25 -> TDigest 25
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest 25
forall (comp :: Nat). TDigest comp
emptyTDigest ([Centroid] -> TDigest 25) -> [Centroid] -> TDigest 25
forall a b. (a -> b) -> a -> b
$
        (\(Double
m, Double
w) -> (Double
m, Double
w Double -> Double -> Double
forall a. Multiplicative a => a -> a -> a
* (Double
r Double -> Int -> Double
forall b a.
(Ord b, Divisive a, Subtractive b, Integral b) =>
a -> b -> a
^^ Int
n))) (Centroid -> Centroid)
-> ((Centroid, Double) -> Centroid)
-> (Centroid, Double)
-> Centroid
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Centroid, Double) -> Centroid
forall a b. (a, b) -> a
fst ((Centroid, Double) -> Centroid)
-> [(Centroid, Double)] -> [Centroid]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Vector (Centroid, Double) -> [(Centroid, Double)]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector (Centroid, Double)
centroids
    -- Centroids are shuffled based on space
    centroids :: VU.Vector (Centroid, Double)
    centroids :: Vector (Centroid, Double)
centroids =
      (forall s. ST s (Vector (Centroid, Double)))
-> Vector (Centroid, Double)
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector (Centroid, Double)))
 -> Vector (Centroid, Double))
-> (forall s. ST s (Vector (Centroid, Double)))
-> Vector (Centroid, Double)
forall a b. (a -> b) -> a -> b
$ do
        MVector s (Centroid, Double)
v <- TDigest 25 -> ST s (MVector s (Centroid, Double))
forall (comp :: Nat) s.
KnownNat comp =>
TDigest comp -> ST s (MVector s (Centroid, Double))
toMVector TDigest 25
t
        -- sort by cumulative weight
        Comparison (Centroid, Double)
-> MVector (PrimState (ST s)) (Centroid, Double) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VHeap.sortBy (((Centroid, Double) -> Double) -> Comparison (Centroid, Double)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Centroid, Double) -> Double
forall a b. (a, b) -> b
snd) MVector s (Centroid, Double)
MVector (PrimState (ST s)) (Centroid, Double)
v
        MVector (PrimState (ST s)) (Centroid, Double)
-> ST s (Vector (Centroid, Double))
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.unsafeFreeze MVector s (Centroid, Double)
MVector (PrimState (ST s)) (Centroid, Double)
v

digitize :: Double -> [Double] -> Mealy Double Int
digitize :: Double -> [Double] -> Mealy Double Int
digitize Double
r [Double]
qs = (Double -> (OnlineTDigest, Double))
-> ((OnlineTDigest, Double) -> Double -> (OnlineTDigest, Double))
-> ((OnlineTDigest, Double) -> Int)
-> Mealy Double Int
forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M Double -> (OnlineTDigest, Double)
inject (OnlineTDigest, Double) -> Double -> (OnlineTDigest, Double)
forall b. (OnlineTDigest, b) -> Double -> (OnlineTDigest, Double)
step (OnlineTDigest, Double) -> Int
forall a.
(Additive a, FromInteger a) =>
(OnlineTDigest, Double) -> a
extract
  where
    step :: (OnlineTDigest, b) -> Double -> (OnlineTDigest, Double)
step (OnlineTDigest
x, b
_) Double
a = (Double -> OnlineTDigest -> OnlineTDigest
onlineInsert Double
a OnlineTDigest
x, Double
a)
    inject :: Double -> (OnlineTDigest, Double)
inject Double
a = (Double -> OnlineTDigest -> OnlineTDigest
onlineInsert Double
a (Double -> OnlineTDigest
emptyOnlineTDigest Double
r), Double
a)
    extract :: (OnlineTDigest, Double) -> a
extract (OnlineTDigest
x, Double
l) = [Double] -> Double -> a
forall a a. (Additive a, Ord a, FromInteger a) => [a] -> a -> a
bucket' [Double]
qs' Double
l
      where
        qs' :: [Double]
qs' = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe (Double
0 Double -> Double -> Double
forall a. Divisive a => a -> a -> a
/ Double
0) (Maybe Double -> Double)
-> (Double -> Maybe Double) -> Double -> Double
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Double -> TDigest 25 -> Maybe Double
forall (comp :: Nat). Double -> TDigest comp -> Maybe Double
`quantile` TDigest 25
t) (Double -> Double) -> [Double] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Double]
qs
        (OnlineTDigest TDigest 25
t Int
_ Double
_) = OnlineTDigest -> OnlineTDigest
onlineForceCompress OnlineTDigest
x
        bucket' :: [a] -> a -> a
bucket' [a]
xs a
l' =
          Mealy a a -> [a] -> a
forall a b. Mealy a b -> [a] -> b
fold ((a -> a) -> (a -> a -> a) -> (a -> a) -> Mealy a a
forall a b c. (a -> c) -> (c -> a -> c) -> (c -> b) -> Mealy a b
M a -> a
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id a -> a -> a
forall a. Additive a => a -> a -> a
(+) a -> a
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id) ([a] -> a) -> [a] -> a
forall a b. (a -> b) -> a -> b
$
            ( \a
x' ->
                if a
x' a -> a -> Bool
forall a. Ord a => a -> a -> Bool
> a
l'
                  then a
0
                  else a
1
            )
              (a -> a) -> [a] -> [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [a]
xs