{-# LANGUAGE DataKinds             #-}
{-# LANGUAGE KindSignatures        #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables   #-}
-- | Internals of 'TDigest'.
--
-- Tree implementation is based on /Adams’ Trees Revisited/ by Milan Straka
-- <http://fox.ucw.cz/papers/bbtree/bbtree.pdf>
module Data.TDigest.Tree.Internal where

import Control.DeepSeq        (NFData (..))
import Control.Monad.ST       (ST, runST)
import Data.Binary            (Binary (..))
import Data.Either            (isRight)
import Data.Foldable          (toList)
import Data.List.Compat       (foldl')
import Data.List.NonEmpty     (nonEmpty)
import Data.Ord               (comparing)
import Data.Proxy             (Proxy (..))
import Data.Semigroup         (Semigroup (..))
import Data.Semigroup.Reducer (Reducer (..))
import GHC.TypeLits           (KnownNat, Nat, natVal)
import Prelude ()
import Prelude.Compat

import qualified Data.Vector.Algorithms.Heap as VHeap
import qualified Data.Vector.Unboxed         as VU
import qualified Data.Vector.Unboxed.Mutable as MVU

import           Data.TDigest.Internal
import qualified Data.TDigest.Postprocess.Internal as PP

-------------------------------------------------------------------------------
-- TDigest
-------------------------------------------------------------------------------

-- | 'TDigest' is a tree of centroids.
--
-- @compression@ is a @1/δ@. The greater the value of @compression@ the less
-- likely value merging will happen.
data TDigest (compression :: Nat)
    -- | Tree node
    = Node
        {-# UNPACK #-} !Size     -- size of this tree/centroid
        {-# UNPACK #-} !Mean     -- mean of the centroid
        {-# UNPACK #-} !Weight   -- weight of the centrod
        {-# UNPACK #-} !Weight   -- total weight of the tree
        !(TDigest compression)   -- left subtree
        !(TDigest compression)   -- right subtree
    -- | Empty tree
    | Nil
  deriving (Int -> TDigest compression -> ShowS
[TDigest compression] -> ShowS
TDigest compression -> String
(Int -> TDigest compression -> ShowS)
-> (TDigest compression -> String)
-> ([TDigest compression] -> ShowS)
-> Show (TDigest compression)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall (compression :: Nat). Int -> TDigest compression -> ShowS
forall (compression :: Nat). [TDigest compression] -> ShowS
forall (compression :: Nat). TDigest compression -> String
showList :: [TDigest compression] -> ShowS
$cshowList :: forall (compression :: Nat). [TDigest compression] -> ShowS
show :: TDigest compression -> String
$cshow :: forall (compression :: Nat). TDigest compression -> String
showsPrec :: Int -> TDigest compression -> ShowS
$cshowsPrec :: forall (compression :: Nat). Int -> TDigest compression -> ShowS
Show)

-- [Note: keep min & max in the tree]
--
-- We tried it, but it seems the alloc/update cost is bigger than
-- re-calculating them on need (it's O(log n) - calculation!)

-- [Note: singleton node]
-- We tried to add one, but haven't seen change in performance

-- [Note: inlining balanceR and balanceL]
-- We probably can squueze some performance by making
-- 'balanceL' and 'balanceR' check arguments only once (like @containers@ do)
-- and not use 'node' function.
-- *But*, the benefit vs. code explosion is not yet worth.

instance KnownNat comp => Semigroup (TDigest comp) where
    <> :: TDigest comp -> TDigest comp -> TDigest comp
(<>) = TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineDigest

-- | Both 'cons' and 'snoc' are 'insert'
instance KnownNat comp => Reducer Double (TDigest comp) where
    cons :: Double -> TDigest comp -> TDigest comp
cons = Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
    snoc :: TDigest comp -> Double -> TDigest comp
snoc = (Double -> TDigest comp -> TDigest comp)
-> TDigest comp -> Double -> TDigest comp
forall a b c. (a -> b -> c) -> b -> a -> c
flip Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
    unit :: Double -> TDigest comp
unit = Double -> TDigest comp
forall (comp :: Nat). KnownNat comp => Double -> TDigest comp
singleton

instance  KnownNat comp => Monoid (TDigest comp) where
    mempty :: TDigest comp
mempty  = TDigest comp
forall (comp :: Nat). TDigest comp
emptyTDigest
    mappend :: TDigest comp -> TDigest comp -> TDigest comp
mappend = TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineDigest

-- | 'TDigest' has only strict fields.
instance NFData (TDigest comp) where
    rnf :: TDigest comp -> ()
rnf TDigest comp
x = TDigest comp
x TDigest comp -> () -> ()
`seq` ()

-- | 'TDigest' isn't compressed after de-serialisation,
-- but it can be still smaller.
instance KnownNat comp => Binary (TDigest comp) where
    put :: TDigest comp -> Put
put = [Centroid] -> Put
forall t. Binary t => t -> Put
put ([Centroid] -> Put)
-> (TDigest comp -> [Centroid]) -> TDigest comp -> Put
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest comp -> [Centroid]
forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids
    get :: Get (TDigest comp)
get = (TDigest comp -> Centroid -> TDigest comp)
-> TDigest comp -> [Centroid] -> TDigest comp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Centroid -> TDigest comp -> TDigest comp)
-> TDigest comp -> Centroid -> TDigest comp
forall a b c. (a -> b -> c) -> b -> a -> c
flip Centroid -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest comp
forall (comp :: Nat). TDigest comp
emptyTDigest ([Centroid] -> TDigest comp)
-> ([Centroid] -> [Centroid]) -> [Centroid] -> TDigest comp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Centroid] -> [Centroid]
lc ([Centroid] -> TDigest comp)
-> Get [Centroid] -> Get (TDigest comp)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get [Centroid]
forall t. Binary t => Get t
get
      where
        lc :: [Centroid] -> [Centroid]
        lc :: [Centroid] -> [Centroid]
lc = [Centroid] -> [Centroid]
forall a. a -> a
id

instance PP.HasHistogram (TDigest comp) Maybe where
    histogram :: TDigest comp -> Maybe (NonEmpty HistBin)
histogram = (NonEmpty Centroid -> NonEmpty HistBin)
-> Maybe (NonEmpty Centroid) -> Maybe (NonEmpty HistBin)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap NonEmpty Centroid -> NonEmpty HistBin
PP.histogramFromCentroids (Maybe (NonEmpty Centroid) -> Maybe (NonEmpty HistBin))
-> (TDigest comp -> Maybe (NonEmpty Centroid))
-> TDigest comp
-> Maybe (NonEmpty HistBin)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Centroid] -> Maybe (NonEmpty Centroid)
forall a. [a] -> Maybe (NonEmpty a)
nonEmpty ([Centroid] -> Maybe (NonEmpty Centroid))
-> (TDigest comp -> [Centroid])
-> TDigest comp
-> Maybe (NonEmpty Centroid)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest comp -> [Centroid]
forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids
    totalWeight :: TDigest comp -> Double
totalWeight = TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight

getCentroids :: TDigest comp -> [Centroid]
getCentroids :: TDigest comp -> [Centroid]
getCentroids = (([Centroid] -> [Centroid]) -> [Centroid] -> [Centroid]
forall a b. (a -> b) -> a -> b
$ []) (([Centroid] -> [Centroid]) -> [Centroid])
-> (TDigest comp -> [Centroid] -> [Centroid])
-> TDigest comp
-> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest comp -> [Centroid] -> [Centroid]
forall (compression :: Nat).
TDigest compression -> [Centroid] -> [Centroid]
go
  where
    go :: TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
Nil                = [Centroid] -> [Centroid]
forall a. a -> a
id
    go (Node Int
_ Double
x Double
w Double
_ TDigest compression
l TDigest compression
r) = TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
l ([Centroid] -> [Centroid])
-> ([Centroid] -> [Centroid]) -> [Centroid] -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Double
x,Double
w) Centroid -> [Centroid] -> [Centroid]
forall a. a -> [a] -> [a]
: ) ([Centroid] -> [Centroid])
-> ([Centroid] -> [Centroid]) -> [Centroid] -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
r

-- | Total count of samples.
--
-- >>> totalWeight (tdigest [1..100] :: TDigest 5)
-- 100.0
--
totalWeight :: TDigest comp -> Weight
totalWeight :: TDigest comp -> Double
totalWeight TDigest comp
Nil                 = Double
0
totalWeight (Node Int
_ Double
_ Double
_ Double
tw TDigest comp
_ TDigest comp
_) = Double
tw

size :: TDigest comp -> Int
size :: TDigest comp -> Int
size TDigest comp
Nil                    = Int
0
size (Node Int
s Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_) = Int
s

-- | Center of left-most centroid. Note: may be different than min element inserted.
--
-- >>> minimumValue (tdigest [1..100] :: TDigest 3)
-- 1.0
--
minimumValue :: TDigest comp -> Mean
minimumValue :: TDigest comp -> Double
minimumValue = Double -> TDigest comp -> Double
forall (compression :: Nat).
Double -> TDigest compression -> Double
go Double
posInf
  where
    go :: Double -> TDigest compression -> Double
go  Double
acc TDigest compression
Nil                    = Double
acc
    go Double
_acc (Node Int
_ Double
x Double
_ Double
_ TDigest compression
l TDigest compression
_) = Double -> TDigest compression -> Double
go Double
x TDigest compression
l

-- | Center of right-most centroid. Note: may be different than max element inserted.
--
-- >>> maximumValue (tdigest [1..100] :: TDigest 3)
-- 99.0
--
maximumValue :: TDigest comp -> Mean
maximumValue :: TDigest comp -> Double
maximumValue = Double -> TDigest comp -> Double
forall (compression :: Nat).
Double -> TDigest compression -> Double
go Double
negInf
  where
    go :: Double -> TDigest compression -> Double
go  Double
acc TDigest compression
Nil                    = Double
acc
    go Double
_acc (Node Int
_ Double
x Double
_ Double
_ TDigest compression
_ TDigest compression
r) = Double -> TDigest compression -> Double
go Double
x TDigest compression
r

-------------------------------------------------------------------------------
-- Implementation
-------------------------------------------------------------------------------

emptyTDigest :: TDigest comp
emptyTDigest :: TDigest comp
emptyTDigest = TDigest comp
forall (comp :: Nat). TDigest comp
Nil

combineDigest
    :: KnownNat comp
    => TDigest comp
    -> TDigest comp
    -> TDigest comp
combineDigest :: TDigest comp -> TDigest comp -> TDigest comp
combineDigest TDigest comp
a TDigest comp
Nil = TDigest comp
a
combineDigest TDigest comp
Nil TDigest comp
b = TDigest comp
b
combineDigest a :: TDigest comp
a@(Node Int
n Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_) b :: TDigest comp
b@(Node Int
m Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_)
    -- TODO: merge first, then shuffle and insert (part of compress)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
m     = TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress (TDigest comp -> TDigest comp) -> TDigest comp -> TDigest comp
forall a b. (a -> b) -> a -> b
$ (TDigest comp -> Centroid -> TDigest comp)
-> TDigest comp -> [Centroid] -> TDigest comp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Centroid -> TDigest comp -> TDigest comp)
-> TDigest comp -> Centroid -> TDigest comp
forall a b c. (a -> b -> c) -> b -> a -> c
flip Centroid -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest comp
b (TDigest comp -> [Centroid]
forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids TDigest comp
a)
    | Bool
otherwise = TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress (TDigest comp -> TDigest comp) -> TDigest comp -> TDigest comp
forall a b. (a -> b) -> a -> b
$ (TDigest comp -> Centroid -> TDigest comp)
-> TDigest comp -> [Centroid] -> TDigest comp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Centroid -> TDigest comp -> TDigest comp)
-> TDigest comp -> Centroid -> TDigest comp
forall a b c. (a -> b -> c) -> b -> a -> c
flip Centroid -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest comp
a (TDigest comp -> [Centroid]
forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids TDigest comp
b)

insertCentroid
    :: forall comp. KnownNat comp
    => Centroid
    -> TDigest comp
    -> TDigest comp
insertCentroid :: Centroid -> TDigest comp -> TDigest comp
insertCentroid (Double
x, Double
w) TDigest comp
Nil        = Double -> Double -> TDigest comp
forall (comp :: Nat). Double -> Double -> TDigest comp
singNode Double
x Double
w
insertCentroid (Double
mean, Double
weight) TDigest comp
td = Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
0 Double
mean Double
weight Bool
False TDigest comp
td
  where
    -- New weight of the tree
    n :: Weight
    n :: Double
n = TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
weight

    -- 1/delta
    compression :: Double
    compression :: Double
compression = Integer -> Double
forall a. Num a => Integer -> a
fromInteger (Integer -> Double) -> Integer -> Double
forall a b. (a -> b) -> a -> b
$ Proxy comp -> Integer
forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (Proxy comp
forall k (t :: k). Proxy t
Proxy :: Proxy comp)

    go
        :: Weight        -- weight to the left of this tree
        -> Mean          -- mean to insert
        -> Weight        -- weight to insert
        -> Bool          -- should insert everything.
                         -- if we merged somewhere on top, rest is inserted as is
        -> TDigest comp  -- subtree to insert/merge centroid into
        -> TDigest comp
    go :: Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
_   Double
newX Double
newW Bool
_ TDigest comp
Nil                 = Double -> Double -> TDigest comp
forall (comp :: Nat). Double -> Double -> TDigest comp
singNode Double
newX Double
newW
    go Double
cum Double
newX Double
newW Bool
e (Node Int
s Double
x Double
w Double
tw TDigest comp
l TDigest comp
r) = case Double -> Double -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Double
newX Double
x of
        -- Exact match, insert here
        Ordering
EQ -> Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node Int
s Double
x (Double
w Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
newW) (Double
tw Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l TDigest comp
r -- node x (w + newW) l r

        -- there is *no* room to insert into this node
        Ordering
LT | Double
thr Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
w -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
        Ordering
GT | Double
thr Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
w -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
e TDigest comp
r)

        -- otherwise go left ... or later right
        Ordering
LT | Bool
e -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
        Ordering
LT -> case TDigest comp
l of
            -- always create a new node
            TDigest comp
Nil -> case Maybe Double
mrw of
                Maybe Double
Nothing     -> Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
forall (comp :: Nat). TDigest comp
Nil TDigest comp
r
                Just Double
rw     -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
nx Double
nw (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
rw Bool
True TDigest comp
forall (comp :: Nat). TDigest comp
Nil) TDigest comp
r
            Node {}
                | Double
lmax Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
newX Bool -> Bool -> Bool
&& Double -> Double
forall a. Num a => a -> a
abs (Double
newX Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
x) Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double -> Double
forall a. Num a => a -> a
abs (Double
newX Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lmax) {- && newX < x -} -> case Maybe Double
mrw of
                    Maybe Double
Nothing -> Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
nw Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
w) TDigest comp
l TDigest comp
r
                    -- in this two last LT cases, we have to recalculate size
                    Just Double
rw -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
nx Double
nw (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
rw Bool
True TDigest comp
l) TDigest comp
r
                | Bool
otherwise -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
              where
                lmax :: Double
lmax = TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
maximumValue TDigest comp
l

        -- ... or right
        Ordering
GT | Bool
e -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
True TDigest comp
r)
        Ordering
GT -> case TDigest comp
r of
            TDigest comp
Nil -> case Maybe Double
mrw of
                Maybe Double
Nothing     -> Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l TDigest comp
forall (comp :: Nat). TDigest comp
Nil
                Just Double
rw     -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
nx Double
nw TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
nw) Double
newX Double
rw Bool
True TDigest comp
forall (comp :: Nat). TDigest comp
Nil)
            Node {}
                | Double
rmin Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
newX Bool -> Bool -> Bool
&& Double -> Double
forall a. Num a => a -> a
abs (Double
newX Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
x) Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double -> Double
forall a. Num a => a -> a
abs (Double
newX Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
rmin) {- && newX > x -} -> case Maybe Double
mrw of
                    Maybe Double
Nothing -> Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l TDigest comp
r
                    -- in this two last GT cases, we have to recalculate size
                    Just Double
rw -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
nx Double
nw TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
nw) Double
newX Double
rw Bool
True TDigest comp
r)
                | Bool
otherwise -> Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
e TDigest comp
r)
              where
                rmin :: Double
rmin = TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
minimumValue TDigest comp
r
      where
        -- quantile approximation of current node
        cum' :: Double
cum' = Double
cum Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l
        q :: Double
q   = (Double
w Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
cum') Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
n

        -- threshold, max size of current node/centroid
        thr :: Double
thr = {- traceShowId $ traceShow (n, q) $ -} Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression

        -- We later use nx, nw and mrw:

        -- max size of current node
        dw :: Weight
        mrw :: Maybe Weight
        (Double
dw, Maybe Double
mrw) =
            let diff :: Double
diff = Bool -> String -> Double -> Double
forall a. Bool -> String -> a -> a
assert (Double
thr Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
w) String
"threshold should be larger than current node weight"
                     (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
w Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
newW Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
thr
            in if Double
diff Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0 -- i.e. there is room
                then (Double
newW, Maybe Double
forall a. Maybe a
Nothing)
                else (Double
thr Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
w, Double -> Maybe Double
forall a. a -> Maybe a
Just Double
diff)

        -- the change of current node
        (Double
nx, Double
nw) = {- traceShowId $ traceShow (newX, newW, x, dw, mrw) $ -} Double -> Double -> Double -> Double -> Centroid
combinedCentroid Double
x Double
w Double
x Double
dw

-- | Constructor which calculates size and total weight.
node :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
node :: Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r = Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node
    (Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r)
    Double
x Double
w
    (Double
w Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
r)
    TDigest comp
l TDigest comp
r

-- | Balance after right insertion.
balanceR :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
balanceR :: Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l TDigest comp
r
    | TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
1 = Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r
    | TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
balOmega Int -> Int -> Int
forall a. Num a => a -> a -> a
* TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l = case TDigest comp
r of
        TDigest comp
Nil -> String -> TDigest comp
forall a. HasCallStack => String -> a
error String
"balanceR: impossible happened"
        (Node Int
_ Double
rx Double
rw Double
_ TDigest comp
Nil TDigest comp
rr) ->
            -- assert (0 < balAlpha * size rr) "balanceR" $
                -- single left rotation
                Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw (Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
forall (comp :: Nat). TDigest comp
Nil) TDigest comp
rr
        (Node Int
_ Double
rx Double
rw Double
_ TDigest comp
rl TDigest comp
rr)
            | TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
rl Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
balAlpha Int -> Int -> Int
forall a. Num a => a -> a -> a
* TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
rr ->
                -- single left rotation
                Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw (Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
rl) TDigest comp
rr
        (Node Int
_ Double
rx Double
rw Double
_ (Node Int
_ Double
rlx Double
rlw Double
_ TDigest comp
rll TDigest comp
rlr) TDigest comp
rr) ->
                -- double left rotation
                Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rlx Double
rlw (Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
rll) (Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw TDigest comp
rlr TDigest comp
rr)
    | Bool
otherwise            = Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r

-- | Balance after left insertion.
balanceL :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
balanceL :: Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w TDigest comp
l TDigest comp
r
    | TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
1 = Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r
    | TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
balOmega Int -> Int -> Int
forall a. Num a => a -> a -> a
* TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r = case TDigest comp
l of
        TDigest comp
Nil -> String -> TDigest comp
forall a. HasCallStack => String -> a
error String
"balanceL: impossible happened"
        (Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll TDigest comp
Nil) ->
            -- assert (0 < balAlpha * size ll) "balanceL" $
                -- single right rotation
                Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll (Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
forall (comp :: Nat). TDigest comp
Nil TDigest comp
r)
        (Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll TDigest comp
lr)
            | TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
lr Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
balAlpha Int -> Int -> Int
forall a. Num a => a -> a -> a
* TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
ll ->
                -- single right rotation
                Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll (Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
lr TDigest comp
r)
        (Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll (Node Int
_ Double
lrx Double
lrw Double
_ TDigest comp
lrl TDigest comp
lrr)) ->
                -- double left rotation
                Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lrx Double
lrw (Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll TDigest comp
lrl) (Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
lrr TDigest comp
r)
    | Bool
otherwise = Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r

-- | Alias to 'Node'
node' :: Int -> Mean -> Weight -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
node' :: Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
node' = Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node

-- | Create singular node.
singNode :: Mean -> Weight -> TDigest comp
singNode :: Double -> Double -> TDigest comp
singNode Double
x Double
w = Int
-> Double
-> Double
-> Double
-> TDigest comp
-> TDigest comp
-> TDigest comp
forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node Int
1 Double
x Double
w Double
w TDigest comp
forall (comp :: Nat). TDigest comp
Nil TDigest comp
forall (comp :: Nat). TDigest comp
Nil

-- | Add two weighted means together.
combinedCentroid
    :: Mean -> Weight
    -> Mean -> Weight
    -> Centroid
combinedCentroid :: Double -> Double -> Double -> Double -> Centroid
combinedCentroid Double
x Double
w Double
x' Double
w' =
    ( (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
w Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
x' Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
w') Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
w'' -- this is probably not num. stable
    , Double
w''
    )
  where
    w'' :: Double
w'' = Double
w Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
w'

-- | Calculate the threshold, i.e. maximum weight of centroid.
threshold
    :: Double  -- ^ total weight
    -> Double  -- ^ quantile
    -> Double  -- ^ compression (1/δ)
    -> Double
threshold :: Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression = Double
4 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
q Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
q) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
compression

-------------------------------------------------------------------------------
-- Compression
-------------------------------------------------------------------------------

-- | Compress 'TDigest'.
--
-- Reinsert the centroids in "better" order (in original paper: in random)
-- so they have opportunity to merge.
--
-- Compression will happen only if size is both:
-- bigger than @'relMaxSize' * comp@ and bigger than 'absMaxSize'.
--
compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
compress :: TDigest comp -> TDigest comp
compress TDigest comp
Nil = TDigest comp
forall (comp :: Nat). TDigest comp
Nil
compress TDigest comp
td
    | TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
relMaxSize Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
compression Bool -> Bool -> Bool
&& TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
absMaxSize
        = TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
td
    | Bool
otherwise
        = TDigest comp
td
  where
    compression :: Int
compression = Integer -> Int
forall a. Num a => Integer -> a
fromInteger (Integer -> Int) -> Integer -> Int
forall a b. (a -> b) -> a -> b
$ Proxy comp -> Integer
forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (Proxy comp
forall k (t :: k). Proxy t
Proxy :: Proxy comp)

-- | Perform compression, even if current size says it's not necessary.
forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
forceCompress :: TDigest comp -> TDigest comp
forceCompress TDigest comp
Nil = TDigest comp
forall (comp :: Nat). TDigest comp
Nil
forceCompress TDigest comp
td =
    (TDigest comp -> Centroid -> TDigest comp)
-> TDigest comp -> [Centroid] -> TDigest comp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Centroid -> TDigest comp -> TDigest comp)
-> TDigest comp -> Centroid -> TDigest comp
forall a b c. (a -> b -> c) -> b -> a -> c
flip Centroid -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest comp
forall (comp :: Nat). TDigest comp
emptyTDigest ([Centroid] -> TDigest comp) -> [Centroid] -> TDigest comp
forall a b. (a -> b) -> a -> b
$ ((Centroid, Double) -> Centroid)
-> [(Centroid, Double)] -> [Centroid]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Centroid, Double) -> Centroid
forall a b. (a, b) -> a
fst ([(Centroid, Double)] -> [Centroid])
-> [(Centroid, Double)] -> [Centroid]
forall a b. (a -> b) -> a -> b
$ Vector (Centroid, Double) -> [(Centroid, Double)]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector (Centroid, Double)
centroids
  where
    -- Centroids are shuffled based on space
    centroids :: VU.Vector (Centroid, Double)
    centroids :: Vector (Centroid, Double)
centroids = (forall s. ST s (Vector (Centroid, Double)))
-> Vector (Centroid, Double)
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector (Centroid, Double)))
 -> Vector (Centroid, Double))
-> (forall s. ST s (Vector (Centroid, Double)))
-> Vector (Centroid, Double)
forall a b. (a -> b) -> a -> b
$ do
        MVector s (Centroid, Double)
v <- TDigest comp -> ST s (MVector s (Centroid, Double))
forall (comp :: Nat) s.
KnownNat comp =>
TDigest comp -> ST s (MVector s (Centroid, Double))
toMVector TDigest comp
td
        -- sort by cumulative weight
        Comparison (Centroid, Double)
-> MVector (PrimState (ST s)) (Centroid, Double) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VHeap.sortBy (((Centroid, Double) -> Double) -> Comparison (Centroid, Double)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Centroid, Double) -> Double
forall a b. (a, b) -> b
snd) MVector s (Centroid, Double)
MVector (PrimState (ST s)) (Centroid, Double)
v
        MVector (PrimState (ST s)) (Centroid, Double)
-> ST s (Vector (Centroid, Double))
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.unsafeFreeze MVector s (Centroid, Double)
MVector (PrimState (ST s)) (Centroid, Double)
v

toMVector
    :: forall comp s. KnownNat comp
    => TDigest comp                           -- ^ t-Digest
    -> ST s (VU.MVector s (Centroid, Double)) -- ^ return also a "space left in the centroid" value for "shuffling"
toMVector :: TDigest comp -> ST s (MVector s (Centroid, Double))
toMVector TDigest comp
td = do
    MVector s (Centroid, Double)
v <- Int -> ST s (MVector (PrimState (ST s)) (Centroid, Double))
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
MVU.new (TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td)
    (Int
i, Double
cum) <- MVector (PrimState (ST s)) (Centroid, Double)
-> Int -> Double -> TDigest comp -> ST s (Int, Double)
forall (f :: * -> *) (compression :: Nat).
PrimMonad f =>
MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector s (Centroid, Double)
MVector (PrimState (ST s)) (Centroid, Double)
v (Int
0 :: Int) (Double
0 :: Double) TDigest comp
td
    MVector s (Centroid, Double) -> ST s (MVector s (Centroid, Double))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MVector s (Centroid, Double)
 -> ST s (MVector s (Centroid, Double)))
-> MVector s (Centroid, Double)
-> ST s (MVector s (Centroid, Double))
forall a b. (a -> b) -> a -> b
$ Bool
-> String
-> MVector s (Centroid, Double)
-> MVector s (Centroid, Double)
forall a. Bool -> String -> a -> a
assert (Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td Bool -> Bool -> Bool
&& Double -> Double
forall a. Num a => a -> a
abs (Double
cum Double -> Double -> Double
forall a. Num a => a -> a -> a
- TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td) Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
1e-6) String
"traversal in toMVector:" MVector s (Centroid, Double)
v
  where
    go :: MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
_ Int
i Double
cum TDigest compression
Nil                   = (Int, Double) -> f (Int, Double)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
i, Double
cum)
    go MVector (PrimState f) (Centroid, Double)
v Int
i Double
cum (Node Int
_ Double
x Double
w Double
_ TDigest compression
l TDigest compression
r) = do
        (Int
i', Double
cum') <- MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
v Int
i Double
cum TDigest compression
l
        MVector (PrimState f) (Centroid, Double)
-> Int -> (Centroid, Double) -> f ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVU.unsafeWrite MVector (PrimState f) (Centroid, Double)
v Int
i' ((Double
x, Double
w), Double -> Double -> Double
space Double
w Double
cum')
        MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
v (Int
i' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Double
cum' Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
w) TDigest compression
r

    n :: Double
n = TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td
    compression :: Double
compression = Integer -> Double
forall a. Num a => Integer -> a
fromInteger (Integer -> Double) -> Integer -> Double
forall a b. (a -> b) -> a -> b
$ Proxy comp -> Integer
forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (Proxy comp
forall k (t :: k). Proxy t
Proxy :: Proxy comp)

    space :: Double -> Double -> Double
space Double
w Double
cum = Double
thr Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
w
      where
        q :: Double
q     = (Double
w Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
cum) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
n
        thr :: Double
thr   = Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression

-------------------------------------------------------------------------------
-- Params
-------------------------------------------------------------------------------

-- | Relative size parameter. Hard-coded value: 25.
relMaxSize :: Int
relMaxSize :: Int
relMaxSize = Int
25

-- | Absolute size parameter. Hard-coded value: 1000.
absMaxSize :: Int
absMaxSize :: Int
absMaxSize = Int
1000

-------------------------------------------------------------------------------
-- Tree balance parameters
-------------------------------------------------------------------------------

balOmega :: Int
balOmega :: Int
balOmega = Int
3

balAlpha :: Int
balAlpha :: Int
balAlpha = Int
2

-- balDelta = 0

-------------------------------------------------------------------------------
-- Debug
-------------------------------------------------------------------------------

-- | Output the 'TDigest' tree.
debugPrint :: TDigest comp -> IO ()
debugPrint :: TDigest comp -> IO ()
debugPrint TDigest comp
td = Int -> TDigest comp -> IO ()
forall (compression :: Nat). Int -> TDigest compression -> IO ()
go Int
0 TDigest comp
td
  where
    go :: Int -> TDigest compression -> IO ()
go Int
i TDigest compression
Nil = String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ Int -> Char -> String
forall a. Int -> a -> [a]
replicate (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
3) Char
' ' String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"Nil"
    go Int
i (Node Int
s Double
m Double
w Double
tw TDigest compression
l TDigest compression
r) = do
        Int -> TDigest compression -> IO ()
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) TDigest compression
l
        String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ Int -> Char -> String
forall a. Int -> a -> [a]
replicate (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
3) Char
' ' String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"Node " String -> ShowS
forall a. [a] -> [a] -> [a]
++ (Int, Double, Double, Double) -> String
forall a. Show a => a -> String
show (Int
s,Double
m,Double
w,Double
tw)
        Int -> TDigest compression -> IO ()
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) TDigest compression
r

-- | @'isRight' . 'validate'@
valid :: TDigest comp -> Bool
valid :: TDigest comp -> Bool
valid = Either String (TDigest comp) -> Bool
forall a b. Either a b -> Bool
isRight (Either String (TDigest comp) -> Bool)
-> (TDigest comp -> Either String (TDigest comp))
-> TDigest comp
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest comp -> Either String (TDigest comp)
forall (comp :: Nat). TDigest comp -> Either String (TDigest comp)
validate

-- | Check various invariants in the 'TDigest' tree.
validate :: TDigest comp -> Either String (TDigest comp)
validate :: TDigest comp -> Either String (TDigest comp)
validate TDigest comp
td
    | Bool -> Bool
not ((TDigest comp -> Bool) -> [TDigest comp] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all TDigest comp -> Bool
forall (comp :: Nat). TDigest comp -> Bool
sizeValid   [TDigest comp]
centroids) = String -> Either String (TDigest comp)
forall a b. a -> Either a b
Left String
"invalid sizes"
    | Bool -> Bool
not ((TDigest comp -> Bool) -> [TDigest comp] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all TDigest comp -> Bool
forall (comp :: Nat). TDigest comp -> Bool
weightValid [TDigest comp]
centroids) = String -> Either String (TDigest comp)
forall a b. a -> Either a b
Left String
"invalid weights"
    | Bool -> Bool
not ((TDigest comp -> Bool) -> [TDigest comp] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all TDigest comp -> Bool
forall (comp :: Nat). TDigest comp -> Bool
orderValid  [TDigest comp]
centroids) = String -> Either String (TDigest comp)
forall a b. a -> Either a b
Left String
"invalid ordering"
    | Bool -> Bool
not ((TDigest comp -> Bool) -> [TDigest comp] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all TDigest comp -> Bool
forall (comp :: Nat). TDigest comp -> Bool
balanced    [TDigest comp]
centroids) = String -> Either String (TDigest comp)
forall a b. a -> Either a b
Left String
"tree is ill-balanced"
    | Bool
otherwise = TDigest comp -> Either String (TDigest comp)
forall a b. b -> Either a b
Right TDigest comp
td
  where
    centroids :: [TDigest comp]
centroids = TDigest comp -> [TDigest comp]
forall (compression :: Nat).
TDigest compression -> [TDigest compression]
goc TDigest comp
td

    goc :: TDigest compression -> [TDigest compression]
goc TDigest compression
Nil = []
    goc n :: TDigest compression
n@(Node Int
_ Double
_ Double
_ Double
_ TDigest compression
l TDigest compression
r) = TDigest compression
n TDigest compression
-> [TDigest compression] -> [TDigest compression]
forall a. a -> [a] -> [a]
: TDigest compression -> [TDigest compression]
goc TDigest compression
l [TDigest compression]
-> [TDigest compression] -> [TDigest compression]
forall a. [a] -> [a] -> [a]
++ TDigest compression -> [TDigest compression]
goc TDigest compression
r

    sizeValid :: TDigest comp -> Bool
sizeValid TDigest comp
Nil = Bool
True
    sizeValid (Node Int
s Double
_ Double
_ Double
_ TDigest comp
l TDigest comp
r) = Int
s Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

    weightValid :: TDigest comp -> Bool
weightValid TDigest comp
Nil = Bool
True
    weightValid (Node Int
_ Double
_ Double
w Double
tw TDigest comp
l TDigest comp
r) = Double -> Double -> Bool
eq Double
tw (Double -> Bool) -> Double -> Bool
forall a b. (a -> b) -> a -> b
$ Double
w Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l Double -> Double -> Double
forall a. Num a => a -> a -> a
+ TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
r

    orderValid :: TDigest compression -> Bool
orderValid TDigest compression
Nil = Bool
True
    orderValid (Node Int
_ Double
_ Double
_ Double
_ TDigest compression
Nil                 TDigest compression
Nil)                 = Bool
True
    orderValid (Node Int
_ Double
x Double
_ Double
_ (Node Int
_ Double
lx Double
_ Double
_ TDigest compression
_ TDigest compression
_) TDigest compression
Nil)                 = Double
lx Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
x
    orderValid (Node Int
_ Double
x Double
_ Double
_ TDigest compression
Nil                 (Node Int
_ Double
rx Double
_ Double
_ TDigest compression
_ TDigest compression
_)) = Double
x Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
rx
    orderValid (Node Int
_ Double
x Double
_ Double
_ (Node Int
_ Double
lx Double
_ Double
_ TDigest compression
_ TDigest compression
_) (Node Int
_ Double
rx Double
_ Double
_ TDigest compression
_ TDigest compression
_)) = Double
lx Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
x Bool -> Bool -> Bool
&& Double
x Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
rx

    balanced :: TDigest comp -> Bool
balanced TDigest comp
Nil = Bool
True
    balanced (Node Int
_ Double
_ Double
_ Double
_ TDigest comp
l TDigest comp
r) =
        TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 (Int
balOmega Int -> Int -> Int
forall a. Num a => a -> a -> a
* TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r) Bool -> Bool -> Bool
&&
        TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 (Int
balOmega Int -> Int -> Int
forall a. Num a => a -> a -> a
* TDigest comp -> Int
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l)

-------------------------------------------------------------------------------
-- Higher level helpers
-------------------------------------------------------------------------------

-- | Insert single value into 'TDigest'.
insert
    :: KnownNat comp
    => Double         -- ^ element
    -> TDigest comp
    -> TDigest comp
insert :: Double -> TDigest comp -> TDigest comp
insert Double
x = TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress (TDigest comp -> TDigest comp)
-> (TDigest comp -> TDigest comp) -> TDigest comp -> TDigest comp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert' Double
x

-- | Insert single value, don't compress 'TDigest' even if needed.
--
-- For sensibly bounded input, it makes sense to let 'TDigest' grow (it might
-- grow linearly in size), and after that compress it once.
insert'
    :: KnownNat comp
    => Double         -- ^ element
    -> TDigest comp
    -> TDigest comp
insert' :: Double -> TDigest comp -> TDigest comp
insert' Double
x = Centroid -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid (Double
x, Double
1)

-- | Make a 'TDigest' of a single data point.
singleton :: KnownNat comp => Double -> TDigest comp
singleton :: Double -> TDigest comp
singleton Double
x = Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert Double
x TDigest comp
forall (comp :: Nat). TDigest comp
emptyTDigest

-- | Strict 'foldl'' over 'Foldable' structure.
tdigest :: (Foldable f, KnownNat comp) => f Double -> TDigest comp
tdigest :: f Double -> TDigest comp
tdigest = (TDigest comp -> [Double] -> TDigest comp)
-> TDigest comp -> [[Double]] -> TDigest comp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' TDigest comp -> [Double] -> TDigest comp
forall (comp :: Nat) (t :: * -> *).
(KnownNat comp, Foldable t) =>
TDigest comp -> t Double -> TDigest comp
insertChunk TDigest comp
forall (comp :: Nat). TDigest comp
emptyTDigest ([[Double]] -> TDigest comp)
-> (f Double -> [[Double]]) -> f Double -> TDigest comp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Double] -> [[Double]]
forall a. [a] -> [[a]]
chunks ([Double] -> [[Double]])
-> (f Double -> [Double]) -> f Double -> [[Double]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. f Double -> [Double]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
  where
    -- compress after each chunk, forceCompress at the very end.
    insertChunk :: TDigest comp -> t Double -> TDigest comp
insertChunk TDigest comp
td t Double
xs =
        TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress ((TDigest comp -> Double -> TDigest comp)
-> TDigest comp -> t Double -> TDigest comp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Double -> TDigest comp -> TDigest comp)
-> TDigest comp -> Double -> TDigest comp
forall a b c. (a -> b -> c) -> b -> a -> c
flip Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert') TDigest comp
td t Double
xs)

    chunks :: [a] -> [[a]]
chunks [] = []
    chunks [a]
xs =
        let ([a]
a, [a]
b) = Int -> [a] -> ([a], [a])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
1000 [a]
xs -- 1000 is totally arbitrary.
        in [a]
a [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: [a] -> [[a]]
chunks [a]
b

-- $setup
-- >>> :set -XDataKinds