{-# LANGUAGE DataKinds             #-}
{-# LANGUAGE KindSignatures        #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables   #-}
module Data.TDigest.Vector.Internal where

import Control.DeepSeq        (NFData (..))
import Data.Either            (isRight)
import Data.Foldable          (toList)
import Data.List              (foldl', sortBy)
import Data.List.NonEmpty     (nonEmpty)
import Data.Ord               (comparing)
import Data.Proxy             (Proxy (..))
import Data.Semigroup         (Semigroup (..))
import Data.Semigroup.Reducer (Reducer (..))
import GHC.TypeLits           (KnownNat, Nat, natVal)
import Prelude ()
import Prelude.Compat

import qualified Data.Vector.Unboxed as VU

import           Data.TDigest.Internal
import qualified Data.TDigest.Postprocess.Internal as PP

-- import Debug.Trace
--
-- | 'TDigest' is a vector of centroids plus not yet merged elements.
--
-- The size of structure is dictated by @compression@, *𝛿*. And is *O(𝛿)*.
--
data TDigest (compression :: Nat) = TDigest
    { TDigest compression -> Size
tdigestTotalWeight :: !Size                  -- ^ sum of vector and buffer size
    , TDigest compression -> Vector Centroid
tdigestData        :: !(VU.Vector Centroid)  -- ^ actual data. *Invariants:* sorted by mean; length <= 2 𝛿 (soft)
    , TDigest compression -> Size
tdigestBufferSize  :: !Size
    , TDigest compression -> [Double]
tdigestBuffer      :: [Double]               -- ^ addition buffer, elements with weight 1. *Invariants:* length 2 <= 𝛿
    , TDigest compression -> Bool
tdigestDirection   :: !Bool                  -- ^ direction is a hack, so we merge from left and right. *TODO* remove?
    }
  deriving Size -> TDigest compression -> ShowS
[TDigest compression] -> ShowS
TDigest compression -> String
(Size -> TDigest compression -> ShowS)
-> (TDigest compression -> String)
-> ([TDigest compression] -> ShowS)
-> Show (TDigest compression)
forall a.
(Size -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall (compression :: Nat). Size -> TDigest compression -> ShowS
forall (compression :: Nat). [TDigest compression] -> ShowS
forall (compression :: Nat). TDigest compression -> String
showList :: [TDigest compression] -> ShowS
$cshowList :: forall (compression :: Nat). [TDigest compression] -> ShowS
show :: TDigest compression -> String
$cshow :: forall (compression :: Nat). TDigest compression -> String
showsPrec :: Size -> TDigest compression -> ShowS
$cshowsPrec :: forall (compression :: Nat). Size -> TDigest compression -> ShowS
Show

instance KnownNat comp => Semigroup (TDigest comp) where
    <> :: TDigest comp -> TDigest comp -> TDigest comp
(<>) = TDigest comp -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineTDigest

instance KnownNat comp => Monoid (TDigest comp) where
    mempty :: TDigest comp
mempty = TDigest comp
forall (comp :: Nat). TDigest comp
emptyTDigest
    mappend :: TDigest comp -> TDigest comp -> TDigest comp
mappend = TDigest comp -> TDigest comp -> TDigest comp
forall a. Semigroup a => a -> a -> a
(<>)

-- | Both 'cons' and 'snoc' are 'insert'
instance KnownNat comp => Reducer Double (TDigest comp) where
    cons :: Double -> TDigest comp -> TDigest comp
cons = Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
    snoc :: TDigest comp -> Double -> TDigest comp
snoc = (Double -> TDigest comp -> TDigest comp)
-> TDigest comp -> Double -> TDigest comp
forall a b c. (a -> b -> c) -> b -> a -> c
flip Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
    unit :: Double -> TDigest comp
unit = Double -> TDigest comp
forall (comp :: Nat). Double -> TDigest comp
singleton

instance NFData (TDigest comp) where
    rnf :: TDigest comp -> ()
rnf (TDigest Size
_ Vector Centroid
_ Size
_ [Double]
b Bool
_) = [Double] -> ()
forall a. NFData a => a -> ()
rnf [Double]
b

instance KnownNat comp => PP.HasHistogram (TDigest comp) Maybe where
    histogram :: TDigest comp -> Maybe (NonEmpty HistBin)
histogram = (NonEmpty Centroid -> NonEmpty HistBin)
-> Maybe (NonEmpty Centroid) -> Maybe (NonEmpty HistBin)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap NonEmpty Centroid -> NonEmpty HistBin
PP.histogramFromCentroids (Maybe (NonEmpty Centroid) -> Maybe (NonEmpty HistBin))
-> (TDigest comp -> Maybe (NonEmpty Centroid))
-> TDigest comp
-> Maybe (NonEmpty HistBin)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Centroid] -> Maybe (NonEmpty Centroid)
forall a. [a] -> Maybe (NonEmpty a)
nonEmpty ([Centroid] -> Maybe (NonEmpty Centroid))
-> (TDigest comp -> [Centroid])
-> TDigest comp
-> Maybe (NonEmpty Centroid)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Centroid -> [Centroid]
forall a. Unbox a => Vector a -> [a]
VU.toList (Vector Centroid -> [Centroid])
-> (TDigest comp -> Vector Centroid) -> TDigest comp -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest comp -> Vector Centroid
forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData (TDigest comp -> Vector Centroid)
-> (TDigest comp -> TDigest comp)
-> TDigest comp
-> Vector Centroid
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
finalize
    totalWeight :: TDigest comp -> Double
totalWeight = TDigest comp -> Double
forall (comp :: Nat). TDigest comp -> Double
totalWeight

-- | Size of structure
size :: TDigest comp -> Int
size :: TDigest comp -> Size
size TDigest comp
td = Vector Centroid -> Size
forall a. Unbox a => Vector a -> Size
VU.length (TDigest comp -> Vector Centroid
forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData TDigest comp
td) Size -> Size -> Size
forall a. Num a => a -> a -> a
+ TDigest comp -> Size
forall (compression :: Nat). TDigest compression -> Size
tdigestBufferSize TDigest comp
td

totalWeight :: TDigest comp -> Weight
totalWeight :: TDigest comp -> Double
totalWeight = Size -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Size -> Double)
-> (TDigest comp -> Size) -> TDigest comp -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest comp -> Size
forall (compression :: Nat). TDigest compression -> Size
tdigestTotalWeight

-- | Center of left-most centroid. Note: may be different than min element inserted.
--
-- >>> minimumValue (tdigest [1..100] :: TDigest 3)
-- 1.0
--
minimumValue :: KnownNat comp => TDigest comp -> Mean
minimumValue :: TDigest comp -> Double
minimumValue TDigest comp
td
    | Vector Centroid -> Bool
forall a. Unbox a => Vector a -> Bool
VU.null Vector Centroid
d = Double
posInf
    | Bool
otherwise = Centroid -> Double
forall a b. (a, b) -> a
fst (Vector Centroid -> Centroid
forall a. Unbox a => Vector a -> a
VU.head Vector Centroid
d)
  where
    d :: Vector Centroid
d = TDigest comp -> Vector Centroid
forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData (TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
finalize TDigest comp
td)

-- | Center of right-most centroid. Note: may be different than max element inserted.
--
-- >>> maximumValue (tdigest [1..100] :: TDigest 3)
-- 100.0
--
maximumValue :: KnownNat comp => TDigest comp -> Mean
maximumValue :: TDigest comp -> Double
maximumValue TDigest comp
td
    | Vector Centroid -> Bool
forall a. Unbox a => Vector a -> Bool
VU.null Vector Centroid
d = Double
posInf
    | Bool
otherwise = Centroid -> Double
forall a b. (a, b) -> a
fst (Vector Centroid -> Centroid
forall a. Unbox a => Vector a -> a
VU.last Vector Centroid
d)
  where
    d :: Vector Centroid
d = TDigest comp -> Vector Centroid
forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData (TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
finalize TDigest comp
td)

-------------------------------------------------------------------------------
-- Mapping function
-------------------------------------------------------------------------------

-- | Mapping from quantile *q* to notional index *k* with compression parameter *𝛿*.
--
-- >>> ksize 42 0
-- 0.0
--
-- >>> ksize 42 1
-- 42.0
--
-- *q@ is clamped.:
--
-- >>> ksize 42 2
-- 42.0
--
ksize
    :: Double  -- ^ compression parameter, 𝛿
    -> Double  -- ^ quantile, q
    -> Double  -- ^ notional index, k
ksize :: Double -> Double -> Double
ksize Double
comp Double
q = Double
comp Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double -> Double
forall a. Floating a => a -> a
asin (Double
2 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
clamp Double
q Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
forall a. Floating a => a
pi  Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
0.5)

clamp :: Double -> Double
clamp :: Double -> Double
clamp Double
x
    | Double
x Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0.0   = Double
0.0
    | Double
x Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
1.0   = Double
1.0
    | Bool
otherwise = Double
x

-- | Inverse of 'ksize'.
--
-- >>> ksizeInv 42 0
-- 0.0
--
-- >>> ksizeInv 42 42
-- 1.0
--
-- >>> ksizeInv 42 (ksize 42 0.3)
-- 0.3
--
ksizeInv
    :: Double  -- ^ compression parameter, 𝛿
    -> Double  -- ^ notional index, k
    -> Double  -- ^ quantile, q
ksizeInv :: Double -> Double -> Double
ksizeInv Double
comp Double
k
    | Double
k Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
comp = Double
1
    | Double
k Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0    = Double
0
    | Bool
otherwise = Double
0.5 Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double -> Double
forall a. Floating a => a -> a
sin ((Double
k Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
comp Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
0.5) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
forall a. Floating a => a
pi) Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1)

-------------------------------------------------------------------------------
-- Merging
-------------------------------------------------------------------------------

merge :: Int -> Double -> [(Mean, Weight)] -> [(Mean, Weight)]
merge :: Size -> Double -> [Centroid] -> [Centroid]
merge Size
_   Double
_    []     = []
merge Size
tw' Double
comp (Centroid
y:[Centroid]
ys) = Double -> Double -> Centroid -> [Centroid] -> [Centroid]
go Double
0 (Double -> Double
qLimit' Double
0) Centroid
y [Centroid]
ys
  where
    -- total weight
    tw :: Double
tw = Size -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Size
tw'

    qLimit' :: Double -> Double
    qLimit' :: Double -> Double
qLimit' Double
q0 = Double -> Double -> Double
ksizeInv Double
comp (Double -> Double -> Double
ksize Double
comp Double
q0 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1)  -- k⁻¹ (k (q₀, 𝛿) + 1, 𝛿)

    go :: Double         -- q0
       -> Double         -- qLimit
       -> (Mean, Weight)   -- sigma
       -> [(Mean, Weight)]
       -> [(Mean, Weight)]
    go :: Double -> Double -> Centroid -> [Centroid] -> [Centroid]
go Double
_q0 Double
_qLimit Centroid
sigma [] = [Centroid
sigma] -- C'.append(σ)
    go  Double
q0  Double
qLimit Centroid
sigma (Centroid
x:[Centroid]
xs)
        | Double
q Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
qLimit = Double -> Double -> Centroid -> [Centroid] -> [Centroid]
go Double
q0 Double
qLimit (Centroid -> Centroid -> Centroid
plus Centroid
sigma Centroid
x) [Centroid]
xs
        | Bool
otherwise   = Centroid
sigma Centroid -> [Centroid] -> [Centroid]
forall a. a -> [a] -> [a]
: Double -> Double -> Centroid -> [Centroid] -> [Centroid]
go Double
q0' (Double -> Double
qLimit' Double
q0') Centroid
x [Centroid]
xs
-- traceShow ("q", sigma, x, q, qLimit) $
      where
        q :: Double
q = Double
q0 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ (Centroid -> Double
forall a b. (a, b) -> b
snd Centroid
sigma Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Centroid -> Double
forall a b. (a, b) -> b
snd Centroid
x) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
tw
        q0' :: Double
q0' = Double
q0 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Centroid -> Double
forall a b. (a, b) -> b
snd Centroid
sigma Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
tw

    plus :: Centroid -> Centroid -> Centroid
    plus :: Centroid -> Centroid -> Centroid
plus (Double
m1,Double
w1) (Double
m2,Double
w2) = ((Double
m1 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
w1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
m2 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
w2) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
w, Double
w) where w :: Double
w = Double
w1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
w2

-------------------------------------------------------------------------------
-- Implementation
-------------------------------------------------------------------------------

emptyTDigest :: TDigest comp
emptyTDigest :: TDigest comp
emptyTDigest = Size -> Vector Centroid -> Size -> [Double] -> Bool -> TDigest comp
forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest Size
0 Vector Centroid
forall a. Monoid a => a
mempty Size
0 [Double]
forall a. Monoid a => a
mempty Bool
True

combineTDigest :: forall comp. KnownNat comp => TDigest comp -> TDigest comp -> TDigest comp
combineTDigest :: TDigest comp -> TDigest comp -> TDigest comp
combineTDigest (TDigest Size
tw Vector Centroid
d Size
_ [Double]
b Bool
dir) (TDigest Size
tw' Vector Centroid
d' Size
_ [Double]
b' Bool
dir') =
    Size -> Vector Centroid -> Size -> [Double] -> Bool -> TDigest comp
forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest (Size
tw Size -> Size -> Size
forall a. Num a => a -> a -> a
+ Size
tw') Vector Centroid
newD Size
0 [] (Bool
dir Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
/= Bool
dir')
  where
    newD :: Vector Centroid
newD = [Centroid] -> Vector Centroid
forall a. Unbox a => [a] -> Vector a
VU.fromList
        ([Centroid] -> Vector Centroid)
-> ([Centroid] -> [Centroid]) -> [Centroid] -> Vector Centroid
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Size -> Double -> [Centroid] -> [Centroid]
merge Size
tw Double
comp
        ([Centroid] -> [Centroid])
-> ([Centroid] -> [Centroid]) -> [Centroid] -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Centroid -> Centroid -> Ordering) -> [Centroid] -> [Centroid]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy ((Centroid -> Double) -> Centroid -> Centroid -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing Centroid -> Double
forall a b. (a, b) -> a
fst)   -- sort
        ([Centroid] -> Vector Centroid) -> [Centroid] -> Vector Centroid
forall a b. (a -> b) -> a -> b
$ Vector Centroid -> [Centroid]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Centroid
d [Centroid] -> [Centroid] -> [Centroid]
forall a. [a] -> [a] -> [a]
++ Vector Centroid -> [Centroid]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Centroid
d' [Centroid] -> [Centroid] -> [Centroid]
forall a. [a] -> [a] -> [a]
++ (Double -> Centroid) -> [Double] -> [Centroid]
forall a b. (a -> b) -> [a] -> [b]
map ((Double -> Double -> Centroid) -> Double -> Double -> Centroid
forall a b c. (a -> b -> c) -> b -> a -> c
flip (,) Double
1) ([Double]
b [Double] -> [Double] -> [Double]
forall a. [a] -> [a] -> [a]
++ [Double]
b')

    comp :: Double
comp = Integer -> Double
forall a. Num a => Integer -> a
fromInteger (Proxy comp -> Integer
forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (Proxy comp
forall k (t :: k). Proxy t
Proxy :: Proxy comp)) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
forall a. Num a => a
sizeCoefficient

-- | Flush insertion buffer
finalize :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
finalize :: TDigest comp -> TDigest comp
finalize TDigest comp
td
    | [Double] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (TDigest comp -> [Double]
forall (compression :: Nat). TDigest compression -> [Double]
tdigestBuffer TDigest comp
td) = TDigest comp
td
    | Bool
otherwise               = TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
td

forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
forceCompress :: TDigest comp -> TDigest comp
forceCompress (TDigest Size
tw Vector Centroid
d Size
_bs [Double]
b Bool
dir) = Size -> Vector Centroid -> Size -> [Double] -> Bool -> TDigest comp
forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest Size
tw Vector Centroid
d' Size
0 [] (Bool -> Bool
not Bool
dir)
  where
    d' :: Vector Centroid
d' = [Centroid] -> Vector Centroid
forall a. Unbox a => [a] -> Vector a
VU.fromList
       ([Centroid] -> Vector Centroid)
-> (Vector Centroid -> [Centroid])
-> Vector Centroid
-> Vector Centroid
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Centroid] -> [Centroid]
forall a. [a] -> [a]
rev
       ([Centroid] -> [Centroid])
-> (Vector Centroid -> [Centroid]) -> Vector Centroid -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Size -> Double -> [Centroid] -> [Centroid]
merge Size
tw Double
comp            -- compress
       ([Centroid] -> [Centroid])
-> (Vector Centroid -> [Centroid]) -> Vector Centroid -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Centroid] -> [Centroid]
forall a. [a] -> [a]
rev
       ([Centroid] -> [Centroid])
-> (Vector Centroid -> [Centroid]) -> Vector Centroid -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Centroid -> Centroid -> Ordering) -> [Centroid] -> [Centroid]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy ((Centroid -> Double) -> Centroid -> Centroid -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing Centroid -> Double
forall a b. (a, b) -> a
fst)   -- sort
       ([Centroid] -> [Centroid])
-> (Vector Centroid -> [Centroid]) -> Vector Centroid -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Centroid] -> [Centroid] -> [Centroid]
forall a. [a] -> [a] -> [a]
++ (Double -> Centroid) -> [Double] -> [Centroid]
forall a b. (a -> b) -> [a] -> [b]
map ((Double -> Double -> Centroid) -> Double -> Double -> Centroid
forall a b c. (a -> b -> c) -> b -> a -> c
flip (,) Double
1) [Double]
b)  -- add buffer
       ([Centroid] -> [Centroid])
-> (Vector Centroid -> [Centroid]) -> Vector Centroid -> [Centroid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Centroid -> [Centroid]
forall a. Unbox a => Vector a -> [a]
VU.toList
       (Vector Centroid -> Vector Centroid)
-> Vector Centroid -> Vector Centroid
forall a b. (a -> b) -> a -> b
$ Vector Centroid
d
    comp :: Double
comp = Integer -> Double
forall a. Num a => Integer -> a
fromInteger (Proxy comp -> Integer
forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (Proxy comp
forall k (t :: k). Proxy t
Proxy :: Proxy comp)) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
forall a. Num a => a
sizeCoefficient
    rev :: [a] -> [a]
rev | Bool
dir       = [a] -> [a]
forall a. a -> a
id
        | Bool
otherwise = [a] -> [a]
forall a. [a] -> [a]
reverse

compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
compress :: TDigest comp -> TDigest comp
compress t :: TDigest comp
t@(TDigest Size
_ Vector Centroid
_ Size
bs [Double]
_ Bool
_)
    | Size
bs Size -> Size -> Bool
forall a. Ord a => a -> a -> Bool
> Size
compInt Size -> Size -> Size
forall a. Num a => a -> a -> a
* Size
2 = TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
t
    | Bool
otherwise        = TDigest comp
t
  where
    compInt :: Size
compInt = Integer -> Size
forall a. Num a => Integer -> a
fromInteger (Proxy comp -> Integer
forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (Proxy comp
forall k (t :: k). Proxy t
Proxy :: Proxy comp)) Size -> Size -> Size
forall a. Num a => a -> a -> a
* Size
forall a. Num a => a
sizeCoefficient

-------------------------------------------------------------------------------
-- Params
-------------------------------------------------------------------------------

sizeCoefficient :: Num a => a
sizeCoefficient :: a
sizeCoefficient = a
32

-------------------------------------------------------------------------------
-- Debug
-------------------------------------------------------------------------------

-- | @'isRight' . 'validate'@
valid :: TDigest comp -> Bool
valid :: TDigest comp -> Bool
valid = Either String (TDigest comp) -> Bool
forall a b. Either a b -> Bool
isRight (Either String (TDigest comp) -> Bool)
-> (TDigest comp -> Either String (TDigest comp))
-> TDigest comp
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest comp -> Either String (TDigest comp)
forall (comp :: Nat). TDigest comp -> Either String (TDigest comp)
validate

-- | Check various invariants in the 'TDigest' structure.
validate :: TDigest comp -> Either String (TDigest comp)
validate :: TDigest comp -> Either String (TDigest comp)
validate td :: TDigest comp
td@(TDigest Size
tw Vector Centroid
d Size
bs [Double]
b Bool
_dir)
    | Bool -> Bool
not (Size
bs Size -> Size -> Bool
forall a. Eq a => a -> a -> Bool
== [Double] -> Size
forall (t :: * -> *) a. Foldable t => t a -> Size
length [Double]
b) =
        String -> Either String (TDigest comp)
forall a b. a -> Either a b
Left (String -> Either String (TDigest comp))
-> String -> Either String (TDigest comp)
forall a b. (a -> b) -> a -> b
$ String
"Buffer lenght don't match: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ (Size, Size) -> String
forall a. Show a => a -> String
show (Size
bs, [Double] -> Size
forall (t :: * -> *) a. Foldable t => t a -> Size
length [Double]
b)
    | Bool -> Bool
not (Size
tw Size -> Size -> Bool
forall a. Eq a => a -> a -> Bool
== Size
bs Size -> Size -> Size
forall a. Num a => a -> a -> a
+ Double -> Size
forall a b. (RealFrac a, Integral b) => a -> b
round Double
dw) =
        String -> Either String (TDigest comp)
forall a b. a -> Either a b
Left String
"Total weight doesn't match"
    | [Centroid]
dl [Centroid] -> [Centroid] -> Bool
forall a. Eq a => a -> a -> Bool
/= (Centroid -> Centroid -> Ordering) -> [Centroid] -> [Centroid]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy ((Centroid -> Double) -> Centroid -> Centroid -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing Centroid -> Double
forall a b. (a, b) -> a
fst) [Centroid]
dl =
        String -> Either String (TDigest comp)
forall a b. a -> Either a b
Left String
"Data buffer isn't ordered"
    | Bool
otherwise = TDigest comp -> Either String (TDigest comp)
forall a b. b -> Either a b
Right TDigest comp
td
  where
    dl :: [Centroid]
    dl :: [Centroid]
dl = Vector Centroid -> [Centroid]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Centroid
d

    -- total weight of @d@
    dw :: Double
    dw :: Double
dw = [Double] -> Double
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((Centroid -> Double) -> [Centroid] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Centroid -> Double
forall a b. (a, b) -> b
snd [Centroid]
dl)

-------------------------------------------------------------------------------
-- Higher level helpers
-------------------------------------------------------------------------------

-- | Insert single value into 'TDigest'.
insert
    :: KnownNat comp
    => Double  -- ^ element
    -> TDigest comp
    -> TDigest comp
insert :: Double -> TDigest comp -> TDigest comp
insert Double
x  = TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress (TDigest comp -> TDigest comp)
-> (TDigest comp -> TDigest comp) -> TDigest comp -> TDigest comp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert' Double
x

-- | Insert single value, don't compress 'TDigest' even if needed.
--
-- This may violate the insertion buffer size invariant.
--
-- For sensibly bounded input, it makes sense to let 'TDigest' grow (it might
-- grow linearly in size), and after that compress it once.
insert'
    :: KnownNat comp
    => Double         -- ^ element
    -> TDigest comp
    -> TDigest comp
insert' :: Double -> TDigest comp -> TDigest comp
insert' Double
x (TDigest Size
s Vector Centroid
d Size
sb [Double]
b Bool
dir) = Size -> Vector Centroid -> Size -> [Double] -> Bool -> TDigest comp
forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest (Size
s Size -> Size -> Size
forall a. Num a => a -> a -> a
+ Size
1) Vector Centroid
d (Size
sb Size -> Size -> Size
forall a. Num a => a -> a -> a
+ Size
1) (Double
x Double -> [Double] -> [Double]
forall a. a -> [a] -> [a]
: [Double]
b) Bool
dir

-- | Make a 'TDigest' of a single data point.
singleton :: Double -> TDigest comp
singleton :: Double -> TDigest comp
singleton Double
x = Size -> Vector Centroid -> Size -> [Double] -> Bool -> TDigest comp
forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest Size
1 (Centroid -> Vector Centroid
forall a. Unbox a => a -> Vector a
VU.singleton (Double
x, Double
1)) Size
0 [] Bool
True

-- | Strict 'foldl'' over 'Foldable' structure.
tdigest :: (Foldable f, KnownNat comp) => f Double -> TDigest comp
tdigest :: f Double -> TDigest comp
tdigest = (TDigest comp -> Double -> TDigest comp)
-> TDigest comp -> [Double] -> TDigest comp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Double -> TDigest comp -> TDigest comp)
-> TDigest comp -> Double -> TDigest comp
forall a b c. (a -> b -> c) -> b -> a -> c
flip Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert) TDigest comp
forall a. Monoid a => a
mempty ([Double] -> TDigest comp)
-> (f Double -> [Double]) -> f Double -> TDigest comp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. f Double -> [Double]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList

-- $setup
-- >>> :set -XDataKinds