{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DataKinds #-}
module Data.Mealy.Quantiles
( median,
quantiles,
digitize,
)
where
import Data.Mealy
import Data.Ord
import Data.TDigest hiding (median)
import Data.TDigest.Internal
import Data.TDigest.Tree.Internal (TDigest (..), absMaxSize, emptyTDigest, insertCentroid, relMaxSize, size, toMVector)
import qualified Data.Vector.Algorithms.Heap as VHeap
import qualified Data.Vector.Unboxed as VU
import NumHask.Prelude hiding (fold)
data OnlineTDigest
= OnlineTDigest
{ td :: TDigest 25,
tdN :: Int,
tdRate :: Double
}
deriving (Show)
emptyOnlineTDigest :: Double -> OnlineTDigest
emptyOnlineTDigest = OnlineTDigest (emptyTDigest :: TDigest n) 0
quantiles :: Double -> [Double] -> Mealy Double [Double]
quantiles r qs = M inject step extract
where
step x a = onlineInsert a x
inject a = onlineInsert a (emptyOnlineTDigest r)
extract x = fromMaybe (0 / 0) . (`quantile` t) <$> qs
where
(OnlineTDigest t _ _) = onlineForceCompress x
median :: Double -> Mealy Double Double
median r = M inject step extract
where
step x a = onlineInsert a x
inject a = onlineInsert a (emptyOnlineTDigest r)
extract x = fromMaybe (0 / 0) (quantile 0.5 t)
where
(OnlineTDigest t _ _) = onlineForceCompress x
onlineInsert' :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' x (OnlineTDigest td' n r) =
OnlineTDigest
(insertCentroid (x, r ^^ (- (fromIntegral $ n + 1) :: Integer)) td')
(n + 1)
r
onlineInsert :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert x otd = onlineCompress (onlineInsert' x otd)
onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress otd@(OnlineTDigest Nil _ _) = otd
onlineCompress otd@(OnlineTDigest t _ _)
| Data.TDigest.Tree.Internal.size t > relMaxSize * compression
&& Data.TDigest.Tree.Internal.size t > absMaxSize =
onlineForceCompress otd
| otherwise = otd
where
compression = 25
onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress otd@(OnlineTDigest Nil _ _) = otd
onlineForceCompress (OnlineTDigest t n r) = OnlineTDigest t' 0 r
where
t' =
foldl' (flip insertCentroid) emptyTDigest $
(\(m, w) -> (m, w * (r ^^ n))) . fst <$> VU.toList centroids
centroids :: VU.Vector (Centroid, Double)
centroids =
runST $ do
v <- toMVector t
VHeap.sortBy (comparing snd) v
VU.unsafeFreeze v
digitize :: Double -> [Double] -> Mealy Double Int
digitize r qs = M inject step extract
where
step (x, _) a = (onlineInsert a x, a)
inject a = (onlineInsert a (emptyOnlineTDigest r), a)
extract (x, l) = bucket' qs' l
where
qs' = fromMaybe (0 / 0) . (`quantile` t) <$> qs
(OnlineTDigest t _ _) = onlineForceCompress x
bucket' xs l' =
fold (M id (+) id) $
( \x' ->
if x' > l'
then 0
else 1
)
<$> xs