module Online.Quantiles
( tDigest
, tDigestQuantiles
, tDigestHist
, OnlineTDigest(..)
, onlineQuantiles
, Online.Quantiles.median
, onlineDigitize
, onlineDigestHist
)
where
import qualified Control.Foldl as L
import Data.List.NonEmpty (NonEmpty)
import Data.TDigest
import Data.TDigest.Internal
import Data.TDigest.Postprocess ()
import qualified Data.Vector.Algorithms.Heap as VHeap
import qualified Data.Vector.Unboxed as VU
import NumHask.Prelude
tDigest :: L.Fold Double (TDigest 25)
tDigest = L.Fold step begin done
where
step x a = insert a x
begin = tdigest ([] :: [Double]) :: TDigest 25
done = identity
tDigestQuantiles :: [Double] -> L.Fold Double [Double]
tDigestQuantiles qs = L.Fold step begin done
where
step x a = insert a x
begin = tdigest ([] :: [Double]) :: TDigest 25
done x = fromMaybe nan . (`quantile` compress x) <$> qs
tDigestHist :: L.Fold Double (Maybe (NonEmpty HistBin))
tDigestHist = L.Fold step begin done
where
step x a = insert a x
begin = tdigest ([] :: [Double]) :: TDigest 25
done = histogram . compress
data OnlineTDigest = OnlineTDigest
{ td :: TDigest 25
, tdN :: Int
, tdRate :: Double
} deriving (Show)
emptyOnlineTDigest :: Double -> OnlineTDigest
emptyOnlineTDigest = OnlineTDigest (emptyTDigest :: TDigest n) 0
onlineQuantiles :: Double -> [Double] -> L.Fold Double [Double]
onlineQuantiles r qs = L.Fold step begin done
where
step x a = onlineInsert a x
begin = emptyOnlineTDigest r
done x = fromMaybe nan . (`quantile` t) <$> qs
where
(OnlineTDigest t _ _) = onlineForceCompress x
median :: Double -> L.Fold Double Double
median r = L.Fold step begin done
where
step x a = onlineInsert a x
begin = emptyOnlineTDigest r
done x = fromMaybe nan (quantile 0.5 t)
where
(OnlineTDigest t _ _) = onlineForceCompress x
onlineInsert' :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' x (OnlineTDigest td' n r) =
OnlineTDigest
(insertCentroid (x, r ^^ ((fromIntegral $ n + 1))) td')
(n + 1)
r
onlineInsert :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert x otd = onlineCompress (onlineInsert' x otd)
onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress otd@(OnlineTDigest Nil _ _) = otd
onlineCompress otd@(OnlineTDigest t _ _)
| Data.TDigest.Internal.size t > relMaxSize * compression &&
Data.TDigest.Internal.size t > absMaxSize = onlineForceCompress otd
| otherwise = otd
where
compression = 25
onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress otd@(OnlineTDigest Nil _ _) = otd
onlineForceCompress (OnlineTDigest t n r) = OnlineTDigest t' 0 r
where
t' =
NumHask.Prelude.foldl' (flip insertCentroid) emptyTDigest $
(\(m, w) -> (m, w * (r ^^ fromIntegral n))) . fst <$> VU.toList centroids
centroids :: VU.Vector (Centroid, Double)
centroids =
runST $ do
v <- toMVector t
VHeap.sortBy (comparing snd) v
f <- VU.unsafeFreeze v
pure f
onlineDigitize :: Double -> [Double] -> L.Fold Double Int
onlineDigitize r qs = L.Fold step begin done
where
step (x, _) a = (onlineInsert a x, a)
begin = (emptyOnlineTDigest r, nan)
done (x, l) = bucket' qs' l
where
qs' = fromMaybe nan . (`quantile` t) <$> qs
(OnlineTDigest t _ _) = onlineForceCompress x
bucket' xs l' =
L.fold L.sum $
(\x' ->
if x' > l'
then 0
else 1) <$>
xs
onlineDigestHist :: Double -> L.Fold Double (Maybe (NonEmpty HistBin))
onlineDigestHist r = L.Fold step begin done
where
step x a = onlineInsert a x
begin = emptyOnlineTDigest r
done x = histogram . compress $ t
where
(OnlineTDigest t _ _) = onlineForceCompress x