module Online.Quantiles where
import NumHask.Prelude
import qualified Control.Foldl as L
import Data.TDigest
import Data.TDigest.Internal
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Algorithms.Heap as VHeap
import Data.List.NonEmpty (NonEmpty)
import Data.TDigest.Postprocess()
tDigest :: L.Fold Double (TDigest 25)
tDigest = L.Fold step begin done
where
step x a = insert a x
begin = tdigest ([]::[Double]) :: TDigest 25
done = identity
tDigestQuantiles :: [Double] -> L.Fold Double [Double]
tDigestQuantiles qs = L.Fold step begin done
where
step x a = insert a x
begin = tdigest ([]::[Double]) :: TDigest 25
done x = fromMaybe nan . (`quantile` compress x) <$> qs
tDigestHist :: L.Fold Double (Maybe (NonEmpty HistBin))
tDigestHist = L.Fold step begin done
where
step x a = insert a x
begin = tdigest ([]::[Double]) :: TDigest 25
done x = histogram . compress $ x
data OnlineTDigest = OnlineTDigest { td :: TDigest 25, tdN :: Int, tdRate :: Double } deriving (Show)
emptyOnlineTDigest :: Double -> OnlineTDigest
emptyOnlineTDigest r = OnlineTDigest (emptyTDigest :: TDigest n) 0 r
onlineQuantiles :: Double -> [Double] -> L.Fold Double [Double]
onlineQuantiles r qs = L.Fold step begin done
where
step x a = onlineInsert a x
begin = emptyOnlineTDigest r
done x = fromMaybe nan . (`quantile` t) <$> qs
where
(OnlineTDigest t _ _) = onlineForceCompress x
median :: Double -> L.Fold Double Double
median r = L.Fold step begin done
where
step x a = onlineInsert a x
begin = emptyOnlineTDigest r
done x = fromMaybe nan (quantile 0.5 t)
where
(OnlineTDigest t _ _) = onlineForceCompress x
onlineInsert' :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' x (OnlineTDigest td n r) = OnlineTDigest (insertCentroid (x, r^^((fromIntegral $ n+1))) td) (n+1) r
onlineInsert :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert x otd = onlineCompress (onlineInsert' x otd)
onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress otd@(OnlineTDigest Nil _ _ ) = otd
onlineCompress otd@(OnlineTDigest t _ _)
| Data.TDigest.Internal.size t > relMaxSize * compression && Data.TDigest.Internal.size t > absMaxSize
= onlineForceCompress otd
| otherwise
= otd
where
compression = 25
onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress otd@(OnlineTDigest Nil _ _ ) = otd
onlineForceCompress (OnlineTDigest t n r) = OnlineTDigest t' 0 r
where
t' = NumHask.Prelude.foldl' (flip insertCentroid) emptyTDigest $
(\(m,w) -> (m, w*(r^^fromIntegral n))) . fst <$> VU.toList centroids
centroids :: VU.Vector (Centroid, Double)
centroids = runST $ do
v <- toMVector t
VHeap.sortBy (comparing snd) v
f <- VU.unsafeFreeze v
pure f
onlineDigitize :: Double -> [Double] -> L.Fold Double Int
onlineDigitize r qs = L.Fold step begin done
where
step (x,_) a = (onlineInsert a x, a)
begin = (emptyOnlineTDigest r, nan)
done (x,l) = bucket' qs' l
where
qs' = fromMaybe nan . (`quantile` t) <$> qs
(OnlineTDigest t _ _) = onlineForceCompress x
bucket' xs l' = L.fold L.sum $ (\x' -> if x'>l' then 0 else 1) <$> xs
onlineDigestHist :: Double -> L.Fold Double (Maybe (NonEmpty HistBin))
onlineDigestHist r = L.Fold step begin done
where
step x a = onlineInsert a x
begin = emptyOnlineTDigest r
done x = histogram . compress $ t
where
(OnlineTDigest t _ _) = onlineForceCompress x