{-# LANGUAGE DataKinds #-}

module Online.Quantiles
  ( tDigest
  , tDigestQuantiles
  , tDigestHist
  , OnlineTDigest(..)
  , onlineQuantiles
  , Online.Quantiles.median
  , onlineDigitize
  , onlineDigestHist
  )
where

import qualified Control.Foldl as L
import Data.List.NonEmpty (NonEmpty)
import Data.TDigest
import Data.TDigest.Internal
import Data.TDigest.Tree.Internal (TDigest(..), size, emptyTDigest, insertCentroid, relMaxSize, absMaxSize, toMVector)
import Data.TDigest.Postprocess (HistBin, histogram)
import qualified Data.Vector.Algorithms.Heap as VHeap
import qualified Data.Vector.Unboxed as VU
import Prelude
import Control.Monad.ST (runST)
import Data.Maybe
import Data.Ord
import Data.Foldable

-- | a raw non-online tdigest fold
tDigest :: L.Fold Double (TDigest 25)
tDigest = L.Fold step begin done
  where
    step x a = insert a x
    begin = tdigest ([] :: [Double]) :: TDigest 25
    done = id

-- | non-online version
tDigestQuantiles :: [Double] -> L.Fold Double [Double]
tDigestQuantiles qs = L.Fold step begin done
  where
    step x a = insert a x
    begin = tdigest ([] :: [Double]) :: TDigest 25
    done x = fromMaybe (0/0) . (`quantile` compress x) <$> qs

-- | non-online version
tDigestHist :: L.Fold Double (Maybe (NonEmpty HistBin))
tDigestHist = L.Fold step begin done
  where
    step x a = insert a x
    begin = tdigest ([] :: [Double]) :: TDigest 25
    done = histogram . compress

data OnlineTDigest = OnlineTDigest
  { td :: TDigest 25
  , tdN :: Int
  , tdRate :: Double
  } deriving (Show)

emptyOnlineTDigest :: Double -> OnlineTDigest
emptyOnlineTDigest = OnlineTDigest (emptyTDigest :: TDigest n) 0

-- | decaying quantiles based on the tdigest library
onlineQuantiles :: Double -> [Double] -> L.Fold Double [Double]
onlineQuantiles r qs = L.Fold step begin done
  where
    step x a = onlineInsert a x
    begin = emptyOnlineTDigest r
    done x = fromMaybe (0/0) . (`quantile` t) <$> qs
      where
        (OnlineTDigest t _ _) = onlineForceCompress x

median :: Double -> L.Fold Double Double
median r = L.Fold step begin done
  where
    step x a = onlineInsert a x
    begin = emptyOnlineTDigest r
    done x = fromMaybe (0/0) (quantile 0.5 t)
      where
        (OnlineTDigest t _ _) = onlineForceCompress x

onlineInsert' :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert' x (OnlineTDigest td' n r) =
  OnlineTDigest
    (insertCentroid (x, r ^^ (-(fromIntegral $ n + 1) :: Integer)) td')
    (n + 1)
    r

onlineInsert :: Double -> OnlineTDigest -> OnlineTDigest
onlineInsert x otd = onlineCompress (onlineInsert' x otd)

onlineCompress :: OnlineTDigest -> OnlineTDigest
onlineCompress otd@(OnlineTDigest Nil _ _) = otd
onlineCompress otd@(OnlineTDigest t _ _)
  | Data.TDigest.Tree.Internal.size t > relMaxSize * compression &&
      Data.TDigest.Tree.Internal.size t > absMaxSize = onlineForceCompress otd
  | otherwise = otd
  where
    compression = 25

onlineForceCompress :: OnlineTDigest -> OnlineTDigest
onlineForceCompress otd@(OnlineTDigest Nil _ _) = otd
onlineForceCompress (OnlineTDigest t n r) = OnlineTDigest t' 0 r
  where
    t' =
      foldl' (flip insertCentroid) emptyTDigest $
      (\(m, w) -> (m, w * (r ^^ n))) . fst <$> VU.toList centroids
    -- Centroids are shuffled based on space
    centroids :: VU.Vector (Centroid, Double)
    centroids =
      runST $ do
        v <- toMVector t
        -- sort by cumulative weight
        VHeap.sortBy (comparing snd) v
        VU.unsafeFreeze v

onlineDigitize :: Double -> [Double] -> L.Fold Double Int
onlineDigitize r qs = L.Fold step begin done
  where
    step (x, _) a = (onlineInsert a x, a)
    begin = (emptyOnlineTDigest r, 0/0)
    done (x, l) = bucket' qs' l
      where
        qs' = fromMaybe (0/0) . (`quantile` t) <$> qs
        (OnlineTDigest t _ _) = onlineForceCompress x
        bucket' xs l' =
          L.fold L.sum $
          (\x' ->
             if x' > l'
               then 0
               else 1) <$>
          xs

-- | decaying histogram based on the tdigest library
onlineDigestHist :: Double -> L.Fold Double (Maybe (NonEmpty HistBin))
onlineDigestHist r = L.Fold step begin done
  where
    step x a = onlineInsert a x
    begin = emptyOnlineTDigest r
    done x = histogram . compress $ t
      where
        (OnlineTDigest t _ _) = onlineForceCompress x