{-# LANGUAGE DataKinds #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -Wall #-}
module NumHask.Histogram
( Histogram(..)
, DealOvers(..)
, fill
, regular
, makeRects
, regularQuantiles
, quantileFold
, fromQuantiles
, freq
) where
import Data.TDigest
import NumHask.Prelude
import NumHask.Range
import NumHask.Rect
import NumHask.Space
import qualified Data.Map as Map
import qualified Control.Foldl as L
import qualified Data.List
data Histogram = Histogram
{ cuts :: [Double]
, values :: Map.Map Int Double
} deriving (Show, Eq)
data DealOvers = IgnoreOvers | IncludeOvers Double
fill :: (Functor f, Foldable f) => [Double] -> f Double -> Histogram
fill cs xs = Histogram cs (histMap cs xs)
where
histMap cs' xs' = L.fold count $
(\x -> L.fold countBool (fmap (x >) cs')) <$> xs'
count = L.premap (\x -> (x,1.0)) countW
countBool = L.Fold (\x a -> x + if a then 1 else 0) 0 identity
countW = L.Fold (\x (a,w) -> Map.insertWith (+) a w x) Map.empty identity
regular :: Int -> [Double] -> Histogram
regular n xs = fill cs xs
where
cs = grid OuterPos (space xs :: Range Double) n
makeRects :: DealOvers -> Histogram -> [Rect Double]
makeRects o (Histogram cs counts) = Data.List.zipWith4 Rect x z y w'
where
y = repeat 0
w = zipWith (/)
((\x' -> Map.findWithDefault 0 x' counts) <$> [f..l])
(zipWith (-) z x)
f = case o of
IgnoreOvers -> 1
IncludeOvers _ -> 0
l = case o of
IgnoreOvers -> length cs - 1
IncludeOvers _ -> length cs
w' = (/sum w) <$> w
x = case o of
IgnoreOvers -> cs
IncludeOvers outw ->
[Data.List.head cs - outw] <>
cs <>
[Data.List.last cs + outw]
z = drop 1 x
regularQuantiles :: Double -> [Double] -> [Double]
regularQuantiles n = L.fold (quantileFold qs)
where
qs = ((1 / n) *) <$> [0 .. n]
quantileFold :: [Double] -> L.Fold Double [Double]
quantileFold qs = L.Fold step begin done
where
step x a = Data.TDigest.insert a x
begin = tdigest ([] :: [Double]) :: TDigest 25
done x = fromMaybe nan . (`quantile` compress x) <$> qs
fromQuantiles :: [Double] -> [Double] -> Histogram
fromQuantiles qs xs = Histogram xs (Map.fromList $ zip [1..] (diffq qs))
where
diffq [] = []
diffq [_] = []
diffq (x:xs') = L.fold (L.Fold step (x,[]) (reverse . snd)) xs'
step (a0,xs') a = (a,(a-a0):xs')
freq :: Histogram -> Histogram
freq (Histogram cs vs) = Histogram cs $ Map.map (* recip (sum vs)) vs