module NumHask.Histogram
( Histogram(..)
, freq
, fill
, DealOvers(..)
, fromHist
, hist
, labels
, insert
, insertW
, insertWs
) where
import NumHask.Rect
import Protolude
import qualified Control.Foldl as L
import qualified Data.Map.Strict as Map
import Linear hiding (identity)
import Data.List
import Formatting
import Control.Lens
data Histogram = Histogram
{ _cuts :: [Double]
, _values :: Map.Map Int Double
} deriving (Show, Eq)
freq' :: Map.Map Int Double -> Map.Map Int Double
freq' m = Map.map (* recip (Protolude.sum m)) m
freq :: Histogram -> Histogram
freq (Histogram c v) = Histogram c (freq' v)
count :: L.Fold Int (Map Int Double)
count = L.premap (\x -> (x,1.0)) countW
countW :: L.Fold (Int,Double) (Map Int Double)
countW = L.Fold (\x (a,w) -> Map.insertWith (+) a w x) Map.empty identity
countBool :: L.Fold Bool Int
countBool = L.Fold (\x a -> x + if a then 1 else 0) 0 identity
histMap :: (Functor f, Functor g, Ord a, Foldable f, Foldable g) =>
f a -> g a -> Map Int Double
histMap cuts xs = L.fold count $ (\x -> L.fold countBool (fmap (x >) cuts)) <$> xs
histMapW :: (Functor f, Functor g, Ord a, Foldable f, Foldable g) =>
f a -> g (a,Double) -> Map Int Double
histMapW cuts xs = L.fold countW $
(\x -> (L.fold countBool (fmap (fst x >) cuts),snd x)) <$> xs
fill :: [Double] -> [Double] -> Histogram
fill cuts xs = Histogram cuts (histMap cuts xs)
insertW :: Histogram -> Double -> Double -> Histogram
insertW (Histogram cuts vs) value weight = Histogram cuts (Map.unionWith (+) vs s)
where
s = histMapW cuts [(value,weight)]
insertWs :: Histogram -> [(Double, Double)] -> Histogram
insertWs (Histogram cuts vs) vws = Histogram cuts (Map.unionWith (+) vs s)
where
s = histMapW cuts vws
data DealOvers = IgnoreOvers | IncludeOvers Double
fromHist :: DealOvers -> Histogram -> [Rect Double]
fromHist o (Histogram cuts counts) = view rect <$> zipWith4 V4 x y z w'
where
y = repeat 0
w = zipWith (/)
((\x -> Map.findWithDefault 0 x counts) <$> [f..l])
(zipWith () z x)
f = case o of
IgnoreOvers -> 1
IncludeOvers _ -> 0
l = case o of
IgnoreOvers -> length cuts 1
IncludeOvers _ -> length cuts
w' = (/Protolude.sum w) <$> w
x = case o of
IgnoreOvers -> cuts
IncludeOvers outw -> [Data.List.head cuts outw] <> cuts <> [Data.List.last cuts + outw]
z = drop 1 x
labels :: DealOvers -> [Double] -> [Text]
labels o cuts =
case o of
IgnoreOvers -> inside
IncludeOvers _ -> [ "< " <> sformat (prec 2) (Data.List.head cuts)] <> inside <> [ "> " <> sformat (prec 2) (Data.List.last cuts)]
where
inside = sformat (prec 2) <$> zipWith (\l u -> (l+u)/2) cuts (drop 1 cuts)
hist :: [Double] -> Double -> L.Fold Double Histogram
hist cuts r =
L.Fold
(\(Histogram cuts counts) a ->
Histogram cuts
(Map.unionWith (+)
(Map.map (*r) counts)
(Map.singleton (L.fold countBool (fmap (a>) cuts)) 1)))
(Histogram cuts mempty)
identity