{-# LANGUAGE DataKinds #-}
{-# LANGUAGE RebindableSyntax #-}
{-# LANGUAGE StrictData #-}
{-# OPTIONS_GHC -Wall #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}

-- | A histogram, if you squint, is a series of contiguous 'Range's, annotated with values.
module NumHask.Space.Histogram
  ( Histogram (..),
    DealOvers (..),
    fill,
    cutI,
    regular,
    makeRects,
    regularQuantiles,
    quantileFold,
    freq,
    average,
    quantiles,
    quantile,
  )
where

import qualified Data.Map as Map
import qualified Data.TDigest as TD
import qualified Data.Vector as V
import NumHask.Prelude
import NumHask.Space.Range
import NumHask.Space.Rect
import NumHask.Space.Types

-- | This Histogram is a list of contiguous boundaries (a boundary being the lower edge of one bucket and the upper edge of another), and a value (usually a count) for each bucket, represented here as a map
--
-- Overs and Unders are contained in key = 0 and key = length cuts
-- Intervals are defined as (l,u]
data Histogram = Histogram
  { Histogram -> Vector Double
cuts :: V.Vector Double, -- bucket boundaries
    Histogram -> Map Int Double
values :: Map.Map Int Double -- bucket counts
  }
  deriving (Int -> Histogram -> ShowS
[Histogram] -> ShowS
Histogram -> String
(Int -> Histogram -> ShowS)
-> (Histogram -> String)
-> ([Histogram] -> ShowS)
-> Show Histogram
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Histogram] -> ShowS
$cshowList :: [Histogram] -> ShowS
show :: Histogram -> String
$cshow :: Histogram -> String
showsPrec :: Int -> Histogram -> ShowS
$cshowsPrec :: Int -> Histogram -> ShowS
Show, Histogram -> Histogram -> Bool
(Histogram -> Histogram -> Bool)
-> (Histogram -> Histogram -> Bool) -> Eq Histogram
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Histogram -> Histogram -> Bool
$c/= :: Histogram -> Histogram -> Bool
== :: Histogram -> Histogram -> Bool
$c== :: Histogram -> Histogram -> Bool
Eq)

-- | Whether or not to ignore unders and overs.  If overs and unders are dealt with, IncludeOvers supplies an assumed width for the outer buckets.
data DealOvers = IgnoreOvers | IncludeOvers Double

-- | Fill a Histogram using pre-specified cuts
--
-- >>> fill [0,50,100] [0..99]
-- Histogram {cuts = [0.0,50.0,100.0], values = fromList [(1,50.0),(2,50.0)]}
fill :: (Foldable f) => [Double] -> f Double -> Histogram
fill :: [Double] -> f Double -> Histogram
fill [Double]
cs f Double
xs =
  Vector Double -> Map Int Double -> Histogram
Histogram
    ([Double] -> Vector Double
forall a. [a] -> Vector a
V.fromList [Double]
cs)
    ((Map Int Double -> Double -> Map Int Double)
-> Map Int Double -> f Double -> Map Int Double
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Map Int Double
x Double
a -> (Double -> Double -> Double)
-> Int -> Double -> Map Int Double -> Map Int Double
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
Map.insertWith Double -> Double -> Double
forall a. Additive a => a -> a -> a
(+) (Vector Double -> Double -> Int
forall a. Ord a => Vector a -> a -> Int
cutI ([Double] -> Vector Double
forall a. [a] -> Vector a
V.fromList [Double]
cs) Double
a) Double
1 Map Int Double
x) Map Int Double
forall k a. Map k a
Map.empty f Double
xs)

-- | find the index of the bucket the value is contained in.
cutI :: (Ord a) => V.Vector a -> a -> Int
cutI :: Vector a -> a -> Int
cutI Vector a
cs a
a = Range Int -> Int
go (Int -> Int -> Range Int
forall a. a -> a -> Range a
Range Int
forall a. Additive a => a
zero (Vector a -> Int
forall a. Vector a -> Int
V.length Vector a
cs))
  where
    go :: Range Int -> Int
go (Range Int
l Int
u) =
      let k :: Int
k = (Int
u Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
l) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2
       in case a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
a (Vector a
cs Vector a -> Int -> a
forall a. Vector a -> Int -> a
V.! Int
k) of
            Ordering
EQ -> Int
k Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
1
            Ordering
LT -> Int -> Int -> Bool -> Int
forall a. a -> a -> Bool -> a
bool (Range Int -> Int
go (Int -> Int -> Range Int
forall a. a -> a -> Range a
Range Int
l Int
k)) Int
k (Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
k)
            Ordering
GT ->
              Int -> Int -> Bool -> Int
forall a. a -> a -> Bool -> a
bool
                ( case a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
a (Vector a
cs Vector a -> Int -> a
forall a. Vector a -> Int -> a
V.! (Int
k Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
forall a. Multiplicative a => a
one)) of
                    Ordering
EQ -> Int
k Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
2
                    Ordering
LT -> Int
k Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
1
                    Ordering
GT -> Range Int -> Int
go (Int -> Int -> Range Int
forall a. a -> a -> Range a
Range Int
k Int
u)
                )
                (Int
k Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
1)
                (Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
u Int -> Int -> Int
forall a. Subtractive a => a -> a -> a
- Int
forall a. Multiplicative a => a
one)

-- | Make a histogram using n equally spaced cuts over the entire range of the data
--
-- >>> regular 4 [0..100]
-- Histogram {cuts = [0.0,25.0,50.0,75.0,100.0], values = fromList [(1,25.0),(2,25.0),(3,25.0),(4,25.0),(5,1.0)]}
regular :: Int -> [Double] -> Histogram
regular :: Int -> [Double] -> Histogram
regular Int
n [Double]
xs = [Double] -> [Double] -> Histogram
forall (f :: * -> *).
Foldable f =>
[Double] -> f Double -> Histogram
fill [Double]
cs [Double]
xs
  where
    cs :: [Element (Range Double)]
cs = Pos
-> Range Double -> Grid (Range Double) -> [Element (Range Double)]
forall s. FieldSpace s => Pos -> s -> Grid s -> [Element s]
grid Pos
OuterPos ([Element (Range Double)] -> Range Double
forall s (f :: * -> *).
(Space s, Traversable f) =>
f (Element s) -> s
space1 [Double]
[Element (Range Double)]
xs :: Range Double) Int
Grid (Range Double)
n

-- | Transform a Histogram to Rects
--
-- >>> makeRects IgnoreOvers (regular 4 [0..100])
-- [Rect 0.0 25.0 0.0 0.25,Rect 25.0 50.0 0.0 0.25,Rect 50.0 75.0 0.0 0.25,Rect 75.0 100.0 0.0 0.25]
makeRects :: DealOvers -> Histogram -> [Rect Double]
makeRects :: DealOvers -> Histogram -> [Rect Double]
makeRects DealOvers
o (Histogram Vector Double
cs Map Int Double
counts) = Vector (Rect Double) -> [Rect Double]
forall a. Vector a -> [a]
V.toList (Vector (Rect Double) -> [Rect Double])
-> Vector (Rect Double) -> [Rect Double]
forall a b. (a -> b) -> a -> b
$ (Double -> Double -> Double -> Rect Double)
-> Vector Double
-> Vector Double
-> Vector Double
-> Vector (Rect Double)
forall a b c d.
(a -> b -> c -> d) -> Vector a -> Vector b -> Vector c -> Vector d
V.zipWith3 (\Double
x Double
z Double
w' -> Double -> Double -> Double -> Double -> Rect Double
forall a. a -> a -> a -> a -> Rect a
Rect Double
x Double
z Double
forall a. Additive a => a
zero Double
w') Vector Double
x Vector Double
z Vector Double
w'
  where
    w :: Vector Double
w =
      (Double -> Double -> Double)
-> Vector Double -> Vector Double -> Vector Double
forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith
        Double -> Double -> Double
forall a. Divisive a => a -> a -> a
(/)
        ((\Int
x' -> Double -> Int -> Map Int Double -> Double
forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault Double
0 Int
x' Map Int Double
counts) (Int -> Double) -> Vector Int -> Vector Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Int -> Vector Int
forall a. Num a => a -> Int -> Vector a
V.enumFromN Int
f (Int
l Int -> Int -> Int
forall a. Subtractive a => a -> a -> a
- Int
f Int -> Int -> Int
forall a. Additive a => a -> a -> a
+ Int
forall a. Multiplicative a => a
one))
        ((Double -> Double -> Double)
-> Vector Double -> Vector Double -> Vector Double
forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith (-) Vector Double
z Vector Double
x)
    f :: Int
f = case DealOvers
o of
      DealOvers
IgnoreOvers -> Int
forall a. Multiplicative a => a
one
      IncludeOvers Double
_ -> Int
forall a. Additive a => a
zero
    l :: Int
l = case DealOvers
o of
      DealOvers
IgnoreOvers -> Vector Double -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Vector Double
cs Int -> Int -> Int
forall a. Subtractive a => a -> a -> a
- Int
forall a. Multiplicative a => a
one
      IncludeOvers Double
_ -> Vector Double -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Vector Double
cs
    w' :: Vector Double
w' = (Double -> Double -> Double
forall a. Divisive a => a -> a -> a
/ Vector Double -> Double
forall a (f :: * -> *). (Additive a, Foldable f) => f a -> a
sum Vector Double
w) (Double -> Double) -> Vector Double -> Vector Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Vector Double
w
    x :: Vector Double
x = case DealOvers
o of
      DealOvers
IgnoreOvers -> Vector Double
cs
      IncludeOvers Double
outw ->
        Double -> Vector Double
forall a. a -> Vector a
V.singleton (Vector Double -> Double
forall a. Vector a -> a
V.head Vector Double
cs Double -> Double -> Double
forall a. Subtractive a => a -> a -> a
- Double
outw)
          Vector Double -> Vector Double -> Vector Double
forall a. Semigroup a => a -> a -> a
<> Vector Double
cs
          Vector Double -> Vector Double -> Vector Double
forall a. Semigroup a => a -> a -> a
<> Double -> Vector Double
forall a. a -> Vector a
V.singleton (Vector Double -> Double
forall a. Vector a -> a
V.last Vector Double
cs Double -> Double -> Double
forall a. Additive a => a -> a -> a
+ Double
outw)
    z :: Vector Double
z = Int -> Vector Double -> Vector Double
forall a. Int -> Vector a -> Vector a
V.drop Int
forall a. Multiplicative a => a
one Vector Double
x

-- | approx regular n-quantiles
--
-- >>> regularQuantiles 4 [0..100]
-- [0.0,24.75,50.0,75.25,100.0]
regularQuantiles :: Double -> [Double] -> [Double]
regularQuantiles :: Double -> [Double] -> [Double]
regularQuantiles Double
n [Double]
xs = [Double] -> [Double] -> [Double]
quantileFold [Double]
qs [Double]
xs
  where
    qs :: [Double]
qs = ((Double
1 Double -> Double -> Double
forall a. Divisive a => a -> a -> a
/ Double
n) Double -> Double -> Double
forall a. Multiplicative a => a -> a -> a
*) (Double -> Double) -> [Double] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Double
0 .. Double
n]

-- | one-pass approximate quantiles fold
quantileFold :: [Double] -> [Double] -> [Double]
quantileFold :: [Double] -> [Double] -> [Double]
quantileFold [Double]
qs [Double]
xs = TDigest 25 -> [Double]
forall (comp :: Nat). KnownNat comp => TDigest comp -> [Double]
done (TDigest 25 -> [Double]) -> TDigest 25 -> [Double]
forall a b. (a -> b) -> a -> b
$ (TDigest 25 -> Double -> TDigest 25)
-> TDigest 25 -> [Double] -> TDigest 25
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' TDigest 25 -> Double -> TDigest 25
forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> Double -> TDigest comp
step TDigest 25
begin [Double]
xs
  where
    step :: TDigest comp -> Double -> TDigest comp
step TDigest comp
x Double
a = Double -> TDigest comp -> TDigest comp
forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
TD.insert Double
a TDigest comp
x
    begin :: TDigest 25
begin = [Double] -> TDigest 25
forall (f :: * -> *) (comp :: Nat).
(Foldable f, KnownNat comp) =>
f Double -> TDigest comp
TD.tdigest ([] :: [Double]) :: TD.TDigest 25
    done :: TDigest comp -> [Double]
done TDigest comp
x = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe (Double
0 Double -> Double -> Double
forall a. Divisive a => a -> a -> a
/ Double
0) (Maybe Double -> Double)
-> (Double -> Maybe Double) -> Double -> Double
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Double -> TDigest comp -> Maybe Double
forall (comp :: Nat). Double -> TDigest comp -> Maybe Double
`TD.quantile` TDigest comp -> TDigest comp
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
TD.compress TDigest comp
x) (Double -> Double) -> [Double] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Double]
qs

-- | normalize a histogram
--
-- > \h -> sum (values $ freq h) == one
--
-- >>> freq $ fill [0,50,100] [0..99]
-- Histogram {cuts = [0.0,50.0,100.0], values = fromList [(1,0.5),(2,0.5)]}
freq :: Histogram -> Histogram
freq :: Histogram -> Histogram
freq (Histogram Vector Double
cs Map Int Double
vs) = Vector Double -> Map Int Double -> Histogram
Histogram Vector Double
cs (Map Int Double -> Histogram) -> Map Int Double -> Histogram
forall a b. (a -> b) -> a -> b
$ (Double -> Double) -> Map Int Double -> Map Int Double
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (Double -> Double -> Double
forall a. Multiplicative a => a -> a -> a
* Double -> Double
forall a. Divisive a => a -> a
recip (Map Int Double -> Double
forall a (f :: * -> *). (Additive a, Foldable f) => f a -> a
sum Map Int Double
vs)) Map Int Double
vs

-- | average
--
-- >>> average [0..1000]
-- 500.0
average :: (Foldable f) => f Double -> Double
average :: f Double -> Double
average f Double
xs = f Double -> Double
forall a (f :: * -> *). (Additive a, Foldable f) => f a -> a
sum f Double
xs Double -> Double -> Double
forall a. Divisive a => a -> a -> a
/ Int -> Double
forall a b. FromIntegral a b => b -> a
fromIntegral (f Double -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length f Double
xs)

-- | Regularly spaced (approx) quantiles
--
-- >>> quantiles 5 [1..1000]
-- [1.0,200.5,400.5,600.5000000000001,800.5,1000.0]
quantiles :: (Foldable f) => Int -> f Double -> [Double]
quantiles :: Int -> f Double -> [Double]
quantiles Int
n f Double
xs =
  ( \Double
x ->
      Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe Double
0 (Maybe Double -> Double) -> Maybe Double -> Double
forall a b. (a -> b) -> a -> b
$
        Double -> TDigest 25 -> Maybe Double
forall (comp :: Nat). Double -> TDigest comp -> Maybe Double
TD.quantile Double
x (f Double -> TDigest 25
forall (f :: * -> *) (comp :: Nat).
(Foldable f, KnownNat comp) =>
f Double -> TDigest comp
TD.tdigest f Double
xs :: TD.TDigest 25)
  )
    (Double -> Double) -> [Double] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((Double -> Double -> Double
forall a. Divisive a => a -> a -> a
/ Int -> Double
forall a b. FromIntegral a b => b -> a
fromIntegral Int
n) (Double -> Double) -> (Int -> Double) -> Int -> Double
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Double
forall a b. FromIntegral a b => b -> a
fromIntegral (Int -> Double) -> [Int] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Int
0 .. Int
n])

-- | single (approx) quantile
--
-- >>> quantile 0.1 [1..1000]
-- 100.5
quantile :: (Foldable f) => Double -> f Double -> Double
quantile :: Double -> f Double -> Double
quantile Double
p f Double
xs = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe Double
0 (Maybe Double -> Double) -> Maybe Double -> Double
forall a b. (a -> b) -> a -> b
$ Double -> TDigest 25 -> Maybe Double
forall (comp :: Nat). Double -> TDigest comp -> Maybe Double
TD.quantile Double
p (f Double -> TDigest 25
forall (f :: * -> *) (comp :: Nat).
(Foldable f, KnownNat comp) =>
f Double -> TDigest comp
TD.tdigest f Double
xs :: TD.TDigest 25)