-- |
-- Module      : Streamly.Statistics
-- Copyright   : (c) 2020 Composewell Technologies
-- License     : Apache-2.0
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
-- Portability : GHC
--
-- Statistical measures over a stream of data. All operations use numerically
-- stable floating point arithmetic.
--
-- Measurements can be performed over the entire input stream or on a sliding
-- window of fixed or variable size.  Where possible, measures are computed
-- online without buffering the input stream.
--
-- Currently there is no overflow detection.
--
-- References:
--
-- * https://en.wikipedia.org/wiki/Statistics
-- * https://mathworld.wolfram.com/topics/ProbabilityandStatistics.html

-- Resources:
--
-- This may be another useful resource for incremental (non-windowed)
-- computation:
--
-- https://www.researchgate.net/publication/287152055_Incremental_Statistical_Measures
--
-- Sample Statistics
--
-- Terms
--
-- Population: the complete data set from which statistical samples are taken.
--
-- Sample: a subset of the population.
--
-- https://en.wikipedia.org/wiki/Sample_(statistics)
--
-- Estimator:
--
-- Statistical measures can be computed either from the actual population
-- or from samples. Statistical measures computed from the samples provide an
-- estimate of the actual measures of the entire population. Measures computed
-- from samples may not truly reflect the actual measures and may have to be
-- adjusted for biases or errors.
--
-- An "estimator" is a method or function to compute a statistical measure from
-- sampled data. For example, the sample variance is an esitmator of the
-- population variance.
--
-- https://en.wikipedia.org/wiki/Estimator
--
-- Bias:
--
-- The result computed by an estimator may not be centered at the true value as
-- determined by computing the measure for the actual population. Such an
-- estimator is called a biased estimator.  For example, notice how
-- 'sampleVariance' is adjusted for bias.
--
-- https://en.wikipedia.org/wiki/Bias_of_an_estimator
--
-- Consistency:
--
-- https://en.wikipedia.org/wiki/Consistent_estimator

{-# LANGUAGE ScopedTypeVariables #-}
module Streamly.Statistics
    (
    -- * Incremental Folds
    -- | Folds of type @Fold m (a, Maybe a) b@ are incremental sliding window
    -- folds. An input of type @(a, Nothing)@ indicates that the input element
    -- @a@ is being inserted in the window without ejecting an old value
    -- increasing the window size by 1. An input of type @(a, Just a)@
    -- indicates that the first element is being inserted in the window and the
    -- second element is being removed from the window, the window size remains
    -- the same. The window size can only increase and never decrease.
    --
    -- You can compute the statistics over the entire stream using sliding
    -- window folds by keeping the second element of the input tuple as
    -- @Nothing@.
    --
      Window.lmap
    , Window.cumulative

    -- * Summary Statistics
    -- | See https://en.wikipedia.org/wiki/Summary_statistics .

    -- ** Sums
    , Window.length
    , Window.sum
    , Window.sumInt
    , Window.powerSum

    -- ** Location
    -- | See https://en.wikipedia.org/wiki/Location_parameter .
    --
    -- See https://en.wikipedia.org/wiki/Central_tendency .
    , minimum
    , maximum
    , rawMoment
    , rawMomentFrac

    -- Pythagorean means (https://en.wikipedia.org/wiki/Pythagorean_means)
    , mean
    , welfordMean
    , geometricMean
    , harmonicMean

    , quadraticMean

    -- Generalized mean
    , powerMean
    , powerMeanFrac

    -- ** Weighted Means
    -- | Exponential Smoothing.
    , ewma
    , ewmaAfterMean
    , ewmaRampUpSmoothing

    -- ** Spread
    -- | Second order central moment is a statistical measure of dispersion.
    -- The \(k\)th moment about the mean (or \(k\)th central moment) is defined
    -- as:
    --
    -- \(\mu_k = \frac{1}{n}\sum_{i=1}^n {(x_{i}-\mu)}^k\)
    --
    -- See https://mathworld.wolfram.com/CentralMoment.html .
    --
    -- See https://en.wikipedia.org/wiki/Statistical_dispersion .
    , range
    , md
    , variance
    , stdDev

    -- ** Shape
    -- | Third and fourth order central moments are a measure of shape.
    --
    -- See https://en.wikipedia.org/wiki/Shape_parameter .
    --
    -- See https://en.wikipedia.org/wiki/Standardized_moment .
    , skewness
    , kurtosis

    -- XXX Move to Statistics.Sample or Statistics.Estimation module?
    -- ** Estimation
    , sampleVariance
    , sampleStdDev
    , stdErrMean

    -- ** Resampling
    , resample
    , foldResamples
    , jackKnifeMean
    , jackKnifeVariance
    , jackKnifeStdDev

    -- ** Probability Distribution
    , frequency
    , frequency'
    , mode

    -- Histograms
    , HistBin (..)
    , binOffsetSize
    , binFromSizeN
    , binFromToN
    , binBoundaries
    , histogram

    -- * Transforms
    , fft
    )
where

import Control.Exception (assert)
import Control.Monad (when)
import Control.Monad.IO.Class (MonadIO(..))
import Data.Bits (Bits(complement, shiftL, shiftR, (.&.), (.|.)))
import Data.Complex (Complex ((:+)))
import Data.Function ((&))
import Data.Functor.Identity (runIdentity, Identity)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Streamly.Data.Array (Array, length, Unbox)
import Streamly.Data.Fold (Tee(..))
import Streamly.Data.Stream (Stream)
import Streamly.Internal.Data.Array.Type (unsafeIndexIO)
import Streamly.Internal.Data.Fold.Type (Fold(..), Step(..))
import Streamly.Internal.Data.Stream.StreamD.Step (Step(..))
import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..))
import Streamly.Internal.Data.Unfold.Type (Unfold(..))
import System.Random.MWC (createSystemRandom, uniformRM)

import qualified Data.Map.Strict as Map
import qualified Deque.Strict as Deque
import qualified Streamly.Data.Fold as Fold
import qualified Streamly.Data.Array as Array hiding (read)
import qualified Streamly.Internal.Data.Array as Array (read)
import qualified Streamly.Data.MutArray as MA
import qualified Streamly.Internal.Data.Array.Mut as MA
    (getIndexUnsafe, putIndexUnsafe, unsafeSwapIndices)
import qualified Streamly.Internal.Data.Fold.Window as Window
import qualified Streamly.Data.Stream as Stream

import Prelude hiding (length, sum, minimum, maximum)

-- TODO: Overflow checks. Would be good if we can directly replace the
-- operations with overflow checked operations.
--
-- See https://hackage.haskell.org/package/safe-numeric
-- See https://hackage.haskell.org/package/safeint
--
-- TODO We have many of these functions in Streamly.Data.Fold as well. Need to
-- think about deduplication.

-------------------------------------------------------------------------------
-- Transforms
-------------------------------------------------------------------------------

-- XXX These utility functions can be moved to streamly-numeric

-- | Test if the given integer value is a power of 2.
{-# INLINE isPower2 #-}
isPower2 :: Int -> Bool
isPower2 :: Int -> Bool
isPower2 Int
n = Int
n forall a. Bits a => a -> a -> a
.&. (Int
n forall a. Num a => a -> a -> a
- Int
1) forall a. Eq a => a -> a -> Bool
== Int
0

-- | Create a power of 2
--
-- Argument must be less than 64 assuming 64-bit Int size.
--
{-# INLINE _power2 #-}
_power2 :: Int -> Int
_power2 :: Int -> Int
_power2 Int
n = forall a. Bits a => a -> Int -> a
shiftL Int
1 Int
n

-- | Create a bit mask with lower n bits 0 and the rest as 1.
--
-- Argument must be less than 64 assuming 64-bit Int size.
--
{-# INLINE maskLowerN #-}
maskLowerN :: Int -> Int
maskLowerN :: Int -> Int
maskLowerN Int
n = forall a. Bits a => a -> a
complement (forall a. Bits a => a -> Int -> a
shiftL Int
1 Int
n forall a. Num a => a -> a -> a
- Int
1)

-- | Compute the base 2 logarithm of the given value.
--
-- Assumes the Int size to be 64-bit.
--
{-# INLINE logBase2 #-}
logBase2 :: Int -> Int
logBase2 :: Int -> Int
logBase2 Int
v0
    | Int
v0 forall a. Ord a => a -> a -> Bool
<= Int
0   = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"logBase2: input must be greater than 0 " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
v0
    | Bool
otherwise = Int -> Int -> Int -> Int
go Int
32 Int
0 Int
v0

    where

    go :: Int -> Int -> Int -> Int
go !Int
bits !Int
result !Int
v
        | Int
bits forall a. Eq a => a -> a -> Bool
== Int
0 = Int
result
        | Int
v forall a. Bits a => a -> a -> a
.&. Int -> Int
maskLowerN Int
bits forall a. Eq a => a -> a -> Bool
/= Int
0 =
             Int -> Int -> Int -> Int
go (Int
bits forall a. Bits a => a -> Int -> a
`shiftR` Int
1) (Int
result forall a. Bits a => a -> a -> a
.|. Int
bits) (Int
v forall a. Bits a => a -> Int -> a
`shiftR` Int
bits)
        | Bool
otherwise = Int -> Int -> Int -> Int
go (Int
bits forall a. Bits a => a -> Int -> a
`shiftR` Int
1) Int
result Int
v

-- Algo translated from the statistics library.
--
-- XXX We can use a wrapper API that takes an array of Double input instead of
-- array of Complex.
--
-- | Compute fast fourier transform of an array of 'Complex' values.
--
-- Array length must be power of 2.
--
{-# INLINE fft #-}
fft :: MonadIO m => MA.MutArray (Complex Double) -> m ()
fft :: forall (m :: * -> *).
MonadIO m =>
MutArray (Complex Double) -> m ()
fft MutArray (Complex Double)
marr
    | Int -> Bool
isPower2 Int
len = forall {m :: * -> *}. MonadIO m => Int -> Int -> m ()
bitReverse Int
0 Int
0
    | Bool
otherwise  = forall a. HasCallStack => [Char] -> a
error [Char]
"fft: Array length must be power of 2"

    where

    len :: Int
len = forall a. Unbox a => MutArray a -> Int
MA.length MutArray (Complex Double)
marr

    halve :: a -> a
halve a
x = a
x forall a. Bits a => a -> Int -> a
`shiftR` Int
1

    twice :: a -> a
twice a
x = a
x forall a. Bits a => a -> Int -> a
`shiftL` Int
1

    inner :: Int -> Int -> Int -> m ()
inner Int
i Int
j Int
k
        | Int
k forall a. Ord a => a -> a -> Bool
<= Int
j  = Int -> Int -> Int -> m ()
inner Int
i (Int
j forall a. Num a => a -> a -> a
- Int
k) (forall a. Bits a => a -> a
halve Int
k)
        | Bool
otherwise = Int -> Int -> m ()
bitReverse (Int
i forall a. Num a => a -> a -> a
+ Int
1) (Int
j forall a. Num a => a -> a -> a
+ Int
k)

    bitReverse :: Int -> Int -> m ()
bitReverse Int
i Int
j
        | Int
i forall a. Eq a => a -> a -> Bool
== Int
len forall a. Num a => a -> a -> a
- Int
1 = forall {m :: * -> *}. MonadIO m => Int -> Int -> m ()
stage Int
0 Int
1
        | Bool
otherwise = do
            forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i forall a. Ord a => a -> a -> Bool
< Int
j) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a.
(MonadIO m, Unbox a) =>
Int -> Int -> MutArray a -> m ()
MA.unsafeSwapIndices Int
i Int
j MutArray (Complex Double)
marr
            Int -> Int -> Int -> m ()
inner Int
i Int
j (forall a. Bits a => a -> a
halve Int
len)

    log2len :: Int
log2len = Int -> Int
logBase2 Int
len

    stage :: Int -> Int -> m ()
stage Int
l !Int
l1
        | Int
l forall a. Eq a => a -> a -> Bool
== Int
log2len = forall (m :: * -> *) a. Monad m => a -> m a
return ()
        | Bool
otherwise = do
            let !l2 :: Int
l2 = forall a. Bits a => a -> a
twice Int
l1
                !e :: Double
e  = -Double
6.283185307179586forall a. Fractional a => a -> a -> a
/forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
l2
                flight :: Int -> Double -> m ()
flight Int
j !Double
a | Int
j forall a. Eq a => a -> a -> Bool
== Int
l1   = Int -> Int -> m ()
stage (Int
l forall a. Num a => a -> a -> a
+ Int
1) Int
l2
                            | Bool
otherwise = do
                    let butterfly :: Int -> m ()
butterfly Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len  = Int -> Double -> m ()
flight (Int
j forall a. Num a => a -> a -> a
+ Int
1) (Double
a forall a. Num a => a -> a -> a
+ Double
e)
                                    | Bool
otherwise = do
                            let i1 :: Int
i1 = Int
i forall a. Num a => a -> a -> a
+ Int
l1
                            Double
xi1 :+ Double
yi1 <- forall (m :: * -> *) a.
(MonadIO m, Unbox a) =>
Int -> MutArray a -> m a
MA.getIndexUnsafe Int
i1 MutArray (Complex Double)
marr
                            let !c :: Double
c = forall a. Floating a => a -> a
cos Double
a
                                !s :: Double
s = forall a. Floating a => a -> a
sin Double
a
                                d :: Complex Double
d  = (Double
c forall a. Num a => a -> a -> a
* Double
xi1 forall a. Num a => a -> a -> a
- Double
s forall a. Num a => a -> a -> a
* Double
yi1) forall a. a -> a -> Complex a
:+ (Double
s forall a. Num a => a -> a -> a
* Double
xi1 forall a. Num a => a -> a -> a
+ Double
c forall a. Num a => a -> a -> a
* Double
yi1)
                            Complex Double
ci <- forall (m :: * -> *) a.
(MonadIO m, Unbox a) =>
Int -> MutArray a -> m a
MA.getIndexUnsafe Int
i MutArray (Complex Double)
marr
                            forall (m :: * -> *) a.
(MonadIO m, Unbox a) =>
Int -> MutArray a -> a -> m ()
MA.putIndexUnsafe Int
i1 MutArray (Complex Double)
marr (Complex Double
ci forall a. Num a => a -> a -> a
- Complex Double
d)
                            forall (m :: * -> *) a.
(MonadIO m, Unbox a) =>
Int -> MutArray a -> a -> m ()
MA.putIndexUnsafe Int
i MutArray (Complex Double)
marr (Complex Double
ci forall a. Num a => a -> a -> a
+ Complex Double
d)
                            Int -> m ()
butterfly (Int
i forall a. Num a => a -> a -> a
+ Int
l2)
                    Int -> m ()
butterfly Int
j
            Int -> Double -> m ()
flight Int
0 Double
0

-------------------------------------------------------------------------------
-- Location
-------------------------------------------------------------------------------

-- Theoretically, we can approximate minimum in a rolling window by using a
-- 'powerMean' with sufficiently large negative power.
--
-- XXX If we need to know the minimum in the window only once in a while then
-- we can use linear search when it is extracted and not pay the cost all the
-- time.
--
-- | The minimum element in a rolling window.
--
-- For smaller window sizes (< 30) Streamly.Data.Fold.Window.minimum performs
-- better.  If you want to compute the minimum of the entire stream Fold.min
-- from streamly package would be much faster.
--
-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size.
--
{-# INLINE minimum #-}
minimum :: (Monad m, Ord a) => Fold m (a, Maybe a) a
minimum :: forall (m :: * -> *) a. (Monad m, Ord a) => Fold m (a, Maybe a) a
minimum = forall (m :: * -> *) a b s.
(s -> a -> m (Step s b))
-> m (Step s b) -> (s -> m b) -> Fold m a b
Fold forall {m :: * -> *} {a} {b} {a} {b}.
(Monad m, Num a, Ord a, Ord b) =>
Tuple3' a a (Deque (a, b))
-> (b, Maybe a) -> m (Step (Tuple3' a a (Deque (a, b))) b)
step forall {a} {b}. m (Step (Tuple3' Int Int (Deque (Int, a))) b)
initial forall {m :: * -> *} {a} {a} {b} {a}.
(Monad m, Num a) =>
Tuple3' a b (Deque (a, a)) -> m a
extract

    where

    initial :: m (Step (Tuple3' Int Int (Deque (Int, a))) b)
initial =
        forall (m :: * -> *) a. Monad m => a -> m a
return
            forall a b. (a -> b) -> a -> b
$ forall s b. s -> Step s b
Partial
            forall a b. (a -> b) -> a -> b
$ forall a b c. a -> b -> c -> Tuple3' a b c
Tuple3' (Int
0 :: Int) (Int
0 :: Int) (forall a. Monoid a => a
mempty :: Deque.Deque (Int, a))

    step :: Tuple3' a a (Deque (a, b))
-> (b, Maybe a) -> m (Step (Tuple3' a a (Deque (a, b))) b)
step (Tuple3' a
i a
w Deque (a, b)
q) (b
a, Maybe a
ma) =
        case Maybe a
ma of
            Maybe a
Nothing ->
                forall (m :: * -> *) a. Monad m => a -> m a
return
                    forall a b. (a -> b) -> a -> b
$ forall s b. s -> Step s b
Partial
                    forall a b. (a -> b) -> a -> b
$ forall a b c. a -> b -> c -> Tuple3' a b c
Tuple3'
                        (a
i forall a. Num a => a -> a -> a
+ a
1)
                        (a
w forall a. Num a => a -> a -> a
+ a
1)
                        (forall {a} {b}.
(Ord a, Num a) =>
a -> Deque (a, b) -> a -> Deque (a, b)
headCheck a
i Deque (a, b)
q (a
w forall a. Num a => a -> a -> a
+ a
1) forall a b. a -> (a -> b) -> b
& forall {a} {a}. Ord a => (a, a) -> Deque (a, a) -> Deque (a, a)
dqloop (a
i, b
a))
            Just a
_ ->
                forall (m :: * -> *) a. Monad m => a -> m a
return
                    forall a b. (a -> b) -> a -> b
$ forall s b. s -> Step s b
Partial
                    forall a b. (a -> b) -> a -> b
$ forall a b c. a -> b -> c -> Tuple3' a b c
Tuple3' (a
i forall a. Num a => a -> a -> a
+ a
1) a
w (forall {a} {b}.
(Ord a, Num a) =>
a -> Deque (a, b) -> a -> Deque (a, b)
headCheck a
i Deque (a, b)
q a
w forall a b. a -> (a -> b) -> b
& forall {a} {a}. Ord a => (a, a) -> Deque (a, a) -> Deque (a, a)
dqloop (a
i,b
a))

    {-# INLINE headCheck #-}
    headCheck :: a -> Deque (a, b) -> a -> Deque (a, b)
headCheck a
i Deque (a, b)
q a
w =
        case forall a. Deque a -> Maybe (a, Deque a)
Deque.uncons Deque (a, b)
q of
            Maybe ((a, b), Deque (a, b))
Nothing -> Deque (a, b)
q
            Just ((a, b)
ia', Deque (a, b)
q') ->
                if forall a b. (a, b) -> a
fst (a, b)
ia' forall a. Ord a => a -> a -> Bool
<= a
i forall a. Num a => a -> a -> a
- a
w
                then Deque (a, b)
q'
                else Deque (a, b)
q

    dqloop :: (a, a) -> Deque (a, a) -> Deque (a, a)
dqloop (a, a)
ia Deque (a, a)
q =
        case forall a. Deque a -> Maybe (a, Deque a)
Deque.unsnoc Deque (a, a)
q of
            Maybe ((a, a), Deque (a, a))
Nothing -> forall a. a -> Deque a -> Deque a
Deque.snoc (a, a)
ia Deque (a, a)
q
            -- XXX This can be improved for the case of `=`
            Just ((a, a)
ia', Deque (a, a)
q') ->
                if forall a b. (a, b) -> b
snd (a, a)
ia forall a. Ord a => a -> a -> Bool
<= forall a b. (a, b) -> b
snd (a, a)
ia'
                then (a, a) -> Deque (a, a) -> Deque (a, a)
dqloop (a, a)
ia Deque (a, a)
q'
                else forall a. a -> Deque a -> Deque a
Deque.snoc (a, a)
ia Deque (a, a)
q

    extract :: Tuple3' a b (Deque (a, a)) -> m a
extract (Tuple3' a
_ b
_ Deque (a, a)
q) =
        forall (m :: * -> *) a. Monad m => a -> m a
return
            forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd
            forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a -> a
fromMaybe (a
0, forall a. HasCallStack => [Char] -> a
error [Char]
"minimum: Empty stream")
            forall a b. (a -> b) -> a -> b
$ forall a. Deque a -> Maybe a
Deque.head Deque (a, a)
q

-- Theoretically, we can approximate maximum in a rolling window by using a
-- 'powerMean' with sufficiently large positive power.
--
-- | The maximum element in a rolling window.
--
-- For smaller window sizes (< 30) Streamly.Data.Fold.Window.maximum performs
-- better.  If you want to compute the maximum of the entire stream
-- Streamly.Data.Fold.maximum from streamly package would be much faster.
--
-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size.
--
{-# INLINE maximum #-}
maximum :: (Monad m, Ord a) => Fold m (a, Maybe a) a
maximum :: forall (m :: * -> *) a. (Monad m, Ord a) => Fold m (a, Maybe a) a
maximum = forall (m :: * -> *) a b s.
(s -> a -> m (Step s b))
-> m (Step s b) -> (s -> m b) -> Fold m a b
Fold forall {m :: * -> *} {a} {b} {a} {b}.
(Monad m, Num a, Ord a, Ord b) =>
Tuple3' a a (Deque (a, b))
-> (b, Maybe a) -> m (Step (Tuple3' a a (Deque (a, b))) b)
step forall {a} {b}. m (Step (Tuple3' Int Int (Deque (Int, a))) b)
initial forall {m :: * -> *} {a} {a} {b} {a}.
(Monad m, Num a) =>
Tuple3' a b (Deque (a, a)) -> m a
extract

    where

    initial :: m (Step (Tuple3' Int Int (Deque (Int, a))) b)
initial =
        forall (m :: * -> *) a. Monad m => a -> m a
return
            forall a b. (a -> b) -> a -> b
$ forall s b. s -> Step s b
Partial
            forall a b. (a -> b) -> a -> b
$ forall a b c. a -> b -> c -> Tuple3' a b c
Tuple3' (Int
0 :: Int) (Int
0 :: Int) (forall a. Monoid a => a
mempty :: Deque.Deque (Int, a))

    step :: Tuple3' a a (Deque (a, b))
-> (b, Maybe a) -> m (Step (Tuple3' a a (Deque (a, b))) b)
step (Tuple3' a
i a
w Deque (a, b)
q) (b
a, Maybe a
ma) =
        case Maybe a
ma of
            Maybe a
Nothing ->
                forall (m :: * -> *) a. Monad m => a -> m a
return
                    forall a b. (a -> b) -> a -> b
$ forall s b. s -> Step s b
Partial
                    forall a b. (a -> b) -> a -> b
$ forall a b c. a -> b -> c -> Tuple3' a b c
Tuple3'
                        (a
i forall a. Num a => a -> a -> a
+ a
1)
                        (a
w forall a. Num a => a -> a -> a
+ a
1)
                        (forall {a} {b}.
(Ord a, Num a) =>
a -> Deque (a, b) -> a -> Deque (a, b)
headCheck a
i Deque (a, b)
q (a
w forall a. Num a => a -> a -> a
+ a
1) forall a b. a -> (a -> b) -> b
& forall {a} {a}. Ord a => (a, a) -> Deque (a, a) -> Deque (a, a)
dqloop (a
i, b
a))
            Just a
_ ->
                forall (m :: * -> *) a. Monad m => a -> m a
return
                    forall a b. (a -> b) -> a -> b
$ forall s b. s -> Step s b
Partial
                    forall a b. (a -> b) -> a -> b
$ forall a b c. a -> b -> c -> Tuple3' a b c
Tuple3' (a
i forall a. Num a => a -> a -> a
+ a
1) a
w (forall {a} {b}.
(Ord a, Num a) =>
a -> Deque (a, b) -> a -> Deque (a, b)
headCheck a
i Deque (a, b)
q a
w forall a b. a -> (a -> b) -> b
& forall {a} {a}. Ord a => (a, a) -> Deque (a, a) -> Deque (a, a)
dqloop (a
i,b
a))

    {-# INLINE headCheck #-}
    headCheck :: a -> Deque (a, b) -> a -> Deque (a, b)
headCheck a
i Deque (a, b)
q a
w =
        case forall a. Deque a -> Maybe (a, Deque a)
Deque.uncons Deque (a, b)
q of
            Maybe ((a, b), Deque (a, b))
Nothing -> Deque (a, b)
q
            Just ((a, b)
ia', Deque (a, b)
q') ->
                if forall a b. (a, b) -> a
fst (a, b)
ia' forall a. Ord a => a -> a -> Bool
<= a
i forall a. Num a => a -> a -> a
- a
w
                then Deque (a, b)
q'
                else Deque (a, b)
q

    dqloop :: (a, a) -> Deque (a, a) -> Deque (a, a)
dqloop (a, a)
ia Deque (a, a)
q =
        case forall a. Deque a -> Maybe (a, Deque a)
Deque.unsnoc Deque (a, a)
q of
            Maybe ((a, a), Deque (a, a))
Nothing -> forall a. a -> Deque a -> Deque a
Deque.snoc (a, a)
ia Deque (a, a)
q
            -- XXX This can be improved for the case of `=`
            Just ((a, a)
ia', Deque (a, a)
q') ->
                if forall a b. (a, b) -> b
snd (a, a)
ia forall a. Ord a => a -> a -> Bool
>= forall a b. (a, b) -> b
snd (a, a)
ia'
                then (a, a) -> Deque (a, a) -> Deque (a, a)
dqloop (a, a)
ia Deque (a, a)
q'
                else forall a. a -> Deque a -> Deque a
Deque.snoc (a, a)
ia Deque (a, a)
q

    extract :: Tuple3' a b (Deque (a, a)) -> m a
extract (Tuple3' a
_ b
_ Deque (a, a)
q) =
        forall (m :: * -> *) a. Monad m => a -> m a
return
            forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd
            forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a -> a
fromMaybe (a
0, forall a. HasCallStack => [Char] -> a
error [Char]
"maximum: Empty stream")
            forall a b. (a -> b) -> a -> b
$ forall a. Deque a -> Maybe a
Deque.head Deque (a, a)
q

-------------------------------------------------------------------------------
-- Mean
-------------------------------------------------------------------------------

-- | Arithmetic mean of elements in a sliding window:
--
-- \(\mu = \frac{\sum_{i=1}^n x_{i}}{n}\)
--
-- This is also known as the Simple Moving Average (SMA) when used in the
-- sliding window and Cumulative Moving Avergae (CMA) when used on the entire
-- stream.
--
-- Mean is the same as the first raw moment.
--
-- \(\mu = \mu'_1\)
--
-- >>> mean = rawMoment 1
-- >>> mean = powerMean 1
-- >>> mean = Fold.teeWith (/) sum length
--
-- /Space/: \(\mathcal{O}(1)\)
--
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE mean #-}
mean :: forall m a. (Monad m, Fractional a) => Fold m (a, Maybe a) a
mean :: forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
mean = forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
Window.mean

-- | Recompute mean from old mean when an item is removed from the sample.
{-# INLINE _meanSubtract #-}
_meanSubtract :: Fractional a => Int -> a -> a -> a
_meanSubtract :: forall a. Fractional a => Int -> a -> a -> a
_meanSubtract Int
n a
oldMean a
oldItem =
    let delta :: a
delta = (a
oldItem forall a. Num a => a -> a -> a
- a
oldMean) forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n forall a. Num a => a -> a -> a
- Int
1)
     in a
oldMean forall a. Num a => a -> a -> a
- a
delta

-- | Recompute mean from old mean when an item is added to the sample.
{-# INLINE meanAdd #-}
meanAdd :: Fractional a => Int -> a -> a -> a
meanAdd :: forall a. Fractional a => Int -> a -> a -> a
meanAdd Int
n a
oldMean a
newItem =
    let delta :: a
delta = (a
newItem forall a. Num a => a -> a -> a
- a
oldMean) forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n forall a. Num a => a -> a -> a
+ Int
1)
     in a
oldMean forall a. Num a => a -> a -> a
+ a
delta

-- We do not carry rounding errors, therefore, this would be less numerically
-- stable than the kbn mean.
--
-- | Recompute mean from old mean when an item in the sample is replaced.
{-# INLINE meanReplace #-}
meanReplace :: Fractional a => Int -> a -> a -> a -> a
meanReplace :: forall a. Fractional a => Int -> a -> a -> a -> a
meanReplace Int
n a
oldMean a
oldItem a
newItem =
    let n1 :: a
n1 = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
        -- Compute two deltas instead of a single (newItem - oldItem) because
        -- the latter would be too small causing rounding errors.
        delta1 :: a
delta1 = (a
newItem forall a. Num a => a -> a -> a
- a
oldMean) forall a. Fractional a => a -> a -> a
/ a
n1
        delta2 :: a
delta2 = (a
oldItem forall a. Num a => a -> a -> a
- a
oldMean) forall a. Fractional a => a -> a -> a
/ a
n1
     in (a
oldMean forall a. Num a => a -> a -> a
+ a
delta1) forall a. Num a => a -> a -> a
- a
delta2

-- | Same as 'mean' but uses Welford's algorithm to compute the mean
-- incrementally.
--
-- It maintains a running mean instead of a running sum and adjusts the mean
-- based on a new value.  This is slower than 'mean' because of using the
-- division operation on each step and it is numerically unstable (as of now).
-- The advantage over 'mean' could be no overflow if the numbers are large,
-- because we do not maintain a sum, but that is a highly unlikely corner case.
--
-- /Internal/
{-# INLINE welfordMean #-}
welfordMean :: forall m a. (Monad m, Fractional a) => Fold m (a, Maybe a) a
welfordMean :: forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
welfordMean = forall (m :: * -> *) a b s.
(s -> a -> m (Step s b))
-> m (Step s b) -> (s -> m b) -> Fold m a b
Fold forall {m :: * -> *} {a} {b}.
(Monad m, Fractional a) =>
Tuple' a Int -> (a, Maybe a) -> m (Step (Tuple' a Int) b)
step forall {b}. m (Step (Tuple' a Int) b)
initial forall {m :: * -> *} {a} {b}. Monad m => Tuple' a b -> m a
extract

    where

    initial :: m (Step (Tuple' a Int) b)
initial =
        forall (m :: * -> *) a. Monad m => a -> m a
return
            forall a b. (a -> b) -> a -> b
$ forall s b. s -> Step s b
Partial
            forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> Tuple' a b
Tuple'
                (a
0 :: a)   -- running mean
                (Int
0 :: Int) -- count of items in the window

    step :: Tuple' a Int -> (a, Maybe a) -> m (Step (Tuple' a Int) b)
step (Tuple' a
oldMean Int
w) (a
new, Maybe a
mOld) =
        forall (m :: * -> *) a. Monad m => a -> m a
return
            forall a b. (a -> b) -> a -> b
$ forall s b. s -> Step s b
Partial
            forall a b. (a -> b) -> a -> b
$ case Maybe a
mOld of
                Maybe a
Nothing -> forall a b. a -> b -> Tuple' a b
Tuple' (forall a. Fractional a => Int -> a -> a -> a
meanAdd Int
w a
oldMean a
new) (Int
w forall a. Num a => a -> a -> a
+ Int
1)
                Just a
old -> forall a b. a -> b -> Tuple' a b
Tuple' (forall a. Fractional a => Int -> a -> a -> a -> a
meanReplace Int
w a
oldMean a
old a
new) Int
w

    extract :: Tuple' a b -> m a
extract (Tuple' a
x b
_) = forall (m :: * -> *) a. Monad m => a -> m a
return a
x

-------------------------------------------------------------------------------
-- Moments
-------------------------------------------------------------------------------

-- XXX We may have chances of overflow if the powers are high or the numbers
-- are large. A limited mitigation could be to use welford style avg
-- computation. Do we need an overflow detection?
--
-- | Raw moment is the moment about 0. The \(k\)th raw moment is defined as:
--
-- \(\mu'_k = \frac{\sum_{i=1}^n x_{i}^k}{n}\)
--
-- >>> rawMoment k = Fold.teeWith (/) (powerSum p) length
--
-- See https://en.wikipedia.org/wiki/Moment_(mathematics) .
--
-- /Space/: \(\mathcal{O}(1)\)
--
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE rawMoment #-}
rawMoment :: (Monad m, Fractional a) => Int -> Fold m (a, Maybe a) a
rawMoment :: forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Int -> Fold m (a, Maybe a) a
rawMoment Int
k = forall (m :: * -> *) a b c x.
Monad m =>
(a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c
Fold.teeWith forall a. Fractional a => a -> a -> a
(/) (forall (m :: * -> *) a.
(Monad m, Num a) =>
Int -> Fold m (a, Maybe a) a
Window.powerSum Int
k) forall (m :: * -> *) b a. (Monad m, Num b) => Fold m (a, Maybe a) b
Window.length

-- | Like 'rawMoment' but powers can be negative or fractional. This is
-- slower than 'rawMoment' for positive intergal powers.
--
-- >>> rawMomentFrac p = Fold.teeWith (/) (powerSumFrac p) length
--
{-# INLINE rawMomentFrac #-}
rawMomentFrac :: (Monad m, Floating a) => a -> Fold m (a, Maybe a) a
rawMomentFrac :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
a -> Fold m (a, Maybe a) a
rawMomentFrac a
k = forall (m :: * -> *) a b c x.
Monad m =>
(a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c
Fold.teeWith forall a. Fractional a => a -> a -> a
(/) (forall (m :: * -> *) a.
(Monad m, Floating a) =>
a -> Fold m (a, Maybe a) a
Window.powerSumFrac a
k) forall (m :: * -> *) b a. (Monad m, Num b) => Fold m (a, Maybe a) b
Window.length

-- XXX Overflow can happen when large powers or large numbers are used. We can
-- keep a running mean instead of running sum but that won't mitigate the
-- overflow possibility by much. The overflow can still happen when computing
-- the mean incrementally.

-- | The \(k\)th power mean of numbers \(x_1, x_2, \ldots, x_n\) is:
--
-- \(M_k = \left( \frac{1}{n} \sum_{i=1}^n x_i^k \right)^{\frac{1}{k}}\)
--
-- \(powerMean(k) = (rawMoment(k))^\frac{1}{k}\)
--
-- >>> powerMean k = (** (1 / fromIntegral k)) <$> rawMoment k
--
-- All other means can be expressed in terms of power mean. It is also known as
-- the generalized mean.
--
-- See https://en.wikipedia.org/wiki/Generalized_mean
--
{-# INLINE powerMean #-}
powerMean :: (Monad m, Floating a) => Int -> Fold m (a, Maybe a) a
powerMean :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
Int -> Fold m (a, Maybe a) a
powerMean Int
k = (forall a. Floating a => a -> a -> a
** (a
1 forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k)) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Int -> Fold m (a, Maybe a) a
rawMoment Int
k

-- | Like 'powerMean' but powers can be negative or fractional. This is
-- slower than 'powerMean' for positive intergal powers.
--
-- >>> powerMeanFrac k = (** (1 / k)) <$> rawMomentFrac k
--
{-# INLINE powerMeanFrac #-}
powerMeanFrac :: (Monad m, Floating a) => a -> Fold m (a, Maybe a) a
powerMeanFrac :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
a -> Fold m (a, Maybe a) a
powerMeanFrac a
k = (forall a. Floating a => a -> a -> a
** (a
1 forall a. Fractional a => a -> a -> a
/ a
k)) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a.
(Monad m, Floating a) =>
a -> Fold m (a, Maybe a) a
rawMomentFrac a
k

-- | The harmonic mean of the positive numbers \(x_1, x_2, \ldots, x_n\) is
-- defined as:
--
-- \(HM = \frac{n}{\frac1{x_1} + \frac1{x_2} + \cdots + \frac1{x_n}}\)
--
-- \(HM = \left(\frac{\sum\limits_{i=1}^n x_i^{-1}}{n}\right)^{-1}\)
--
-- >>> harmonicMean = Fold.teeWith (/) length (lmap recip sum)
-- >>> harmonicMean = powerMeanFrac (-1)
--
-- See https://en.wikipedia.org/wiki/Harmonic_mean .
--
{-# INLINE harmonicMean #-}
harmonicMean :: (Monad m, Fractional a) => Fold m (a, Maybe a) a
harmonicMean :: forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
harmonicMean = forall (m :: * -> *) a b c x.
Monad m =>
(a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c
Fold.teeWith forall a. Fractional a => a -> a -> a
(/) forall (m :: * -> *) b a. (Monad m, Num b) => Fold m (a, Maybe a) b
Window.length (forall c a (m :: * -> *) b.
(c -> a) -> Fold m (a, Maybe a) b -> Fold m (c, Maybe c) b
Window.lmap forall a. Fractional a => a -> a
recip forall (m :: * -> *) a. (Monad m, Num a) => Fold m (a, Maybe a) a
Window.sum)

-- | Geometric mean, defined as:
--
-- \(GM = \sqrt[n]{x_1 x_2 \cdots x_n}\)
--
-- \(GM = \left(\prod_{i=1}^n x_i\right)^\frac{1}{n}\)
--
-- or, equivalently, as the arithmetic mean in log space:
--
-- \(GM = e ^{{\frac{\sum_{i=1}^{n}\ln a_i}{n}}}\)
--
-- >>> geometricMean = exp <$> lmap log mean
--
-- See https://en.wikipedia.org/wiki/Geometric_mean .
{-# INLINE geometricMean #-}
geometricMean :: (Monad m, Floating a) => Fold m (a, Maybe a) a
geometricMean :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
geometricMean = forall a. Floating a => a -> a
exp forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall c a (m :: * -> *) b.
(c -> a) -> Fold m (a, Maybe a) b -> Fold m (c, Maybe c) b
Window.lmap forall a. Floating a => a -> a
log forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
mean

-- | The quadratic mean or root mean square (rms) of the numbers
-- \(x_1, x_2, \ldots, x_n\) is defined as:
--
-- \(RMS = \sqrt{ \frac{1}{n} \left( x_1^2 + x_2^2 + \cdots + x_n^2 \right) }.\)
--
-- >>> quadraticMean = powerMean 2
--
-- See https://en.wikipedia.org/wiki/Root_mean_square .
--
{-# INLINE quadraticMean #-}
quadraticMean :: (Monad m, Floating a) => Fold m (a, Maybe a) a
quadraticMean :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
quadraticMean = forall (m :: * -> *) a.
(Monad m, Floating a) =>
Int -> Fold m (a, Maybe a) a
powerMean Int
2

-------------------------------------------------------------------------------
-- Weighted Means
-------------------------------------------------------------------------------

-- XXX Is this numerically stable? We can use the kbn summation here.
-- | ewmaStep smoothing-factor old-value new-value
{-# INLINE ewmaStep #-}
ewmaStep :: Double -> Double -> Double -> Double
ewmaStep :: Double -> Double -> Double -> Double
ewmaStep Double
k Double
x0 Double
x1 = (Double
1 forall a. Num a => a -> a -> a
- Double
k) forall a. Num a => a -> a -> a
* Double
x0 forall a. Num a => a -> a -> a
+ Double
k forall a. Num a => a -> a -> a
* Double
x1

-- XXX Compute this in a sliding window?
--
-- | @ewma smoothingFactor@.
--
-- @ewma@ of an empty stream is 0.
--
-- Exponential weighted moving average, \(s_n\), of \(n\) values,
-- \(x_1,\ldots,x_n\), is defined recursively as:
--
-- \(\begin{align} s_0& = x_0\\ s_n & = \alpha x_{n} + (1-\alpha)s_{n-1},\quad n>0 \end{align}\)
--
-- If we expand the recursive term it becomes an exponential series:
--
-- \(s_n = \alpha \left[x_n + (1-\alpha)x_{n-1} + (1-\alpha)^2 x_{n-2} + \cdots + (1-\alpha)^{n-1} x_1 \right] + (1-\alpha)^n x_0\)
--
-- where \(\alpha\), the smoothing factor, is in the range \(0 <\alpha < 1\).
-- More the value of \(\alpha\), the more weight is given to newer values.  As
-- a special case if it is 0 then the weighted sum would always be the same as
-- the oldest value, if it is 1 then the sum would always be the same as the
-- newest value.
--
-- See https://en.wikipedia.org/wiki/Moving_average
--
-- See https://en.wikipedia.org/wiki/Exponential_smoothing
--
{-# INLINE ewma #-}
ewma :: Monad m => Double -> Fold m Double Double
ewma :: forall (m :: * -> *). Monad m => Double -> Fold m Double Double
ewma Double
k = forall {a} {b}. Tuple' a b -> a
extract forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) b a.
Monad m =>
(b -> a -> b) -> b -> Fold m a b
Fold.foldl' Tuple' Double Double -> Double -> Tuple' Double Double
step (forall a b. a -> b -> Tuple' a b
Tuple' Double
0 Double
1)

    where

    step :: Tuple' Double Double -> Double -> Tuple' Double Double
step (Tuple' Double
x0 Double
k1) Double
x = forall a b. a -> b -> Tuple' a b
Tuple' (Double -> Double -> Double -> Double
ewmaStep Double
k1 Double
x0 Double
x) Double
k

    extract :: Tuple' a b -> a
extract (Tuple' a
x b
_) = a
x

-- XXX It can perhaps perform better if implemented as a custom fold?
--
-- | @ewma n k@ is like 'ewma' but uses the mean of the first @n@ values and
-- then uses that as the initial value for the @ewma@ of the rest of the
-- values.
--
-- This can be used to reduce the effect of volatility of the initial value
-- when k is too small.
--
{-# INLINE ewmaAfterMean #-}
ewmaAfterMean :: Monad m => Int -> Double -> Fold m Double Double
ewmaAfterMean :: forall (m :: * -> *).
Monad m =>
Int -> Double -> Fold m Double Double
ewmaAfterMean Int
n Double
k =
    forall (m :: * -> *) b a c.
Monad m =>
(b -> Fold m a c) -> Fold m a b -> Fold m a c
Fold.concatMap (\Double
i -> (forall (m :: * -> *) b a.
Monad m =>
(b -> a -> b) -> b -> Fold m a b
Fold.foldl' (Double -> Double -> Double -> Double
ewmaStep Double
k) Double
i)) (forall (m :: * -> *) a b.
Monad m =>
Int -> Fold m a b -> Fold m a b
Fold.take Int
n forall (m :: * -> *) a. (Monad m, Fractional a) => Fold m a a
Fold.mean)

-- | @ewma n k@ is like 'ewma' but uses 1 as the initial smoothing factor and
-- then exponentially smooths it to @k@ using @n@ as the smoothing factor.
--
-- This is significantly faster than 'ewmaAfterMean'.
--
{-# INLINE ewmaRampUpSmoothing #-}
ewmaRampUpSmoothing :: Monad m => Double -> Double -> Fold m Double Double
ewmaRampUpSmoothing :: forall (m :: * -> *).
Monad m =>
Double -> Double -> Fold m Double Double
ewmaRampUpSmoothing Double
n Double
k1 = forall {a} {b}. Tuple' a b -> a
extract forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) b a.
Monad m =>
(b -> a -> b) -> b -> Fold m a b
Fold.foldl' Tuple' Double Double -> Double -> Tuple' Double Double
step Tuple' Double Double
initial

    where

    initial :: Tuple' Double Double
initial = forall a b. a -> b -> Tuple' a b
Tuple' Double
0 Double
1

    step :: Tuple' Double Double -> Double -> Tuple' Double Double
step (Tuple' Double
x0 Double
k0) Double
x1 =
        let x :: Double
x = Double -> Double -> Double -> Double
ewmaStep Double
k0 Double
x0 Double
x1
            k :: Double
k = Double -> Double -> Double -> Double
ewmaStep Double
n Double
k0 Double
k1
        in forall a b. a -> b -> Tuple' a b
Tuple' Double
x Double
k

    extract :: Tuple' a b -> a
extract (Tuple' a
x b
_) = a
x

-------------------------------------------------------------------------------
-- Spread/Dispersion
-------------------------------------------------------------------------------

-- | The difference between the maximum and minimum elements of a rolling window.
--
-- >>> range = Fold.teeWith (-) maximum minimum
--
-- If you want to compute the range of the entire stream @Fold.teeWith (-)
-- Fold.maximum Fold.minimum@ from the streamly package would be much faster.
--
-- /Space/: \(\mathcal{O}(n)\) where @n@ is the window size.
--
-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size.
--
{-# INLINE range #-}
range :: (Monad m, Num a, Ord a) => Fold m (a, Maybe a) a
range :: forall (m :: * -> *) a.
(Monad m, Num a, Ord a) =>
Fold m (a, Maybe a) a
range = forall (m :: * -> *) a b c x.
Monad m =>
(a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c
Fold.teeWith (-) forall (m :: * -> *) a. (Monad m, Ord a) => Fold m (a, Maybe a) a
maximum forall (m :: * -> *) a. (Monad m, Ord a) => Fold m (a, Maybe a) a
minimum

-- | @md n@ computes the mean absolute deviation (or mean deviation) in a
-- sliding window of last @n@ elements in the stream.
--
-- The mean absolute deviation of the numbers \(x_1, x_2, \ldots, x_n\) is:
--
-- \(MD = \frac{1}{n}\sum_{i=1}^n |x_i-\mu|\)
--
-- Note: It is expensive to compute MD in a sliding window. We need to
-- maintain a ring buffer of last n elements and maintain a running mean, when
-- the result is extracted we need to compute the difference of all elements
-- from the mean and get the average. Using standard deviation may be
-- computationally cheaper.
--
-- See https://en.wikipedia.org/wiki/Average_absolute_deviation .
--
-- /Pre-release/
{-# INLINE md #-}
md ::  MonadIO m => Fold m ((Double, Maybe Double), m (MA.MutArray Double)) Double
md :: forall (m :: * -> *).
MonadIO m =>
Fold m ((Double, Maybe Double), m (MutArray Double)) Double
md =
    forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> Fold m a b -> Fold m a c
Fold.rmapM forall {m :: * -> *} {b}.
(Fractional b, MonadIO m, Unbox b) =>
(b, Maybe (m (MutArray b))) -> m b
computeMD
        forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a b c.
Monad m =>
Fold m a b -> Fold m a c -> Fold m a (b, c)
Fold.tee (forall a b (m :: * -> *) r. (a -> b) -> Fold m b r -> Fold m a r
Fold.lmap forall a b. (a, b) -> a
fst forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
mean) (forall a b (m :: * -> *) r. (a -> b) -> Fold m b r -> Fold m a r
Fold.lmap forall a b. (a, b) -> b
snd forall (m :: * -> *) a. Monad m => Fold m a (Maybe a)
Fold.latest)

    where

    computeMD :: (b, Maybe (m (MutArray b))) -> m b
computeMD (b
mn, Maybe (m (MutArray b))
rng) =
        case Maybe (m (MutArray b))
rng of
            Just m (MutArray b)
action -> do
                MutArray b
arr <- m (MutArray b)
action
                forall (m :: * -> *) a b.
Monad m =>
Fold m a b -> Stream m a -> m b
Stream.fold forall (m :: * -> *) a. (Monad m, Fractional a) => Fold m a a
Fold.mean
                    forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\b
a -> forall a. Num a => a -> a
abs (b
mn forall a. Num a => a -> a -> a
- b
a))
                    forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a b.
Applicative m =>
Unfold m a b -> a -> Stream m b
Stream.unfold forall (m :: * -> *) a.
(MonadIO m, Unbox a) =>
Unfold m (MutArray a) a
MA.reader MutArray b
arr
            Maybe (m (MutArray b))
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return b
0.0

-- | The variance \(\sigma^2\) of a population of \(n\) equally likely values
-- is defined as the average of the squares of deviations from the mean
-- \(\mu\). In other words, second moment about the mean:
--
-- \(\sigma^2 = \frac{1}{n}\sum_{i=1}^n {(x_{i}-\mu)}^2\)
--
-- \(\sigma^2 = rawMoment(2) - \mu^2\)
--
-- \(\mu_2 = -(\mu'_1)^2 + \mu'_2\)
--
-- Note that the variance would be biased if applied to estimate the population
-- variance from a sample of the population. See 'sampleVariance'.
--
-- See https://en.wikipedia.org/wiki/Variance.
--
-- /Space/: \(\mathcal{O}(1)\)
--
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE variance #-}
variance :: (Monad m, Fractional a) => Fold m (a, Maybe a) a
variance :: forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
variance = forall (m :: * -> *) a b c x.
Monad m =>
(a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c
Fold.teeWith (\a
p2 a
m -> a
p2 forall a. Num a => a -> a -> a
- a
m forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int)) (forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Int -> Fold m (a, Maybe a) a
rawMoment Int
2) forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
mean

-- | Standard deviation \(\sigma\) is the square root of 'variance'.
--
-- This is the population standard deviation or uncorrected sample standard
-- deviation.
--
-- >>> stdDev = sqrt <$> variance
--
-- See https://en.wikipedia.org/wiki/Standard_deviation .
--
-- /Space/: \(\mathcal{O}(1)\)
--
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE stdDev #-}
stdDev :: (Monad m, Floating a) => Fold m (a, Maybe a) a
stdDev :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
stdDev = forall a. Floating a => a -> a
sqrt forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
variance

-- | Skewness \(\gamma\) is the standardized third central moment defined as:
--
-- \(\tilde{\mu}_3 = \frac{\mu_3}{\sigma^3}\)
--
-- The third central moment can be computed in terms of raw moments:
--
-- \(\mu_3 = 2(\mu'_1)^3 - 3\mu'_1\mu'_2 + \mu'_3\)
--
-- Substituting \(\mu'_1 = \mu\), and \(\mu'_2 = \mu^2 + \sigma^2\):
--
-- \(\mu_3 = -\mu^3 - 3\mu\sigma^2 + \mu'_3\)
--
-- Skewness is a measure of symmetry of the probability distribution. It is 0
-- for a symmetric distribution, negative for a distribution that is skewed
-- towards left, positive for a distribution skewed towards right.
--
-- For a normal like distribution the median can be found around
-- \(\mu - \frac{\gamma\sigma}{6}\) and the mode can be found around
-- \(\mu - \frac{\gamma \sigma}{2}\).
--
-- See https://en.wikipedia.org/wiki/Skewness .
--
{-# INLINE skewness #-}
skewness :: (Monad m, Floating a) => Fold m (a, Maybe a) a
skewness :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
skewness =
    forall (m :: * -> *) a b. Tee m a b -> Fold m a b
unTee
        forall a b. (a -> b) -> a -> b
$ (\a
rm3 a
sd a
mu ->
            a
rm3 forall a. Fractional a => a -> a -> a
/ a
sd forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
3 :: Int) forall a. Num a => a -> a -> a
- a
3 forall a. Num a => a -> a -> a
* (a
mu forall a. Fractional a => a -> a -> a
/ a
sd) forall a. Num a => a -> a -> a
- (a
mu forall a. Fractional a => a -> a -> a
/ a
sd) forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
3 :: Int)
          )
        forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a b. Fold m a b -> Tee m a b
Tee (forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Int -> Fold m (a, Maybe a) a
rawMoment Int
3)
        forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (m :: * -> *) a b. Fold m a b -> Tee m a b
Tee forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
stdDev
        forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (m :: * -> *) a b. Fold m a b -> Tee m a b
Tee forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
mean

-- XXX We can compute the 2nd, 3rd, 4th raw moments by repeatedly multiplying
-- instead of computing the powers every time.
--
-- | Kurtosis \(\kappa\) is the standardized fourth central moment, defined as:
--
-- \(\tilde{\mu}_4 = \frac{\mu_4}{\sigma^4}\)
--
-- The fourth central moment can be computed in terms of raw moments:
--
-- \(\mu_4 = -3(\mu'_1)^4 + 6(\mu'_1)^2\mu'_2 - 4\mu'_1\mu'_3\ + \mu'_4\)
--
-- Substituting \(\mu'_1 = \mu\), and \(\mu'_2 = \mu^2 + \sigma^2\):
--
-- \(\mu_4 = 3\mu^4 + 6\mu^2\sigma^2 - 4\mu\mu'_3 + \mu'_4\)
--
-- It is always non-negative. It is 0 for a point distribution, low for light
-- tailed (platykurtic) distributions and high for heavy tailed (leptokurtic)
-- distributions.
--
-- \(\kappa >= \gamma^2 + 1\)
--
-- For a normal distribution \(\kappa = 3\sigma^4\).
--
-- See https://en.wikipedia.org/wiki/Kurtosis .
--
{-# INLINE kurtosis #-}
kurtosis :: (Monad m, Floating a) => Fold m (a, Maybe a) a
kurtosis :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
kurtosis =
    forall (m :: * -> *) a b. Tee m a b -> Fold m a b
unTee
        forall a b. (a -> b) -> a -> b
$ (\a
rm4 a
rm3 a
sd a
mu ->
             ( a
3 forall a. Num a => a -> a -> a
* a
mu forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
4 :: Int)
            forall a. Num a => a -> a -> a
+ a
6 forall a. Num a => a -> a -> a
* a
mu forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int) forall a. Num a => a -> a -> a
* a
sd forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int)
            forall a. Num a => a -> a -> a
- a
4 forall a. Num a => a -> a -> a
* a
mu forall a. Num a => a -> a -> a
* a
rm3
            forall a. Num a => a -> a -> a
+ a
rm4) forall a. Fractional a => a -> a -> a
/ (a
sd forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
4 :: Int))
          )
        forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a b. Fold m a b -> Tee m a b
Tee (forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Int -> Fold m (a, Maybe a) a
rawMoment Int
4)
        forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (m :: * -> *) a b. Fold m a b -> Tee m a b
Tee (forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Int -> Fold m (a, Maybe a) a
rawMoment Int
3)
        forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (m :: * -> *) a b. Fold m a b -> Tee m a b
Tee forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
stdDev
        forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (m :: * -> *) a b. Fold m a b -> Tee m a b
Tee forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
mean

-------------------------------------------------------------------------------
-- Estimation
-------------------------------------------------------------------------------

-- | Unbiased sample variance i.e. the variance of a sample corrected to
-- better estimate the variance of the population, defined as:
--
-- \(s^2 = \frac{1}{n - 1}\sum_{i=1}^n {(x_{i}-\mu)}^2\)
--
-- \(s^2 = \frac{n}{n - 1} \times \sigma^2\).
--
-- See https://en.wikipedia.org/wiki/Bessel%27s_correction.
--
{-# INLINE sampleVariance #-}
sampleVariance :: (Monad m, Fractional a) => Fold m (a, Maybe a) a
sampleVariance :: forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
sampleVariance = forall (m :: * -> *) a b c x.
Monad m =>
(a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c
Fold.teeWith (\a
n a
s2 -> a
n forall a. Num a => a -> a -> a
* a
s2 forall a. Fractional a => a -> a -> a
/ (a
n forall a. Num a => a -> a -> a
- a
1)) forall (m :: * -> *) b a. (Monad m, Num b) => Fold m (a, Maybe a) b
Window.length forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
variance

-- | Sample standard deviation:
--
-- \(s = \sqrt{sampleVariance}\)
--
-- >>> sampleStdDev = sqrt <$> sampleVariance
--
-- See https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation
-- .
--
{-# INLINE sampleStdDev #-}
sampleStdDev :: (Monad m, Floating a) => Fold m (a, Maybe a) a
sampleStdDev :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
sampleStdDev = forall a. Floating a => a -> a
sqrt forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a.
(Monad m, Fractional a) =>
Fold m (a, Maybe a) a
sampleVariance

-- | Standard error of the sample mean (SEM), defined as:
--
-- \( SEM = \frac{sampleStdDev}{\sqrt{n}} \)
--
-- See https://en.wikipedia.org/wiki/Standard_error .
--
-- /Space/: \(\mathcal{O}(1)\)
--
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE stdErrMean #-}
stdErrMean :: (Monad m, Floating a) => Fold m (a, Maybe a) a
stdErrMean :: forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
stdErrMean = forall (m :: * -> *) a b c x.
Monad m =>
(a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c
Fold.teeWith (\a
sd a
n -> a
sd forall a. Fractional a => a -> a -> a
/ forall a. Floating a => a -> a
sqrt a
n) forall (m :: * -> *) a.
(Monad m, Floating a) =>
Fold m (a, Maybe a) a
sampleStdDev forall (m :: * -> *) b a. (Monad m, Num b) => Fold m (a, Maybe a) b
Window.length

-------------------------------------------------------------------------------
-- Resampling
-------------------------------------------------------------------------------

{-# INLINE foldArray #-}
foldArray :: Unbox a => Fold Identity a b -> Array a -> b
foldArray :: forall a b. Unbox a => Fold Identity a b -> Array a -> b
foldArray Fold Identity a b
f = forall a. Identity a -> a
runIdentity forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) a b.
Monad m =>
Fold m a b -> Stream m a -> m b
Stream.fold Fold Identity a b
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) a. (Monad m, Unbox a) => Array a -> Stream m a
Array.read

-- XXX Is this numerically stable? Should we keep the rounding error in the sum
-- and take it into account when subtracting?
--
-- | Given an array of @n@ items, compute mean of @(n - 1)@ items at a time,
-- producing a stream of all possible mean values omitting a different item
-- every time.
--
{-# INLINE jackKnifeMean #-}
jackKnifeMean :: (Monad m, Fractional a, Unbox a) => Array a -> Stream m a
jackKnifeMean :: forall (m :: * -> *) a.
(Monad m, Fractional a, Unbox a) =>
Array a -> Stream m a
jackKnifeMean Array a
arr = do
    let len :: a
len = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Unbox a => Array a -> Int
length Array a
arr forall a. Num a => a -> a -> a
- Int
1)
        s :: a
s = forall a b. Unbox a => Fold Identity a b -> Array a -> b
foldArray forall (m :: * -> *) a. (Monad m, Num a) => Fold m a a
Fold.sum Array a
arr
     in forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\a
b -> (a
s forall a. Num a => a -> a -> a
- a
b) forall a. Fractional a => a -> a -> a
/ a
len) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. (Monad m, Unbox a) => Array a -> Stream m a
Array.read Array a
arr

-- | Given an array of @n@ items, compute variance of @(n - 1)@ items at a time,
-- producing a stream of all possible variance values omitting a different item
-- every time.
--
{-# INLINE jackKnifeVariance #-}
jackKnifeVariance :: (Monad m, Fractional a, Unbox a) =>
    Array a -> Stream m a
jackKnifeVariance :: forall (m :: * -> *) a.
(Monad m, Fractional a, Unbox a) =>
Array a -> Stream m a
jackKnifeVariance Array a
arr = do
    let len :: a
len = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall a. Unbox a => Array a -> Int
length Array a
arr forall a. Num a => a -> a -> a
- Int
1
        foldSums :: (b, b) -> b -> (b, b)
foldSums (b
s, b
s2) b
x = (b
s forall a. Num a => a -> a -> a
+ b
x, b
s2 forall a. Num a => a -> a -> a
+ b
x forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int))
        (a
sum, a
sum2) = forall a b. Unbox a => Fold Identity a b -> Array a -> b
foldArray (forall (m :: * -> *) b a.
Monad m =>
(b -> a -> b) -> b -> Fold m a b
Fold.foldl' forall {b}. Num b => (b, b) -> b -> (b, b)
foldSums (a
0.0, a
0.0)) Array a
arr
        var :: a -> a
var a
x = (a
sum2 forall a. Num a => a -> a -> a
- a
x forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int)) forall a. Fractional a => a -> a -> a
/ a
len forall a. Num a => a -> a -> a
-  ((a
sum forall a. Num a => a -> a -> a
- a
x) forall a. Fractional a => a -> a -> a
/ a
len) forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2::Int)
     in forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> a
var forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. (Monad m, Unbox a) => Array a -> Stream m a
Array.read Array a
arr

-- | Standard deviation computed from 'jackKnifeVariance'.
--
{-# INLINE jackKnifeStdDev #-}
jackKnifeStdDev :: (Monad m, Unbox a, Floating a) =>
    Array a -> Stream m a
jackKnifeStdDev :: forall (m :: * -> *) a.
(Monad m, Unbox a, Floating a) =>
Array a -> Stream m a
jackKnifeStdDev = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Floating a => a -> a
sqrt forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) a.
(Monad m, Fractional a, Unbox a) =>
Array a -> Stream m a
jackKnifeVariance

-- XXX This can be made more modular if the replicateM unfold can take count
-- from the seed.
--
-- | Randomly select elements from an array, with replacement, producing
-- a stream of the same size as the original array.
{-# INLINE resample #-}
resample :: (MonadIO m, Unbox a) => Unfold m (Array a) a
resample :: forall (m :: * -> *) a.
(MonadIO m, Unbox a) =>
Unfold m (Array a) a
resample = forall (m :: * -> *) a b s.
(s -> m (Step s b)) -> (a -> m s) -> Unfold m a b
Unfold forall {m :: * -> *} {a} {a}.
(MonadIO m, StatefulGen a IO, Unbox a) =>
(a, Array a, Int, Int) -> m (Step (a, Array a, Int, Int) a)
step forall {m :: * -> *} {a} {d}.
(MonadIO m, Unbox a, Num d) =>
Array a -> m (Gen RealWorld, Array a, Int, d)
inject

    where

    inject :: Array a -> m (Gen RealWorld, Array a, Int, d)
inject Array a
arr = forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ do
        Gen RealWorld
g <- IO GenIO
createSystemRandom
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Gen RealWorld
g, Array a
arr, forall a. Unbox a => Array a -> Int
length Array a
arr, d
0)

    chooseOne :: g -> Array b -> Int -> IO b
chooseOne g
g Array b
arr Int
len = do
        Int
i <- forall a g (m :: * -> *).
(UniformRange a, StatefulGen g m) =>
(a, a) -> g -> m a
uniformRM (Int
0, Int
len forall a. Num a => a -> a -> a
- Int
1) g
g
        forall a. Unbox a => Int -> Array a -> IO a
unsafeIndexIO Int
i Array b
arr

    step :: (a, Array a, Int, Int) -> m (Step (a, Array a, Int, Int) a)
step (a
g, Array a
arr, Int
len, Int
idx) = forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ do
        if Int
idx forall a. Ord a => a -> a -> Bool
>= Int
len
        then forall (m :: * -> *) a. Monad m => a -> m a
return forall s a. Step s a
Stop
        else do
            a
e <- forall {g} {b}.
(StatefulGen g IO, Unbox b) =>
g -> Array b -> Int -> IO b
chooseOne a
g Array a
arr Int
len
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall s a. a -> s -> Step s a
Yield a
e (a
g, Array a
arr, Int
len, Int
idx forall a. Num a => a -> a -> a
+ Int
1)

-- XXX Use concurrent combinators

-- | Resample an array multiple times and run the supplied fold on each
-- resampled stream, producing a stream of fold results. The fold is usually an
-- estimator fold.
{-# INLINE foldResamples #-}
foldResamples :: (MonadIO m, Unbox a) =>
       Int          -- ^ Number of resamples to compute.
    -> Array a      -- ^ Original sample.
    -> Fold m a b   -- ^ Estimator fold
    -> Stream m b
foldResamples :: forall (m :: * -> *) a b.
(MonadIO m, Unbox a) =>
Int -> Array a -> Fold m a b -> Stream m b
foldResamples Int
n Array a
arr Fold m a b
fld =
    forall (m :: * -> *) a. Monad m => Stream m (m a) -> Stream m a
Stream.sequence
        forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. Monad m => Int -> a -> Stream m a
Stream.replicate Int
n (forall (m :: * -> *) a b.
Monad m =>
Fold m a b -> Stream m a -> m b
Stream.fold Fold m a b
fld forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a b.
Applicative m =>
Unfold m a b -> a -> Stream m b
Stream.unfold forall (m :: * -> *) a.
(MonadIO m, Unbox a) =>
Unfold m (Array a) a
resample Array a
arr)

-------------------------------------------------------------------------------
-- Probability Distribution
-------------------------------------------------------------------------------

-- XXX We can use a Windowed classifyWith operation, that will allow us to
-- express windowed frequency, mode, histograms etc idiomatically.

-- | Count the frequency of elements in a sliding window.
--
-- >>> input = Stream.fromList [1,1,3,4,4::Int]
-- >>> f = Ring.slidingWindow 4 Statistics.frequency
-- >>> Stream.fold f input
-- fromList [(1,1),(3,1),(4,2)]
--
{-# INLINE frequency #-}
frequency :: (Monad m, Ord a) => Fold m (a, Maybe a) (Map a Int)
frequency :: forall (m :: * -> *) a.
(Monad m, Ord a) =>
Fold m (a, Maybe a) (Map a Int)
frequency = forall (m :: * -> *) b a.
Monad m =>
(b -> a -> b) -> b -> Fold m a b
Fold.foldl' forall {k} {a}.
(Eq a, Ord k, Num a) =>
Map k a -> (k, Maybe k) -> Map k a
step forall k a. Map k a
Map.empty

    where

    decrement :: a -> Maybe a
decrement a
v =
        if a
v forall a. Eq a => a -> a -> Bool
== a
1
        then forall a. Maybe a
Nothing
        else forall a. a -> Maybe a
Just (a
v forall a. Num a => a -> a -> a
- a
1)

    step :: Map k a -> (k, Maybe k) -> Map k a
step Map k a
refCountMap (k
new, Maybe k
mOld) =
        let m1 :: Map k a
m1 = forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
Map.insertWith forall a. Num a => a -> a -> a
(+) k
new a
1 Map k a
refCountMap
        in case Maybe k
mOld of
                Just k
k -> forall k a. Ord k => (a -> Maybe a) -> k -> Map k a -> Map k a
Map.update forall {a}. (Eq a, Num a) => a -> Maybe a
decrement k
k Map k a
m1
                Maybe k
Nothing -> Map k a
m1

-- XXX Check if the performance of window frequency is the same as this in the
-- full case, if so remove this.
-- XXX This is available in the streamly package as well.

-- | Determine the frequency of each element in the stream.
--
{-# INLINE frequency' #-}
frequency' :: (Monad m, Ord a) => Fold m a (Map a Int)
frequency' :: forall (m :: * -> *) a. (Monad m, Ord a) => Fold m a (Map a Int)
frequency' = forall (m :: * -> *) k a b.
(Monad m, Ord k) =>
(a -> k) -> Fold m a b -> Fold m a (Map k b)
Fold.toMap forall a. a -> a
id forall (m :: * -> *) a. Monad m => Fold m a Int
Fold.length

-- | Find out the most frequently ocurring element in the stream and its
-- frequency.
--
{-# INLINE mode #-}
mode :: (Monad m, Ord a) => Fold m a (Maybe (a, Int))
mode :: forall (m :: * -> *) a.
(Monad m, Ord a) =>
Fold m a (Maybe (a, Int))
mode = forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> Fold m a b -> Fold m a c
Fold.rmapM forall {k}. Map k Int -> m (Maybe (k, Int))
findMax forall (m :: * -> *) a. (Monad m, Ord a) => Fold m a (Map a Int)
frequency'

    where

    fmax :: a -> a -> Maybe (a, a) -> Maybe (a, a)
fmax a
k a
v Maybe (a, a)
Nothing = forall a. a -> Maybe a
Just (a
k, a
v)
    fmax a
k a
v old :: Maybe (a, a)
old@(Just (a
_, a
v1))
        | a
v forall a. Ord a => a -> a -> Bool
> a
v1 = forall a. a -> Maybe a
Just (a
k, a
v)
        | Bool
otherwise = Maybe (a, a)
old

    findMax :: Map k Int -> m (Maybe (k, Int))
findMax = forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a b. (k -> a -> b -> b) -> b -> Map k a -> b
Map.foldrWithKey forall {a} {a}. Ord a => a -> a -> Maybe (a, a) -> Maybe (a, a)
fmax forall a. Maybe a
Nothing

-------------------------------------------------------------------------------
-- Histograms
-------------------------------------------------------------------------------

-- | @binOffsetSize offset binSize input@. Given an integral input value,
-- return its bin index provided that each bin contains @binSize@ items and the
-- bins are aligned such that the 0 index bin starts at @offset@ from 0. If
-- offset = 0 then the bin with index 0 would have values from 0 to binSize -
-- 1.
--
-- This API does not put a bound on the number of bins, therefore, the number
-- of bins could be potentially large depending on the range of values.
--
{-# INLINE binOffsetSize #-}
binOffsetSize :: Integral a => a -> a -> a -> a
binOffsetSize :: forall a. Integral a => a -> a -> a -> a
binOffsetSize a
offset a
binSize a
x = (a
x forall a. Num a => a -> a -> a
- a
offset) forall a. Integral a => a -> a -> a
`div` a
binSize

data HistBin a = BelowRange | InRange a | AboveRange deriving (HistBin a -> HistBin a -> Bool
forall a. Eq a => HistBin a -> HistBin a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: HistBin a -> HistBin a -> Bool
$c/= :: forall a. Eq a => HistBin a -> HistBin a -> Bool
== :: HistBin a -> HistBin a -> Bool
$c== :: forall a. Eq a => HistBin a -> HistBin a -> Bool
Eq, Int -> HistBin a -> ShowS
forall a. Show a => Int -> HistBin a -> ShowS
forall a. Show a => [HistBin a] -> ShowS
forall a. Show a => HistBin a -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [HistBin a] -> ShowS
$cshowList :: forall a. Show a => [HistBin a] -> ShowS
show :: HistBin a -> [Char]
$cshow :: forall a. Show a => HistBin a -> [Char]
showsPrec :: Int -> HistBin a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> HistBin a -> ShowS
Show)

instance (Ord a) => Ord (HistBin a) where
    compare :: HistBin a -> HistBin a -> Ordering
compare HistBin a
BelowRange HistBin a
BelowRange = Ordering
EQ
    compare HistBin a
BelowRange (InRange a
_) = Ordering
LT
    compare HistBin a
BelowRange HistBin a
AboveRange = Ordering
LT

    compare (InRange a
_) HistBin a
BelowRange = Ordering
GT
    compare (InRange a
x) (InRange a
y)= a
x forall a. Ord a => a -> a -> Ordering
`compare` a
y
    compare (InRange a
_) HistBin a
AboveRange = Ordering
LT

    compare HistBin a
AboveRange HistBin a
BelowRange = Ordering
GT
    compare HistBin a
AboveRange (InRange a
_) = Ordering
GT
    compare HistBin a
AboveRange HistBin a
AboveRange = Ordering
EQ

-- | @binFromSizeN low binSize nbins input@. Classify @input@ into bins
-- specified by a @low@ limit, @binSize@ and @nbins@. Inputs below the lower
-- limit are classified into 'BelowRange' and inputs above the highest bin are
-- classified into 'AboveRange'. 'InRange' inputs are classified into bins
-- starting from bin index 0.
--
{-# INLINE binFromSizeN #-}
binFromSizeN :: Integral a => a -> a -> a -> a -> HistBin a
binFromSizeN :: forall a. Integral a => a -> a -> a -> a -> HistBin a
binFromSizeN a
low a
binSize a
nbins a
x =
    let high :: a
high = a
low forall a. Num a => a -> a -> a
+ a
binSize forall a. Num a => a -> a -> a
* a
nbins
     in if a
x forall a. Ord a => a -> a -> Bool
< a
low
        then forall a. HistBin a
BelowRange
        else if a
x forall a. Ord a => a -> a -> Bool
>= a
high
             then forall a. HistBin a
AboveRange
             else forall a. a -> HistBin a
InRange ((a
x forall a. Num a => a -> a -> a
- a
low) forall a. Integral a => a -> a -> a
`div` a
binSize)

-- | @binFromToN low high nbins input@. Like @binFromSizeN@ except that a range
-- of lower and higher limit is specified. @binSize@ is computed using the
-- range and @nbins@. @nbins@ is rounded to the range @0 < nbins < (high - low
-- + 1)@. @high >= low@ must hold.
--
{-# INLINE binFromToN #-}
binFromToN :: Integral a => a -> a -> a -> a -> HistBin a
binFromToN :: forall a. Integral a => a -> a -> a -> a -> HistBin a
binFromToN a
low a
high a
n a
x =
    let count :: a
count = a
high forall a. Num a => a -> a -> a
- a
low forall a. Num a => a -> a -> a
+ a
1
        n1 :: a
n1 = forall a. Ord a => a -> a -> a
max a
n a
1
        n2 :: a
n2 = forall a. Ord a => a -> a -> a
min a
n1 a
count
        binSize :: a
binSize = a
count forall a. Integral a => a -> a -> a
`div` a
n2
        nbins :: a
nbins =
            if a
binSize forall a. Num a => a -> a -> a
* a
n2 forall a. Ord a => a -> a -> Bool
< a
count
            then a
n2 forall a. Num a => a -> a -> a
+ a
1
            else a
n2
     in forall a. HasCallStack => Bool -> a -> a
assert (a
high forall a. Ord a => a -> a -> Bool
>= a
low) (forall a. Integral a => a -> a -> a -> a -> HistBin a
binFromSizeN a
low a
binSize a
nbins a
x)

-- Use binary search to find the bin
--
-- | Classify an input value to bins using the bin boundaries specified in an
-- array.
--
-- /Unimplemented/
--
{-# INLINE binBoundaries #-}
binBoundaries :: -- Integral a =>
    Array.Array a -> a -> HistBin a
binBoundaries :: forall a. Array a -> a -> HistBin a
binBoundaries = forall a. HasCallStack => a
undefined

-- | Given a bin classifier function and a stream of values, generate a
-- histogram map from indices of bins to the number of items in the bin.
--
-- >>> Stream.fold (histogram (binOffsetSize 0 3)) $ Stream.fromList [1..15]
-- fromList [(0,2),(1,3),(2,3),(3,3),(4,3),(5,1)]
--
{-# INLINE histogram #-}
histogram :: (Monad m, Ord k) => (a -> k) -> Fold m a (Map k Int)
histogram :: forall (m :: * -> *) k a.
(Monad m, Ord k) =>
(a -> k) -> Fold m a (Map k Int)
histogram a -> k
bin = forall (m :: * -> *) k a b.
(Monad m, Ord k) =>
(a -> k) -> Fold m a b -> Fold m a (Map k b)
Fold.toMap a -> k
bin forall (m :: * -> *) a. Monad m => Fold m a Int
Fold.length