{-# OPTIONS_GHC -Wno-partial-fields #-}
{-# LANGUAGE DeriveAnyClass    #-}
{-# LANGUAGE DeriveGeneric     #-}
{-# LANGUAGE FlexibleInstances #-}
module Statistics.Sample.WelfordOnlineMeanVariance
  ( WelfordExistingAggregate(..)
  , WelfordOnline (..)
  , newWelfordAggregateDef
  , newWelfordAggregate
  , welfordCount
  , mWelfordMean
  , welfordMeanDef
  , addValue
  , addValues
  , mFinalize
  , finalize
  , nextValue
  , isWelfordExistingAggregateEmpty
  , Mean
  , Variance
  , SampleVariance
  ) where

import           Control.DeepSeq
import           Data.Maybe           (fromMaybe)
import           Data.Serialize
import qualified Data.Vector          as VB
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Unboxed  as VU
import           GHC.Generics

type Mean a = a
type Variance a = a
type SampleVariance a = a


-- | For the storage of required information.
data WelfordExistingAggregate a
  = WelfordExistingAggregateEmpty -- ^ Emtpy aggregate. Needed as `a` can be of any type, which, hence, allows us to postpone determining `a` to when we receive the first value.
  | WelfordExistingAggregate
      { forall a. WelfordExistingAggregate a -> Int
welfordCountUnsafe :: !Int
      , forall a. WelfordExistingAggregate a -> a
welfordMeanUnsafe  :: !a
      , forall a. WelfordExistingAggregate a -> a
welfordM2Unsafe    :: !a
      }
  deriving (WelfordExistingAggregate a -> WelfordExistingAggregate a -> Bool
forall a.
Eq a =>
WelfordExistingAggregate a -> WelfordExistingAggregate a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: WelfordExistingAggregate a -> WelfordExistingAggregate a -> Bool
$c/= :: forall a.
Eq a =>
WelfordExistingAggregate a -> WelfordExistingAggregate a -> Bool
== :: WelfordExistingAggregate a -> WelfordExistingAggregate a -> Bool
$c== :: forall a.
Eq a =>
WelfordExistingAggregate a -> WelfordExistingAggregate a -> Bool
Eq, Int -> WelfordExistingAggregate a -> ShowS
forall a. Show a => Int -> WelfordExistingAggregate a -> ShowS
forall a. Show a => [WelfordExistingAggregate a] -> ShowS
forall a. Show a => WelfordExistingAggregate a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [WelfordExistingAggregate a] -> ShowS
$cshowList :: forall a. Show a => [WelfordExistingAggregate a] -> ShowS
show :: WelfordExistingAggregate a -> String
$cshow :: forall a. Show a => WelfordExistingAggregate a -> String
showsPrec :: Int -> WelfordExistingAggregate a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> WelfordExistingAggregate a -> ShowS
Show, ReadPrec [WelfordExistingAggregate a]
ReadPrec (WelfordExistingAggregate a)
ReadS [WelfordExistingAggregate a]
forall a. Read a => ReadPrec [WelfordExistingAggregate a]
forall a. Read a => ReadPrec (WelfordExistingAggregate a)
forall a. Read a => Int -> ReadS (WelfordExistingAggregate a)
forall a. Read a => ReadS [WelfordExistingAggregate a]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [WelfordExistingAggregate a]
$creadListPrec :: forall a. Read a => ReadPrec [WelfordExistingAggregate a]
readPrec :: ReadPrec (WelfordExistingAggregate a)
$creadPrec :: forall a. Read a => ReadPrec (WelfordExistingAggregate a)
readList :: ReadS [WelfordExistingAggregate a]
$creadList :: forall a. Read a => ReadS [WelfordExistingAggregate a]
readsPrec :: Int -> ReadS (WelfordExistingAggregate a)
$creadsPrec :: forall a. Read a => Int -> ReadS (WelfordExistingAggregate a)
Read, forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall a x.
Rep (WelfordExistingAggregate a) x -> WelfordExistingAggregate a
forall a x.
WelfordExistingAggregate a -> Rep (WelfordExistingAggregate a) x
$cto :: forall a x.
Rep (WelfordExistingAggregate a) x -> WelfordExistingAggregate a
$cfrom :: forall a x.
WelfordExistingAggregate a -> Rep (WelfordExistingAggregate a) x
Generic, forall a. NFData a => WelfordExistingAggregate a -> ()
forall a. (a -> ()) -> NFData a
rnf :: WelfordExistingAggregate a -> ()
$crnf :: forall a. NFData a => WelfordExistingAggregate a -> ()
NFData, forall a. Serialize a => Get (WelfordExistingAggregate a)
forall a. Serialize a => Putter (WelfordExistingAggregate a)
forall t. Putter t -> Get t -> Serialize t
get :: Get (WelfordExistingAggregate a)
$cget :: forall a. Serialize a => Get (WelfordExistingAggregate a)
put :: Putter (WelfordExistingAggregate a)
$cput :: forall a. Serialize a => Putter (WelfordExistingAggregate a)
Serialize)

-- | Create a new empty Aggreate for the calculation.
newWelfordAggregate :: WelfordExistingAggregate a
newWelfordAggregate :: forall a. WelfordExistingAggregate a
newWelfordAggregate = forall a. WelfordExistingAggregate a
WelfordExistingAggregateEmpty

-- | Create a new empty Aggreate by specifying an example `a` value. It is safe to use the `*Unsafe` record field selectors from `WelfordExistingAggregate a`, when creating the data structure using
-- this fuction.
newWelfordAggregateDef :: (WelfordOnline a) => a -> WelfordExistingAggregate a
newWelfordAggregateDef :: forall a. WelfordOnline a => a -> WelfordExistingAggregate a
newWelfordAggregateDef a
a = forall a. Int -> a -> a -> WelfordExistingAggregate a
WelfordExistingAggregate Int
0 (a
a forall a. WelfordOnline a => a -> a -> a
`minus` a
a) (a
a forall a. WelfordOnline a => a -> a -> a
`minus` a
a)

-- | Check if it is aggregate is empty
isWelfordExistingAggregateEmpty :: WelfordExistingAggregate a -> Bool
isWelfordExistingAggregateEmpty :: forall a. WelfordExistingAggregate a -> Bool
isWelfordExistingAggregateEmpty WelfordExistingAggregate a
WelfordExistingAggregateEmpty = Bool
True
isWelfordExistingAggregateEmpty WelfordExistingAggregate a
_                             = Bool
False

-- | Get counter safely, returns Nothing if `WelfordExistingAggregateEmpty`.
welfordCount :: WelfordExistingAggregate a -> Int
welfordCount :: forall a. WelfordExistingAggregate a -> Int
welfordCount WelfordExistingAggregate a
WelfordExistingAggregateEmpty    = Int
0
welfordCount (WelfordExistingAggregate Int
c a
_ a
_) = Int
c

-- | Get counter safely, returns Nothing if `WelfordExistingAggregateEmpty`.
mWelfordMean :: WelfordExistingAggregate a -> Maybe a
mWelfordMean :: forall a. WelfordExistingAggregate a -> Maybe a
mWelfordMean WelfordExistingAggregate a
WelfordExistingAggregateEmpty    = forall a. Maybe a
Nothing -- Cannot create value `a`
mWelfordMean (WelfordExistingAggregate Int
_ a
m a
_) = forall a. a -> Maybe a
Just a
m

-- | Get counter with specifying a default value.
welfordMeanDef :: (WelfordOnline a) => a -> WelfordExistingAggregate a -> a
welfordMeanDef :: forall a. WelfordOnline a => a -> WelfordExistingAggregate a -> a
welfordMeanDef a
a WelfordExistingAggregate a
WelfordExistingAggregateEmpty    = a
a forall a. WelfordOnline a => a -> a -> a
`minus` a
a
welfordMeanDef a
_ (WelfordExistingAggregate Int
_ a
m a
_) = a
m

-- | Add one value to the current aggregate.
addValue :: (WelfordOnline a) => WelfordExistingAggregate a -> a -> WelfordExistingAggregate a
addValue :: forall a.
WelfordOnline a =>
WelfordExistingAggregate a -> a -> WelfordExistingAggregate a
addValue (WelfordExistingAggregate Int
count a
mean a
m2) a
val =
  let count' :: Int
count' = Int
count forall a. Num a => a -> a -> a
+ Int
1
      delta :: a
delta = a
val forall a. WelfordOnline a => a -> a -> a
`minus` a
mean
      mean' :: a
mean' = a
mean forall a. WelfordOnline a => a -> a -> a
`plus` (a
delta forall a. WelfordOnline a => a -> Int -> a
`divideInt` Int
count')
      delta2 :: a
delta2 = a
val forall a. WelfordOnline a => a -> a -> a
`minus` a
mean'
      m2' :: a
m2' = a
m2 forall a. WelfordOnline a => a -> a -> a
`plus` (a
delta forall a. WelfordOnline a => a -> a -> a
`multiply` a
delta2)
   in forall a. Int -> a -> a -> WelfordExistingAggregate a
WelfordExistingAggregate Int
count' a
mean' a
m2'
addValue WelfordExistingAggregate a
WelfordExistingAggregateEmpty a
val =
  let count' :: Int
count' = Int
1
      delta' :: a
delta' = a
val
      mean' :: a
mean' = a
delta' forall a. WelfordOnline a => a -> Int -> a
`divideInt` Int
count'
      delta2' :: a
delta2' = a
val forall a. WelfordOnline a => a -> a -> a
`minus` a
mean'
      m2' :: a
m2' = a
delta' forall a. WelfordOnline a => a -> a -> a
`multiply` a
delta2'
   in forall a. Int -> a -> a -> WelfordExistingAggregate a
WelfordExistingAggregate Int
count' a
mean' a
m2'

-- | Add multiple values to the current aggregate. This is `foldl addValue`.
addValues :: (WelfordOnline a, Foldable f) => WelfordExistingAggregate a -> f a -> WelfordExistingAggregate a
addValues :: forall a (f :: * -> *).
(WelfordOnline a, Foldable f) =>
WelfordExistingAggregate a -> f a -> WelfordExistingAggregate a
addValues = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl forall a.
WelfordOnline a =>
WelfordExistingAggregate a -> a -> WelfordExistingAggregate a
addValue

-- | Calculate mean, variance and sample variance from aggregate. Calls `error` for `WelfordExistingAggregateEmpty`.
finalize :: (WelfordOnline a) => WelfordExistingAggregate a -> (Mean a, Variance a, SampleVariance a)
finalize :: forall a.
WelfordOnline a =>
WelfordExistingAggregate a -> (a, a, a)
finalize = forall a. a -> Maybe a -> a
fromMaybe forall {a}. a
err forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a.
WelfordOnline a =>
WelfordExistingAggregate a -> Maybe (a, a, a)
mFinalize
  where err :: a
err = forall a. HasCallStack => String -> a
error String
"Statistics.Sample.WelfordOnlineMeanVariance.finalize: Emtpy Welford Online Aggreate. Add data first!"

-- | Calculate mean, variance and sample variance from aggregate. Safe function.
mFinalize :: (WelfordOnline a) => WelfordExistingAggregate a -> Maybe (Mean a, Variance a, SampleVariance a)
mFinalize :: forall a.
WelfordOnline a =>
WelfordExistingAggregate a -> Maybe (a, a, a)
mFinalize (WelfordExistingAggregate Int
count a
mean a
m2)
  | Int
count forall a. Ord a => a -> a -> Bool
< Int
2 = forall a. a -> Maybe a
Just (a
mean, a
m2, a
m2)
  | Bool
otherwise = forall a. a -> Maybe a
Just (a
mean, a
m2 forall a. WelfordOnline a => a -> Int -> a
`divideInt` Int
count, a
m2 forall a. WelfordOnline a => a -> Int -> a
`divideInt` (Int
count forall a. Num a => a -> a -> a
- Int
1))
mFinalize WelfordExistingAggregate a
WelfordExistingAggregateEmpty = forall a. Maybe a
Nothing


-- | Add a new sample to the aggregate and compute mean and variances.
nextValue :: (WelfordOnline a) => WelfordExistingAggregate a -> a -> (WelfordExistingAggregate a, (Mean a, Variance a, SampleVariance a))
nextValue :: forall a.
WelfordOnline a =>
WelfordExistingAggregate a
-> a -> (WelfordExistingAggregate a, (a, a, a))
nextValue WelfordExistingAggregate a
agg a
val =
  let agg' :: WelfordExistingAggregate a
agg' = forall a.
WelfordOnline a =>
WelfordExistingAggregate a -> a -> WelfordExistingAggregate a
addValue WelfordExistingAggregate a
agg a
val
   in (WelfordExistingAggregate a
agg', forall a.
WelfordOnline a =>
WelfordExistingAggregate a -> (a, a, a)
finalize WelfordExistingAggregate a
agg')


-- | Class for all data strucutres that can be used to computer the Welford approximation. For instance, this can be used to compute the Welford algorithm on a `Vector`s of `Fractional`, while only
-- requiring to handle one `WelfordExistingAggregate`.
class WelfordOnline a where
  plus :: a -> a -> a
  minus :: a -> a -> a
  multiply :: a -> a -> a
  divideInt :: a -> Int -> a


instance (WelfordOnline a) => WelfordOnline (VB.Vector a) where
  plus :: Vector a -> Vector a -> Vector a
plus          = forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
VB.zipWith forall a. WelfordOnline a => a -> a -> a
plus
  {-# INLINE plus #-}
  minus :: Vector a -> Vector a -> Vector a
minus         = forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
VB.zipWith forall a. WelfordOnline a => a -> a -> a
minus
  {-# INLINE minus #-}
  multiply :: Vector a -> Vector a -> Vector a
multiply      = forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
VB.zipWith forall a. WelfordOnline a => a -> a -> a
multiply
  {-# INLINE multiply #-}
  divideInt :: Vector a -> Int -> Vector a
divideInt Vector a
x Int
i = forall a b. (a -> b) -> Vector a -> Vector b
VB.map (forall a. WelfordOnline a => a -> Int -> a
`divideInt` Int
i) Vector a
x
  {-# INLINE divideInt #-}

instance (WelfordOnline a, VS.Storable a) => WelfordOnline (VS.Vector a) where
  plus :: Vector a -> Vector a -> Vector a
plus          = forall a b c.
(Storable a, Storable b, Storable c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
VS.zipWith forall a. WelfordOnline a => a -> a -> a
plus
  {-# INLINE plus #-}
  minus :: Vector a -> Vector a -> Vector a
minus         = forall a b c.
(Storable a, Storable b, Storable c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
VS.zipWith forall a. WelfordOnline a => a -> a -> a
minus
  {-# INLINE minus #-}
  multiply :: Vector a -> Vector a -> Vector a
multiply      = forall a b c.
(Storable a, Storable b, Storable c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
VS.zipWith forall a. WelfordOnline a => a -> a -> a
multiply
  {-# INLINE multiply #-}
  divideInt :: Vector a -> Int -> Vector a
divideInt Vector a
x Int
i = forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
VS.map (forall a. WelfordOnline a => a -> Int -> a
`divideInt` Int
i) Vector a
x
  {-# INLINE divideInt #-}


instance (WelfordOnline a, VU.Unbox a) => WelfordOnline (VU.Vector a) where
  plus :: Vector a -> Vector a -> Vector a
plus          = forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
VU.zipWith forall a. WelfordOnline a => a -> a -> a
plus
  {-# INLINE plus #-}
  minus :: Vector a -> Vector a -> Vector a
minus         = forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
VU.zipWith forall a. WelfordOnline a => a -> a -> a
minus
  {-# INLINE minus #-}
  multiply :: Vector a -> Vector a -> Vector a
multiply      = forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
VU.zipWith forall a. WelfordOnline a => a -> a -> a
multiply
  {-# INLINE multiply #-}
  divideInt :: Vector a -> Int -> Vector a
divideInt Vector a
x Int
i = forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map (forall a. WelfordOnline a => a -> Int -> a
`divideInt` Int
i) Vector a
x
  {-# INLINE divideInt #-}


instance WelfordOnline Double where
  plus :: Double -> Double -> Double
plus = forall a. Num a => a -> a -> a
(+)
  {-# INLINE plus #-}
  minus :: Double -> Double -> Double
minus = (-)
  {-# INLINE minus #-}
  multiply :: Double -> Double -> Double
multiply = forall a. Num a => a -> a -> a
(*)
  {-# INLINE multiply #-}
  divideInt :: Double -> Int -> Double
divideInt Double
x Int
i = Double
x forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i
  {-# INLINE divideInt #-}

instance WelfordOnline Float where
  plus :: Float -> Float -> Float
plus = forall a. Num a => a -> a -> a
(+)
  {-# INLINE plus #-}
  minus :: Float -> Float -> Float
minus = (-)
  {-# INLINE minus #-}
  multiply :: Float -> Float -> Float
multiply = forall a. Num a => a -> a -> a
(*)
  {-# INLINE multiply #-}
  divideInt :: Float -> Int -> Float
divideInt Float
x Int
i = Float
x forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i
  {-# INLINE divideInt #-}

instance WelfordOnline Rational where
  plus :: Rational -> Rational -> Rational
plus = forall a. Num a => a -> a -> a
(+)
  {-# INLINE plus #-}
  minus :: Rational -> Rational -> Rational
minus = (-)
  {-# INLINE minus #-}
  multiply :: Rational -> Rational -> Rational
multiply = forall a. Num a => a -> a -> a
(*)
  {-# INLINE multiply #-}
  divideInt :: Rational -> Int -> Rational
divideInt Rational
x Int
i = Rational
x forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i
  {-# INLINE divideInt #-}