-- |
-- Module      :  Numeric.Stats
-- Copyright   :  (c) OleksandrZhabenko 2020-2023
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  oleksandr.zhabenko@yahoo.com
--
-- A very basic descriptive statistics. Functions use a tail recursion approach to compute the values and are strict by an accumulator.

{-# LANGUAGE BangPatterns, MagicHash, NoImplicitPrelude #-}

module Numeric.Stats where

import GHC.Exts
import GHC.Prim
import GHC.Base
import GHC.Real
import GHC.Float
import GHC.Num

-- | Uses GHC unlifted types from @ghc-prim@ package.
-- Similar code is here: Don Stewart.  https://donsbot.wordpress.com/2008/05/06/write-haskell-as-fast-as-c-exploiting-strictness-laziness-and-recursion/
-- And here: http://fixpt.de/blog/2017-12-04-strictness-analysis-part-1.html
-- And here: Michael Snoyman. https://www.fpcomplete.com/blog/2017/09/all-about-strictness/
--
mean2F :: [Float] -> Float# -> Int# -> Float
mean2F ((F# !x):xs) !s1 !l1 = mean2F xs (plusFloat# s1 x) (l1 +# 1#)
mean2F _ !s1 !l1 =
 case I# l1 of
  0 -> error "Numeric.Stats.mean2F: Not defined for the third zero argument. "
  _  -> F# m
   where !m = divideFloat# s1 (int2Float# l1)

-- | Uses GHC unlifted types from @ghc-prim@ package.
-- Similar code is here: Don Stewart.  https://donsbot.wordpress.com/2008/05/06/write-haskell-as-fast-as-c-exploiting-strictness-laziness-and-recursion/
-- And here: http://fixpt.de/blog/2017-12-04-strictness-analysis-part-1.html
-- And here: Michael Snoyman. https://www.fpcomplete.com/blog/2017/09/all-about-strictness/
--
mean2D :: [Double] -> Double# -> Int# -> Double
mean2D ((D# !x):xs) !s1 !l1 = mean2D xs (s1 +## x) (l1 +# 1#)
mean2D _ !s1 !l1 =
 case I# l1 of
  0 -> error "Numeric.Stats.mean2D: Not defined for the third zero argument. "
  _  -> D# m
   where !m = s1 /## int2Double# l1

-- | One-pass and tail-recursive realization for the pair of the mean and dispersion. Is vulnerable to the floating-point cancellation errors.
-- Similar code is here: Don Stewart.  https://donsbot.wordpress.com/2008/05/06/write-haskell-as-fast-as-c-exploiting-strictness-laziness-and-recursion/
-- And here: http://fixpt.de/blog/2017-12-04-strictness-analysis-part-1.html
-- And here: Michael Snoyman. https://www.fpcomplete.com/blog/2017/09/all-about-strictness/
-- When using the needed, please, refer better to their variants.
--
-- Among the 'meanWithDispersion', 'meanWithDisprsionF' and 'meanWithDispersionD' better to use the last one.
meanWithDispersionP :: (RealFrac a, Floating a) => [a] -> a -> a -> a -> a -> a -> (a,a)
meanWithDispersionP (!x:xs) !s1 !s2 !l1 m1 d = meanWithDispersionP xs (s1 + x) (s2 + x*x) (l1 + 1) (m0 s1 l1 x) (m0 s2 l1 (x*x) - (m0 s1 l1 x)**2)
  where m0 !s3 !l2 !x = (s3 + x) / (l2 + 1)
meanWithDispersionP _ _ _ _ !m !d = (m,d)

-- | Among the 'meanWithDispersion', 'meanWithDisprsionF' and 'meanWithDispersionD' better to use the last one.
meanWithDispersionFP :: [Float] -> Float# -> Float# -> Int# -> (Float,Float)
meanWithDispersionFP ((F# !x):xs) !s1 !s2 !l1 = meanWithDispersionFP xs (plusFloat# s1 x) (plusFloat# s2 (timesFloat# x x)) (l1 +# 1#)
meanWithDispersionFP [] !s1 !s2 !l1 = (F# m, F# (minusFloat# (divideFloat# s2 (int2Float# l1)) (timesFloat# m m)))
  where !m = divideFloat# s1 (int2Float# l1)

-- | Among the 'meanWithDispersion', 'meanWithDisprsionF' and 'meanWithDispersionD' better to use the last one.
meanWithDispersionDP :: [Double] -> Double# -> Double# -> Int# -> (Double,Double)
meanWithDispersionDP ((D# !x):xs) !s1 !s2 !l1 = meanWithDispersionDP xs (s1 +## x) (s2 +## (x *## x)) (l1 +# 1#)
meanWithDispersionDP [] !s1 !s2 !l1 = (D# m, D# ((s2 /## int2Double# l1) -## (m *## m)))
  where !m = s1 /## int2Double# l1

-- | Uses 'mean2F' inside.
meanF :: [Float] -> Float
meanF xs = mean2F xs 0.0# 0#
{-# INLINE meanF #-}

meanD :: [Double] -> Double
meanD xs = mean2D xs 0.0## 0#

-- | Among the 'meanWithDispP', 'meanWithDispF2P' and 'meanWithDispD2P' better to use the last one.
meanWithDispP :: (RealFrac a, Floating a) => [a] -> (a,a)
meanWithDispP xs@(_:_) = meanWithDispersionP xs 0.0 0.0 0.0 0.0 0.0
meanWithDispP _ = error "Numeric.Stats.meanWithDispP: Not defined for the empty list. "
{-# RULES "realfrac/float" meanWithDispP = meanWithDispF2P #-}
{-# RULES "realfrac/double" meanWithDispP = meanWithDispD2P #-}
{-# INLINE[2] meanWithDispP #-}

-- | Among the 'meanWithDispP', 'meanWithDispF2P' and 'meanWithDispD2P' better to use the last one.
meanWithDispF2P :: [Float] -> (Float,Float)
meanWithDispF2P xs@(_:_) = meanWithDispersionFP xs 0.0# 0.0# 0#
meanWithDispF2P _ = error "Numeric.Stats.meanWithDispF2P: Not defined for the empty list. "
{-# INLINE meanWithDispF2P #-}

-- | Among the 'meanWithDispP', 'meanWithDispF2P' and 'meanWithDispD2P' better to use the last one.
meanWithDispD2P :: [Double] -> (Double,Double)
meanWithDispD2P xs@(x:_) = meanWithDispersionDP xs 0.0## 0.0## 0#
meanWithDispD2P _ = error "Numeric.Stats.meanWithDispD2P: Not defined for the empty list. "
{-# INLINE meanWithDispD2P #-}

--------------------------------------------------

-- Inspired by: https://www.khanacademy.org/math/ap-statistics/summarizing-quantitative-data-ap/more-standard-deviation/v/simulation-showing-bias-in-sample-variance
-- and:
-- https://www.khanacademy.org/math/ap-statistics/summarizing-quantitative-data-ap/more-standard-deviation/v/simulation-providing-evidence-that-n-1-gives-us-unbiased-estimate


-- | One-pass and tail-recursive realization for the pair of the mean and dispersion. Is vulnerable to the floating-point cancellation errors.
-- Similar code is here: Don Stewart.  https://donsbot.wordpress.com/2008/05/06/write-haskell-as-fast-as-c-exploiting-strictness-laziness-and-recursion/
-- And here: http://fixpt.de/blog/2017-12-04-strictness-analysis-part-1.html
-- And here: Michael Snoyman. https://www.fpcomplete.com/blog/2017/09/all-about-strictness/
-- When using the needed, please, refer better to their variants.
-- Among the 'meanWithDisp', 'meanWithDispF2' and 'meanWithDispD2' better to use the last one.
meanWithDisp :: (RealFrac a, Floating a) => [a] -> (a,a)
meanWithDisp xs@(_:_:_) = mdl xs 0.0 0.0 0.0
  where mdl (!x:ys) !s1 !s2 !l1 = mdl ys (s1 + x) (s2 + x*x) (l1 + 1)
        mdl _ !s1 !s2 !l = (mm, (s2 - mm**2 * l) / (l - 1))
          where !mm = s1 / l
meanWithDisp _ = error "Numeric.Stats.meanWithDisp: Not defined for the list with less than two elements. "
{-# RULES "realfrac/float" meanWithDisp = meanWithDispF2 #-}
{-# RULES "realfrac/double" meanWithDisp = meanWithDispD2 #-}
{-# INLINE[2] meanWithDisp #-}

-- | One-pass and tail-recursive realization for the pair of the mean and dispersion. Is vulnerable to the floating-point cancellation errors.
-- Similar code is here: Don Stewart.  https://donsbot.wordpress.com/2008/05/06/write-haskell-as-fast-as-c-exploiting-strictness-laziness-and-recursion/
-- And here: http://fixpt.de/blog/2017-12-04-strictness-analysis-part-1.html
-- And here: Michael Snoyman. https://www.fpcomplete.com/blog/2017/09/all-about-strictness/
-- When using the needed, please, refer better to their variants.
-- Among the 'meanWithDisp', 'meanWithDispF2' and 'meanWithDispD2' better to use the last one.
meanWithDispF2 :: [Float] -> (Float,Float)
meanWithDispF2 xs@(_:_:_) = mdlF xs 0.0# 0.0# 0#
  where mdlF (F# !x:ys) !s1 !s2 !l1 = mdlF ys (plusFloat# s1 x) (plusFloat# s2 (timesFloat# x x)) (l1 +# 1#)
        mdlF [] !s1 !s2 !l1 = (F# m, F# (divideFloat# (minusFloat# s2 (timesFloat# (timesFloat# m m) (int2Float# l1))) (int2Float# (l1 -# 1#))))
          where !m = divideFloat# s1 (int2Float# l1)
meanWithDispF2 _ = error "Numeric.Stats.meanWithDispF2: Not defined for the list with less than two elements. "
{-# INLINE meanWithDispF2 #-}

-- | One-pass and tail-recursive realization for the pair of the mean and dispersion. Is vulnerable to the floating-point cancellation errors.
-- Similar code is here: Don Stewart.  https://donsbot.wordpress.com/2008/05/06/write-haskell-as-fast-as-c-exploiting-strictness-laziness-and-recursion/
-- And here: http://fixpt.de/blog/2017-12-04-strictness-analysis-part-1.html
-- And here: Michael Snoyman. https://www.fpcomplete.com/blog/2017/09/all-about-strictness/
-- When using the needed, please, refer better to their variants.
-- Among the 'meanWithDisp', 'meanWithDispF2' and 'meanWithDispD2' better to use the last one.
meanWithDispD2 :: [Double] -> (Double,Double)
meanWithDispD2 xs@(_:_:_) = mdlD xs 0.0## 0.0## 0#
  where mdlD ((D# !x):xs) !s1 !s2 !l1 = mdlD xs (s1 +## x) (s2 +## (x *## x)) (l1 +# 1#)
        mdlD [] !s1 !s2 !l1 = (D# m, D# ((s2 -## m *## m *## int2Double# l1) /## (int2Double# (l1 -# 1#))))
          where !m = s1 /## int2Double# l1
meanWithDispD2 _ = error "Numeric.Stats.meanWithDispD2: Not defined for the list with less than two elements. "
{-# INLINE meanWithDispD2 #-}