{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE BangPatterns #-}
-- |
-- Module : Statistics.Sample.Powers
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
-- License : BSD3
--
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : portable
--
-- Very fast statistics over simple powers of a sample. These can all
-- be computed efficiently in just a single pass over a sample, with
-- that pass subject to stream fusion.
--
-- The tradeoff is that some of these functions are less numerically
-- robust than their counterparts in the 'Statistics.Sample' module.
-- Where this is the case, the alternatives are noted.
module Statistics.Sample.Powers
(
-- * Types
Powers
-- * Constructor
, powers
-- * Descriptive functions
, order
, count
, sum
-- * Statistics of location
, mean
-- * Statistics of dispersion
, variance
, stdDev
, varianceUnbiased
-- * Functions over central moments
, centralMoment
, skewness
, kurtosis
-- * References
-- $references
) where
import Data.Vector.Generic (unsafeFreeze)
import Data.Vector.Unboxed ((!))
import Prelude hiding (sum)
import Statistics.Function (indexed)
import Statistics.Internal (inlinePerformIO)
import Statistics.Math (choose)
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed.Mutable as MU
newtype Powers = Powers (U.Vector Double)
deriving (Eq, Show)
-- | O(/n/) Collect the /n/ simple powers of a sample.
--
-- Functions computed over a sample's simple powers require at least a
-- certain number (or /order/) of powers to be collected.
--
-- * To compute the /k/th 'centralMoment', at least /k/ simple powers
-- must be collected.
--
-- * For the 'variance', at least 2 simple powers are needed.
--
-- * For 'skewness', we need at least 3 simple powers.
--
-- * For 'kurtosis', at least 4 simple powers are required.
--
-- This function is subject to stream fusion.
powers :: G.Vector v Double =>
Int -- ^ /n/, the number of powers, where /n/ >= 2.
-> v Double
-> Powers
powers k
| k < 2 = error "Statistics.Sample.powers: too few powers"
| otherwise = fini . G.foldl' go (unsafePerformIO $ MU.newWith l 0)
where
go ms x = inlinePerformIO $ loop 0 1
where loop !i !xk | i == l = return ms
| otherwise = do
MU.read ms i >>= MU.write ms i . (+ xk)
loop (i+1) (xk*x)
fini = Powers . unsafePerformIO . unsafeFreeze
l = k + 1
{-# INLINE powers #-}
-- | The order (number) of simple powers collected from a 'sample'.
order :: Powers -> Int
order (Powers pa) = U.length pa - 1
{-# INLINE order #-}
-- | Compute the /k/th central moment of a sample. The central
-- moment is also known as the moment about the mean.
centralMoment :: Int -> Powers -> Double
centralMoment k p@(Powers pa)
| k < 0 || k > order p =
error ("Statistics.Sample.Powers.centralMoment: "
++ "invalid argument")
| k == 0 = 1
| otherwise = (/n) . U.sum . U.map go . indexed . U.take (k+1) $ pa
where
go (i , e) = (k `choose` i) * ((-m) ^ (k-i)) * e
n = U.head pa
m = mean p
{-# INLINE centralMoment #-}
-- | Maximum likelihood estimate of a sample's variance. Also known
-- as the population variance, where the denominator is /n/. This is
-- the second central moment of the sample.
--
-- This is less numerically robust than the variance function in the
-- 'Statistics.Sample' module, but the number is essentially free to
-- compute if you have already collected a sample's simple powers.
--
-- Requires 'Powers' with 'order' at least 2.
variance :: Powers -> Double
variance = centralMoment 2
{-# INLINE variance #-}
-- | Standard deviation. This is simply the square root of the
-- maximum likelihood estimate of the variance.
stdDev :: Powers -> Double
stdDev = sqrt . variance
{-# INLINE stdDev #-}
-- | Unbiased estimate of a sample's variance. Also known as the
-- sample variance, where the denominator is /n/-1.
--
-- Requires 'Powers' with 'order' at least 2.
varianceUnbiased :: Powers -> Double
varianceUnbiased p@(Powers pa)
| n > 1 = variance p * n / (n-1)
| otherwise = 0
where n = U.head pa
{-# INLINE varianceUnbiased #-}
-- | Compute the skewness of a sample. This is a measure of the
-- asymmetry of its distribution.
--
-- A sample with negative skew is said to be /left-skewed/. Most of
-- its mass is on the right of the distribution, with the tail on the
-- left.
--
-- > skewness . powers 3 $ U.to [1,100,101,102,103]
-- > ==> -1.497681449918257
--
-- A sample with positive skew is said to be /right-skewed/.
--
-- > skewness . powers 3 $ U.to [1,2,3,4,100]
-- > ==> 1.4975367033335198
--
-- A sample's skewness is not defined if its 'variance' is zero.
--
-- Requires 'Powers' with 'order' at least 3.
skewness :: Powers -> Double
skewness p = centralMoment 3 p * variance p ** (-1.5)
{-# INLINE skewness #-}
-- | Compute the excess kurtosis of a sample. This is a measure of
-- the \"peakedness\" of its distribution. A high kurtosis indicates
-- that the sample's variance is due more to infrequent severe
-- deviations than to frequent modest deviations.
--
-- A sample's excess kurtosis is not defined if its 'variance' is
-- zero.
--
-- Requires 'Powers' with 'order' at least 4.
kurtosis :: Powers -> Double
kurtosis p = centralMoment 4 p / (v * v) - 3
where v = variance p
{-# INLINE kurtosis #-}
-- | The number of elements in the original 'Sample'. This is the
-- sample's zeroth simple power.
count :: Powers -> Int
count (Powers pa) = floor $ U.head pa
{-# INLINE count #-}
-- | The sum of elements in the original 'Sample'. This is the
-- sample's first simple power.
sum :: Powers -> Double
sum (Powers pa) = pa ! 1
{-# INLINE sum #-}
-- | The arithmetic mean of elements in the original 'Sample'.
--
-- This is less numerically robust than the mean function in the
-- 'Statistics.Sample' module, but the number is essentially free to
-- compute if you have already collected a sample's simple powers.
mean :: Powers -> Double
mean p@(Powers pa)
| n == 0 = 0
| otherwise = sum p / n
where n = U.head pa
{-# INLINE mean #-}
-- $references
--
-- * Besset, D.H. (2000) Elements of statistics.
-- /Object-oriented implementation of numerical methods/
-- ch. 9, pp. 311–331.
--
--
-- * Anderson, G. (2009) Compute /k/th central moments in one
-- pass. /quantblog/.