{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE BangPatterns #-} -- | -- Module : Statistics.Sample.Powers -- Copyright : (c) 2009, 2010 Bryan O'Sullivan -- License : BSD3 -- -- Maintainer : bos@serpentine.com -- Stability : experimental -- Portability : portable -- -- Very fast statistics over simple powers of a sample. These can all -- be computed efficiently in just a single pass over a sample, with -- that pass subject to stream fusion. -- -- The tradeoff is that some of these functions are less numerically -- robust than their counterparts in the 'Statistics.Sample' module. -- Where this is the case, the alternatives are noted. module Statistics.Sample.Powers ( -- * Types Powers -- * Constructor , powers -- * Descriptive functions , order , count , sum -- * Statistics of location , mean -- * Statistics of dispersion , variance , stdDev , varianceUnbiased -- * Functions over central moments , centralMoment , skewness , kurtosis -- * References -- $references ) where import Data.Vector.Generic (unsafeFreeze) import Data.Vector.Unboxed ((!)) import Prelude hiding (sum) import Statistics.Function (indexed) import Statistics.Internal (inlinePerformIO) import Statistics.Math (choose) import System.IO.Unsafe (unsafePerformIO) import qualified Data.Vector.Unboxed as U import qualified Data.Vector.Generic as G import qualified Data.Vector.Unboxed.Mutable as MU newtype Powers = Powers (U.Vector Double) deriving (Eq, Show) -- | O(/n/) Collect the /n/ simple powers of a sample. -- -- Functions computed over a sample's simple powers require at least a -- certain number (or /order/) of powers to be collected. -- -- * To compute the /k/th 'centralMoment', at least /k/ simple powers -- must be collected. -- -- * For the 'variance', at least 2 simple powers are needed. -- -- * For 'skewness', we need at least 3 simple powers. -- -- * For 'kurtosis', at least 4 simple powers are required. -- -- This function is subject to stream fusion. powers :: G.Vector v Double => Int -- ^ /n/, the number of powers, where /n/ >= 2. -> v Double -> Powers powers k | k < 2 = error "Statistics.Sample.powers: too few powers" | otherwise = fini . G.foldl' go (unsafePerformIO $ MU.replicate l 0) where go ms x = inlinePerformIO $ loop 0 1 where loop !i !xk | i == l = return ms | otherwise = do MU.read ms i >>= MU.write ms i . (+ xk) loop (i+1) (xk*x) fini = Powers . unsafePerformIO . unsafeFreeze l = k + 1 {-# INLINE powers #-} -- | The order (number) of simple powers collected from a 'sample'. order :: Powers -> Int order (Powers pa) = U.length pa - 1 {-# INLINE order #-} -- | Compute the /k/th central moment of a sample. The central -- moment is also known as the moment about the mean. centralMoment :: Int -> Powers -> Double centralMoment k p@(Powers pa) | k < 0 || k > order p = error ("Statistics.Sample.Powers.centralMoment: " ++ "invalid argument") | k == 0 = 1 | otherwise = (/n) . U.sum . U.map go . indexed . U.take (k+1) $ pa where go (i , e) = (k `choose` i) * ((-m) ^ (k-i)) * e n = U.head pa m = mean p {-# INLINE centralMoment #-} -- | Maximum likelihood estimate of a sample's variance. Also known -- as the population variance, where the denominator is /n/. This is -- the second central moment of the sample. -- -- This is less numerically robust than the variance function in the -- 'Statistics.Sample' module, but the number is essentially free to -- compute if you have already collected a sample's simple powers. -- -- Requires 'Powers' with 'order' at least 2. variance :: Powers -> Double variance = centralMoment 2 {-# INLINE variance #-} -- | Standard deviation. This is simply the square root of the -- maximum likelihood estimate of the variance. stdDev :: Powers -> Double stdDev = sqrt . variance {-# INLINE stdDev #-} -- | Unbiased estimate of a sample's variance. Also known as the -- sample variance, where the denominator is /n/-1. -- -- Requires 'Powers' with 'order' at least 2. varianceUnbiased :: Powers -> Double varianceUnbiased p@(Powers pa) | n > 1 = variance p * n / (n-1) | otherwise = 0 where n = U.head pa {-# INLINE varianceUnbiased #-} -- | Compute the skewness of a sample. This is a measure of the -- asymmetry of its distribution. -- -- A sample with negative skew is said to be /left-skewed/. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- -- > skewness . powers 3 $ U.to [1,100,101,102,103] -- > ==> -1.497681449918257 -- -- A sample with positive skew is said to be /right-skewed/. -- -- > skewness . powers 3 $ U.to [1,2,3,4,100] -- > ==> 1.4975367033335198 -- -- A sample's skewness is not defined if its 'variance' is zero. -- -- Requires 'Powers' with 'order' at least 3. skewness :: Powers -> Double skewness p = centralMoment 3 p * variance p ** (-1.5) {-# INLINE skewness #-} -- | Compute the excess kurtosis of a sample. This is a measure of -- the \"peakedness\" of its distribution. A high kurtosis indicates -- that the sample's variance is due more to infrequent severe -- deviations than to frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its 'variance' is -- zero. -- -- Requires 'Powers' with 'order' at least 4. kurtosis :: Powers -> Double kurtosis p = centralMoment 4 p / (v * v) - 3 where v = variance p {-# INLINE kurtosis #-} -- | The number of elements in the original 'Sample'. This is the -- sample's zeroth simple power. count :: Powers -> Int count (Powers pa) = floor $ U.head pa {-# INLINE count #-} -- | The sum of elements in the original 'Sample'. This is the -- sample's first simple power. sum :: Powers -> Double sum (Powers pa) = pa ! 1 {-# INLINE sum #-} -- | The arithmetic mean of elements in the original 'Sample'. -- -- This is less numerically robust than the mean function in the -- 'Statistics.Sample' module, but the number is essentially free to -- compute if you have already collected a sample's simple powers. mean :: Powers -> Double mean p@(Powers pa) | n == 0 = 0 | otherwise = sum p / n where n = U.head pa {-# INLINE mean #-} -- $references -- -- * Besset, D.H. (2000) Elements of statistics. -- /Object-oriented implementation of numerical methods/ -- ch. 9, pp. 311–331. -- -- -- * Anderson, G. (2009) Compute /k/th central moments in one -- pass. /quantblog/.