----------------------------------------------------------------------------- -- | -- Module : Data.Summary.Double -- Copyright : Copyright (c) 2010, Patrick Perry -- License : BSD3 -- Maintainer : Patrick Perry -- Stability : experimental -- -- Summary statistics for @Double@s. -- module Data.Summary.Double ( -- * The @Summary@ data type Summary, summary, update, -- * @Summary@ properties sampleSize, sampleMin, sampleMax, sampleMean, sampleSE, sampleVar, sampleSD, sampleCI, ) where import Data.List( foldl' ) import Text.Printf import Data.Summary.Utils -- | A type for storing summary statistics for a data set including -- sample size, min and max values, and first and second moments. data Summary = S {-# UNPACK #-} !Int -- sample size {-# UNPACK #-} !Double -- sample mean {-# UNPACK #-} !Double -- sum of squares {-# UNPACK #-} !Double -- sample min {-# UNPACK #-} !Double -- sample max instance Show Summary where show s@(S n mu _ l h) = printf " sample size: %d" n ++ printf "\n min: %g" l ++ printf "\n max: %g" h ++ printf "\n mean: %g" mu ++ printf "\n SE: %g" (sampleSE s) ++ printf "\n 99%% CI: (%g, %g)" c1 c2 where (c1,c2) = sampleCI 0.99 s -- | Get a summary of a list of values. summary :: [Double] -> Summary summary = foldl' update empty -- | Get an empty summary. empty :: Summary empty = S 0 0 0 (1/0) (-1/0) -- | Update the summary with a data point. -- Running mean and variance computed as in Knuth, Vol 2, page 232, -- 3rd edition, see http://www.johndcook.com/standard_deviation.html for -- a description. update :: Summary -> Double -> Summary update (S n m s l h) x = let n' = n+1 delta = x - m m' = m + delta / fromIntegral n' s' = s + delta*(x - m') l' = if x < l then x else l h' = if x > h then x else h in S n' m' s' l' h' -- | Get the sample size. sampleSize :: Summary -> Int sampleSize (S n _ _ _ _) = n -- | Get the sample mean. sampleMean :: Summary -> Double sampleMean (S _ m _ _ _) = m -- | Get the sample variance. sampleVar :: Summary -> Double sampleVar (S n _ s _ _) = s / fromIntegral (n - 1) -- | Get the sample standard deviation. sampleSD :: Summary -> Double sampleSD s = sqrt (sampleVar s) -- | Get the sample standard error. sampleSE :: Summary -> Double sampleSE s = sqrt (sampleVar s / fromIntegral (sampleSize s)) -- | Get a Central Limit Theorem-based confidence interval for the mean -- with the specified coverage level. The level must be in the range @(0,1)@. sampleCI :: Double -> Summary -> (Double,Double) sampleCI level s = interval level (sampleMean s) (sampleSE s) -- | Get the minimum of the sample. sampleMin :: Summary -> Double sampleMin (S _ _ _ l _) = l -- | Get the maximum of the sample. sampleMax :: Summary -> Double sampleMax (S _ _ _ _ h) = h