{-# LANGUAGE ForeignFunctionInterface #-} -- | Yet another simple module for implementing statistics stuff. module Statistics (sample, samples, normal, normals , stdnormal, stdnormals, Distribution(..), Prob(..) , pdf, pvalue, fromPdf, invcum, worsen , module Control.Monad.Random ) where import Control.Monad.Random import Data.Array.Unboxed foreign import ccall "erf" erf :: Double -> Double newtype Prob = P Double deriving Show instance Random Prob where random g = let (a,g') = randomR (0,1) g in (P a,g') randomR = error "Specifying a range for a probability makes no sense." -- Todo: implement data Distribution = Normal Double Double -- ^ mu and sigma | LogNormal Double Double -- ^ mu and sigma | Uniform Double Double -- ^ from/to inclusive | Empirical Double Double (UArray Int Double) -- ^ Stores a cumulative distribution, centered on first -- param (ref. 'worsen'), and second param is step size deriving (Show,Read) -- add more: StudentsT, SkewNormal, SkewT -- dummy to make deriving Read work instance Read (UArray i d) where -- | Build an empirical probablility distribution by mapping the given probabilities -- to uniformly spaced points starting at 'start' with 'step' points per unit. -- Automatically center on pvalue = 50. fromPdf :: Double -> Double -> [Prob] -> Distribution fromPdf start h ps = let ps' = acc 0 $ map ((/scale) . unprob) $ ps mu = start+(fromIntegral . length . takeWhile (<0.5)) ps'*h-h/2 a = floor ((start-mu)/h) b = a+length ps' scale = sum [ x | P x <- ps] unprob (P x) = x acc _ [] = [1] acc c (x:xs) = let v = c+x in if v >= 1 then [1] else v : acc v xs in Empirical mu h $ listArray (a,b) (0:ps') invcum :: Distribution -> Prob -> Double invcum (Normal mu sigma) (P z) = invcumnorm mu sigma z invcum (LogNormal mu sigma) (P z) = exp $ invcum (Normal mu sigma) (P z) invcum (Uniform a b) (P z) = a+(b-a)*z invcum (Empirical mu h cd) (P z) = let (a,b) = bounds cd in mu+bisect ((cd!).round) (fromIntegral a) (fromIntegral b) z * h -- | Calculate probability of sampling less than x -- (i.e. the cumulative distribution's value in x) pvalue :: Distribution -> Double -> Prob pvalue (Normal mu sigma) x = P (cumnorm mu sigma x) pvalue (Uniform a b) x | x <= a = P 0 | x >= b = P 1 | otherwise = P ((x-a)/(b-a)) pvalue (LogNormal mu sigma) x | x<1e-10 = P 0 -- not sufficient! | otherwise = P (cumlognorm mu sigma x) pvalue (Empirical mu h cd) x = let (a,b) = bounds cd x' = (x-mu)/h in if x' < fromIntegral a then P 0 else if x' >= fromIntegral b then P 1 else let x1 = floor x' x2 = x1+1 y1 = cd!x1 y2 = cd!x2 in P (y1 + (y2-y1)*(x'-fromIntegral x1)) -- general functions for sampling sample :: RandomGen g => Distribution -> Rand g Double sample d = invcum d `fmap` getRandom -- sample (Uniform a b) = getRandomR (a,b) -- todo: replace with general function: samples :: RandomGen g => Distribution -> Rand g [Double] samples (Normal mu sigma) = normals mu sigma samples (LogNormal mu sigma) = map exp `fmap` samples (Normal mu sigma) samples (Uniform a b) = getRandomRs (a,b) samples (Empirical _mu _h _cds) = error "todo: implement 'samples' for Empirical distributions" -- | The probability density function pdf :: Distribution -> Double -> Double -- Prob? pdf (Normal mu sigma) x = exp(negate(square (x-mu)/(2*square sigma)))/(sigma*sqrt(2*pi)) pdf (LogNormal mu sigma) x | x>0 = exp(negate(square (log x-mu)/(2*square sigma)))/(x*sigma*sqrt(2*pi)) | otherwise = 0 pdf (Uniform a b) x | x= a = 1/(b-a) -- interval is open on the right, to work correctly with empirical below | otherwise = 0 pdf (Empirical mu h cd) x = let (a,b) = bounds cd x' = (x-mu)/h in if x' < fromIntegral a then 0 else if x' >= fromIntegral b then 0 else let x1 = floor x' x2 = x1+1 y1 = cd!x1 y2 = cd!x2 in (y2-y1)/h -- ------------------------------ -- Specifics -- ------------------------------ square :: Double -> Double square x = x * x stdnormal :: RandomGen g => Rand g Double stdnormal = normal 0 1 stdnormals :: RandomGen g => Rand g [Double] stdnormals = normals 0 1 normal :: RandomGen g => Double -> Double -> Rand g Double normal mu sigma = do x <- getRandomR (0,1) return (invcumnorm mu sigma x) normals :: RandomGen g => Double -> Double -> Rand g [Double] normals mu sigma = do x <- normal mu sigma xs <- normals mu sigma return (x : xs) -- support invcumnorm :: Double -> Double -> Double -> Double invcumnorm mu sigma z = mu + bisect (cumnorm 0 sigma) (-limit*sigma) (limit*sigma) z bisect :: (Double -> Double) -> Double -> Double -> Double -> Double bisect f a b z = let c = (a+b)/2 cn = f c in if abs (z - cn) < 10*epsilon || abs (a-b) < epsilon then c else if cn > z then bisect f a c z else bisect f c b z cumnorm :: Double -> Double -> Double -> Double cumnorm mu sigma x = 0.5*(1+erf((x-mu)/(sigma*sqrt 2))) cumlognorm :: Double -> Double -> Double -> Double cumlognorm mu sigma x = 0.5+0.5*erf ((log x-mu)/(sigma*sqrt 2)) epsilon, limit :: Double epsilon = 0.0000000001 limit = 4.4 -- | Make a distribution wider by expanding (or contracting, if less than 1) -- its stdev by some factor worsen :: Double -> Distribution -> Distribution worsen d (Normal mu sigma) = Normal mu (sigma*d) worsen d (LogNormal mu sigma) = LogNormal mu (sigma*d) worsen d (Uniform a b) = Uniform (a*(1-d)) (b*d) -- assumption alert? worsen d (Empirical mu h ds) = Empirical mu (h*d) ds