-- | Statistics for calculating probabilities from the binomial distribution module Statistics where import Test.QuickCheck hiding (choose) -- | Number of combinations for choosing k from a set of n objects. Needs to use Integer to -- avoid running out of Int bits. choose :: Integral i => i -> i -> Integer n' `choose` k' = let (n,k) = (fromIntegral n', fromIntegral k') in product [k+1..n] `div` product [1..n-k] -- | Calculate binomial probability of observing exactly k positives from n trials, -- with a probability p for a positive result in each trial. binomial :: Integral i => Double -> i -> i -> Double binomial p n k -- | p*fromIntegral n > 20 && (1-p)*fromIntegral n > 20 = normal_apporx p n k | k <= n = fromIntegral (n `choose` k) * (p**fromIntegral k) * ((1-p)** fromIntegral (n-k)) | otherwise = error ("binomial: can't observe more than 'n' positives:"++show n++" "++show k) prop_binom p n k = k <= n && p>=0 && p <= 1 ==> sum [ binomial p n k | k <- [0..n]] - 1 < 0.001 -- | Calculate cumulative binomial probability of achieving k or fewer positive results. cumbin :: Integral i => Double -> i -> i -> Double cumbin p n k = sum [binomial p n x | x <- [0..k]] prop_cumbin p n k = k <= n && n-k-1 <= n && p>=0 && p <= 1 ==> cumbin p n k + cumbin (1-p) n (n-k-1) - 1 < 0.001 {- crap! this won't work, you can't just sum normals, goddamit. normal_approx p n k = pdf_normal (fromIntegral n*p) (fromIntegral n*p*(1-p)) (fromIntegral k) pdf_normal mu sigma x = exp(negate((x-mu)^2/(2*sigma^2)))/(sigma*sqrt(2*pi)) invcumnorm mu sigma z = mu + search (-limit*sigma) (limit*sigma) where search a b = let c = (a+b)/2 cn = cumnorm 0 sigma c in if abs (z - cn) < 10*epsilon || abs (a-b) < epsilon then c else if cn > z then search a c else search c b -}