Statistics/Distribution/Binomial.hs

{-# LANGUAGE DeriveDataTypeable #-}
-- |
-- Module    : Statistics.Distribution.Binomial
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The binomial distribution.  This is the discrete probability
-- distribution of the number of successes in a sequence of /n/
-- independent yes\/no experiments, each of which yields success with
-- probability /p/.

module Statistics.Distribution.Binomial
    (
      BinomialDistribution
    -- * Constructors
    , binomial
    -- * Accessors
    , bdTrials
    , bdProbability
    ) where

import Control.Exception (assert)
import qualified Data.Vector.Unboxed as U
import Data.Int (Int64)
import Data.Typeable (Typeable)
import Statistics.Constants (m_epsilon)
import qualified Statistics.Distribution as D
import Statistics.Distribution.Normal (standard)
import Statistics.Math (choose, logFactorial)

-- | The binomial distribution.
data BinomialDistribution = BD {
      bdTrials      :: {-# UNPACK #-} !Int
    -- ^ Number of trials.
    , bdProbability :: {-# UNPACK #-} !Double
    -- ^ Probability.
    } deriving (Eq, Read, Show, Typeable)

instance D.Distribution BinomialDistribution where
    density    = density
    cumulative = cumulative
    quantile   = quantile

instance D.Variance BinomialDistribution where
    variance = variance

instance D.Mean BinomialDistribution where
    mean = mean

density :: BinomialDistribution -> Double -> Double
density (BD n p) x
    | not (isIntegral x) = integralError "density"
    | n == 0             = 1
    | x < 0 || x > n'    = 0
    | n <= 50 || x < 2   = sign * p'' ** x' * (n `choose` fx) * q'' ** nx'
    | otherwise          = sign * exp (x' * log p'' + nx' * log q'' + lf)
  where sign = oddX * oddNX
        (x',p',q') | x > n' / 2 = (n'-x, q, p)
                   | otherwise  = (x,    p, q)
        oddX | p' < 0 && odd fx     = -1
             | otherwise            = 1
        oddNX | q' < 0 && odd nx    = -1
              | otherwise           = 1
        p'' = abs p'
        q'' = abs q'
        q   = 1 - p
        nx  = n - fx
        nx' = fromIntegral nx
        fx  = floor x'
        n'  = fromIntegral n
        lf  = logFactorial n - logFactorial nx - logFactorial fx

cumulative :: BinomialDistribution -> Double -> Double
cumulative d x
  | isIntegral x = U.sum . U.map (density d . fromIntegral) . U.enumFromTo (0::Int) . floor $ x
  | otherwise    = integralError "cumulative"

isIntegral :: Double -> Bool
isIntegral x = x == floorf x

floorf :: Double -> Double
floorf = fromIntegral . (floor :: Double -> Int64)

quantile :: BinomialDistribution -> Double -> Double
quantile dist@(BD n p) prob
    | isNaN prob = prob
    | p == 1     = n'
    | n' < 1e5   = fst (search 1 y0 z0)
    | otherwise  = let dy = floorf (n' / 1000)
                   in  narrow dy (search dy y0 z0)
  where q  = 1 - p
        n' = fromIntegral n
        y0 = n' `min` floorf (µ + σ * (d + γ * (d * d - 1) / 6) + 0.5)
          where µ  = n' * p
                σ  = sqrt (n' * p * q)
                d = D.quantile standard prob
                γ  = (q - p) / σ
        z0 = cumulative dist y0
        search dy y1 z1 | z0 >= prob' = left y1 z1
                        | otherwise   = right y1
          where
            prob' = prob * (1 - 64 * m_epsilon)
            left y oldZ | y == 0 || z < prob' = (y, oldZ)
                        | otherwise           = left (max 0 y') z
                where z  = cumulative dist y'
                      y' = y - dy
            right y | y' >= n' || z >= prob' = (y', z)
                    | otherwise              = right y'
                where z  = cumulative dist y'
                      y' = y + dy
        narrow dy (y,z) | dy <= 1 || dy' <= n'/1e15 = y
                        | otherwise                 = narrow dy' (search dy y z)
            where dy' = floorf (dy / 100)

mean :: BinomialDistribution -> Double
mean (BD n p) = fromIntegral n * p
{-# INLINE mean #-}

variance :: BinomialDistribution -> Double
variance (BD n p) = fromIntegral n * p * (1 - p)
{-# INLINE variance #-}

binomial :: Int                 -- ^ Number of trials.
         -> Double              -- ^ Probability.
         -> BinomialDistribution
binomial n p =
    assert (n > 0) .
    assert (p > 0 && p < 1) $
    BD n p
{-# INLINE binomial #-}

integralError :: String -> a
integralError f = error ("Statistics.Distribution.Binomial." ++ f ++
                         ": non-integer-valued input")