{-# LANGUAGE DeriveDataTypeable #-} -- | -- Module : Statistics.Distribution.Hypergeometric -- Copyright : (c) 2009 Bryan O'Sullivan -- License : BSD3 -- -- Maintainer : bos@serpentine.com -- Stability : experimental -- Portability : portable -- -- The Hypergeometric distribution. This is the discrete probability -- distribution that measures the probability of /k/ successes in /l/ -- trials, without replacement, from a finite population. -- -- The parameters of the distribution describe /k/ elements chosen -- from a population of /l/, with /m/ elements of one type, and -- /l/-/m/ of the other (all are positive integers). module Statistics.Distribution.Hypergeometric ( HypergeometricDistribution -- * Constructors , fromParams -- ** Accessors , hdM , hdL , hdK ) where import Control.Exception (assert) import qualified Data.Vector.Unboxed as U import Data.Typeable (Typeable) import Statistics.Math (choose, logFactorial) import Statistics.Constants (m_max_exp) import qualified Statistics.Distribution as D data HypergeometricDistribution = HD { hdM :: {-# UNPACK #-} !Int , hdL :: {-# UNPACK #-} !Int , hdK :: {-# UNPACK #-} !Int } deriving (Eq, Read, Show, Typeable) instance D.Distribution HypergeometricDistribution where density = density cumulative = cumulative quantile = quantile instance D.Variance HypergeometricDistribution where variance = variance instance D.Mean HypergeometricDistribution where mean = mean variance :: HypergeometricDistribution -> Double variance (HD m l k) = (k' * ml) * (1 - ml) * (l' - k') / (l' - 1) where m' = fromIntegral m l' = fromIntegral l k' = fromIntegral k ml = m' / l' {-# INLINE variance #-} mean :: HypergeometricDistribution -> Double mean (HD m l k) = fromIntegral k * fromIntegral m / fromIntegral l {-# INLINE mean #-} fromParams :: Int -- ^ /m/ -> Int -- ^ /l/ -> Int -- ^ /k/ -> HypergeometricDistribution fromParams m l k = assert (m > 0 && m <= l) . assert (l > 0) . assert (k > 0 && k <= l) $ HD m l k {-# INLINE fromParams #-} density :: HypergeometricDistribution -> Double -> Double density (HD mi li ki) x | l <= 70 = (mi <> xi) * ((li - mi) <> (ki - xi)) / (li <> ki) | r > maxVal = 1/0 | otherwise = exp r where a <> b = a `choose` b r = f m + f (l-m) - f l - f xi - f (k-xi) + f k - f (m-xi) - f (l-m-k+xi) + f (l-k) f = logFactorial maxVal = fromIntegral (m_max_exp - 1) * log 2 xi = floor x m = fromIntegral mi l = fromIntegral li k = fromIntegral ki {-# INLINE density #-} cumulative :: HypergeometricDistribution -> Double -> Double cumulative d@(HD m l k) x | x < fromIntegral imin = 0 | x >= fromIntegral imax = 1 | otherwise = min r 1 where imin = max 0 (k - l + m) imax = min k m r = U.sum . U.map (density d . fromIntegral) . U.enumFromTo imin . floor $ x {-# INLINE cumulative #-} quantile :: HypergeometricDistribution -> Double -> Double quantile = error "Statistics.Distribution.Hypergeometric.quantile: not yet implemented" {-# INLINE quantile #-}