{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} -- | -- Module : Statistics.Distribution -- Copyright : (c) 2009 Bryan O'Sullivan -- License : BSD3 -- -- Maintainer : bos@serpentine.com -- Stability : experimental -- Portability : portable -- -- Types classes for probability distrubutions module Statistics.Distribution ( -- * Type classes Distribution(..) , DiscreteDistr(..) , ContDistr(..) -- ** Distribution statistics , MaybeMean(..) , Mean(..) , MaybeVariance(..) , Variance(..) , MaybeEntropy(..) , Entropy(..) -- ** Random number generation , ContGen(..) , DiscreteGen(..) , genContinous -- * Helper functions , findRoot , sumProbabilities ) where import Control.Applicative ((<$>), Applicative(..)) import Control.Monad.Primitive (PrimMonad,PrimState) import Prelude hiding (sum) import Statistics.Function (square) import Statistics.Sample.Internal (sum) import System.Random.MWC (Gen, uniform) import qualified Data.Vector.Unboxed as U -- | Type class common to all distributions. Only c.d.f. could be -- defined for both discrete and continous distributions. class Distribution d where -- | Cumulative distribution function. The probability that a -- random variable /X/ is less or equal than /x/, -- i.e. P(/X/≤/x/). Cumulative should be defined for -- infinities as well: -- -- > cumulative d +∞ = 1 -- > cumulative d -∞ = 0 cumulative :: d -> Double -> Double -- | One's complement of cumulative distibution: -- -- > complCumulative d x = 1 - cumulative d x -- -- It's useful when one is interested in P(/X/ Double -> Double complCumulative d x = 1 - cumulative d x -- | Discrete probability distribution. class Distribution d => DiscreteDistr d where -- | Probability of n-th outcome. probability :: d -> Int -> Double probability d = exp . logProbability d -- | Logarithm of probability of n-th outcome logProbability :: d -> Int -> Double logProbability d = log . probability d -- | Continuous probability distributuion. -- -- Minimal complete definition is 'quantile' and either 'density' or -- 'logDensity'. class Distribution d => ContDistr d where -- | Probability density function. Probability that random -- variable /X/ lies in the infinitesimal interval -- [/x/,/x+/δ/x/) equal to /density(x)/⋅δ/x/ density :: d -> Double -> Double density d = exp . logDensity d -- | Inverse of the cumulative distribution function. The value -- /x/ for which P(/X/≤/x/) = /p/. If probability is outside -- of [0,1] range function should call 'error' quantile :: d -> Double -> Double -- | Natural logarithm of density. logDensity :: d -> Double -> Double logDensity d = log . density d -- | Type class for distributions with mean. 'maybeMean' should return -- 'Nothing' if it's undefined for current value of data class Distribution d => MaybeMean d where maybeMean :: d -> Maybe Double -- | Type class for distributions with mean. If distribution have -- finite mean for all valid values of parameters it should be -- instance of this type class. class MaybeMean d => Mean d where mean :: d -> Double -- | Type class for distributions with variance. If variance is -- undefined for some parameter values both 'maybeVariance' and -- 'maybeStdDev' should return Nothing. -- -- Minimal complete definition is 'maybeVariance' or 'maybeStdDev' class MaybeMean d => MaybeVariance d where maybeVariance :: d -> Maybe Double maybeVariance d = (*) <$> x <*> x where x = maybeStdDev d maybeStdDev :: d -> Maybe Double maybeStdDev = fmap sqrt . maybeVariance -- | Type class for distributions with variance. If distibution have -- finite variance for all valid parameter values it should be -- instance of this type class. -- -- Minimal complete definition is 'variance' or 'stdDev' class (Mean d, MaybeVariance d) => Variance d where variance :: d -> Double variance d = square (stdDev d) stdDev :: d -> Double stdDev = sqrt . variance -- | Type class for distributions with entropy, meaning Shannon entropy -- in the case of a discrete distribution, or differential entropy in the -- case of a continuous one. 'maybeEntropy' should return 'Nothing' if -- entropy is undefined for the chosen parameter values. class (Distribution d) => MaybeEntropy d where -- | Returns the entropy of a distribution, in nats, if such is defined. maybeEntropy :: d -> Maybe Double -- | Type class for distributions with entropy, meaning Shannon -- entropy in the case of a discrete distribution, or differential -- entropy in the case of a continuous one. If the distribution has -- well-defined entropy for all valid parameter values then it -- should be an instance of this type class. class (MaybeEntropy d) => Entropy d where -- | Returns the entropy of a distribution, in nats. entropy :: d -> Double -- | Generate discrete random variates which have given -- distribution. class Distribution d => ContGen d where genContVar :: PrimMonad m => d -> Gen (PrimState m) -> m Double -- | Generate discrete random variates which have given -- distribution. 'ContGen' is superclass because it's always possible -- to generate real-valued variates from integer values class (DiscreteDistr d, ContGen d) => DiscreteGen d where genDiscreteVar :: PrimMonad m => d -> Gen (PrimState m) -> m Int -- | Generate variates from continous distribution using inverse -- transform rule. genContinous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double genContinous d gen = do x <- uniform gen return $! quantile d x data P = P {-# UNPACK #-} !Double {-# UNPACK #-} !Double -- | Approximate the value of /X/ for which P(/x/>/X/)=/p/. -- -- This method uses a combination of Newton-Raphson iteration and -- bisection with the given guess as a starting point. The upper and -- lower bounds specify the interval in which the probability -- distribution reaches the value /p/. findRoot :: ContDistr d => d -- ^ Distribution -> Double -- ^ Probability /p/ -> Double -- ^ Initial guess -> Double -- ^ Lower bound on interval -> Double -- ^ Upper bound on interval -> Double findRoot d prob = loop 0 1 where loop !(i::Int) !dx !x !lo !hi | abs dx <= accuracy || i >= maxIters = x | otherwise = loop (i+1) dx'' x'' lo' hi' where err = cumulative d x - prob P lo' hi' | err < 0 = P x hi | otherwise = P lo x pdf = density d x P dx' x' | pdf /= 0 = P (err / pdf) (x - dx) | otherwise = P dx x P dx'' x'' | x' < lo' || x' > hi' || pdf == 0 = let y = (lo' + hi') / 2 in P (y-x) y | otherwise = P dx' x' accuracy = 1e-15 maxIters = 150 -- | Sum probabilities in inclusive interval. sumProbabilities :: DiscreteDistr d => d -> Int -> Int -> Double sumProbabilities d low hi = -- Return value is forced to be less than 1 to guard againist roundoff errors. -- ATTENTION! this check should be removed for testing or it could mask bugs. min 1 . sum . U.map (probability d) $ U.enumFromTo low hi