{- Copyright (C) 2011-2013 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] Functions for probability-distributions. [@CAVEAT@] Because data-constructors are exposed, 'ToolShed.SelfValidate.isValid' need not be called. -} module Factory.Math.Probability( -- * Type-classes Distribution(..), -- * Types -- ** Data-types ContinuousDistribution(..), DiscreteDistribution(..), -- * Functions maxPreciseInteger, -- minPositiveFloat, boxMullerTransform, -- reProfile, generateStandardizedNormalDistribution, generateContinuousPopulation, -- generatePoissonDistribution, generateDiscretePopulation ) where import qualified Control.Arrow import Control.Arrow((***), (&&&)) import qualified Factory.Data.Interval as Data.Interval import qualified Factory.Math.Power as Math.Power import qualified System.Random import qualified ToolShed.Data.List import qualified ToolShed.Data.Pair import qualified ToolShed.SelfValidate -- | The maximum integer which can be accurately represented as a Double. maxPreciseInteger :: RealFloat a => a -> Integer maxPreciseInteger = (2 ^) . floatDigits {- | * Determines the minimum positive floating-point number, which can be represented by using the parameter's type. * Only the type of the parameter is relevant, not its value. -} minPositiveFloat :: RealFloat a => a -> a minPositiveFloat = encodeFloat 1 . uncurry (-) . (fst . floatRange &&& floatDigits) -- | Describes /continuous probability-distributions/; . data ContinuousDistribution parameter = ExponentialDistribution parameter {-lambda-} -- ^ Defines an /Exponential/-distribution with a particular /lambda/; . | LogNormalDistribution parameter {-location-} parameter {-scale2-} -- ^ Defines a distribution whose logarithm is normally distributed with a particular /mean/ & /variance/; . | NormalDistribution parameter {-mean-} parameter {-variance-} -- ^ Defines a /Normal/-distribution with a particular /mean/ & /variance/; . | UniformDistribution (Data.Interval.Interval parameter) -- ^ Defines a /Uniform/-distribution within a /closed interval/; . deriving (Eq, Read, Show) instance (Floating parameter, Ord parameter, Show parameter) => ToolShed.SelfValidate.SelfValidator (ContinuousDistribution parameter) where getErrors probabilityDistribution = ToolShed.SelfValidate.extractErrors $ case probabilityDistribution of ExponentialDistribution lambda -> [(lambda <= 0, "'lambda' must exceed zero; " ++ show probabilityDistribution ++ ".")] LogNormalDistribution location scale2 -> let maxParameter = log . fromInteger $ maxPreciseInteger (undefined :: Double) in [ (scale2 <= 0, "'scale' must exceed zero; " ++ show probabilityDistribution ++ "."), (location > maxParameter || scale2 > maxParameter, "loss of precision will result from either 'location' or 'scale^2' exceeding '" ++ show maxParameter ++ "'; " ++ show probabilityDistribution ++ ".") ] NormalDistribution _ variance -> [(variance <= 0, "variance must exceed zero; " ++ show probabilityDistribution ++ ".")] UniformDistribution interval -> [(Data.Interval.isReversed interval, "reversed interval='" ++ show probabilityDistribution ++ "'.")] -- | Describes /discrete probability-distributions/; . data DiscreteDistribution parameter = PoissonDistribution parameter {-lambda-} -- ^ Defines an /Poisson/-distribution with a particular /lambda/; . | ShiftedGeometricDistribution parameter {-probability-} -- ^ Defines an /Geometric/-distribution with a particular probability of success; . deriving (Eq, Read, Show) instance (Num parameter, Ord parameter, Show parameter) => ToolShed.SelfValidate.SelfValidator (DiscreteDistribution parameter) where getErrors probabilityDistribution = ToolShed.SelfValidate.extractErrors $ case probabilityDistribution of PoissonDistribution lambda -> [(lambda <= 0, "'lambda' must exceed zero; " ++ show probabilityDistribution ++ ".")] ShiftedGeometricDistribution probability -> [(any ($ probability) [(<= 0), (> 1)], "probability must be in the semi-closed unit-interval (0, 1]; " ++ show probabilityDistribution ++ ".")] -- | Defines a common interface for probability-distributions. class Distribution probabilityDistribution where generatePopulation :: (Fractional sample, System.Random.RandomGen randomGen) => probabilityDistribution -> randomGen -- ^ A generator of /uniformly distributed/ random numbers. -> [sample] -- ^ CAVEAT: the integers generated for discrete distributions are represented by a fractional type; use 'generateDiscretePopulation' if this is a problem. getMean :: Fractional mean => probabilityDistribution -> mean -- ^ The theoretical mean. getStandardDeviation :: Floating standardDeviation => probabilityDistribution -> standardDeviation-- ^ The theoretical standard-deviation. getStandardDeviation = sqrt . getVariance --Default implementation. getVariance :: Floating variance => probabilityDistribution -> variance -- ^ The theoretical variance. getVariance = Math.Power.square . getStandardDeviation --Default implementation. instance (RealFloat parameter, Show parameter, System.Random.Random parameter) => Distribution (ContinuousDistribution parameter) where generatePopulation probabilityDistribution = map realToFrac {-parameter -> sample-} . generateContinuousPopulation probabilityDistribution getMean (ExponentialDistribution lambda) = realToFrac $ recip lambda getMean (LogNormalDistribution location scale2) = realToFrac . exp . (+ location) $ scale2 / 2 getMean (NormalDistribution mean _) = realToFrac mean getMean (UniformDistribution (minParameter, maxParameter)) = realToFrac $ (minParameter + maxParameter) / 2 getVariance (ExponentialDistribution lambda) = realToFrac . recip $ Math.Power.square lambda getVariance (LogNormalDistribution location scale2) = realToFrac $ (exp scale2 - 1) * exp (2 * location + scale2) getVariance (NormalDistribution _ variance) = realToFrac variance getVariance (UniformDistribution (minParameter, maxParameter)) = realToFrac $ Math.Power.square (maxParameter - minParameter) / 12 instance (RealFloat parameter, Show parameter, System.Random.Random parameter) => Distribution (DiscreteDistribution parameter) where generatePopulation probabilityDistribution = map fromInteger . generateDiscretePopulation probabilityDistribution getMean (PoissonDistribution lambda) = realToFrac lambda getMean (ShiftedGeometricDistribution probability) = realToFrac $ recip probability getVariance (PoissonDistribution lambda) = realToFrac lambda getVariance (ShiftedGeometricDistribution probability) = realToFrac $ (1 - probability) / Math.Power.square probability {- | * Converts a pair of independent /uniformly distributed/ random numbers, within the /semi-closed unit interval/ /(0,1]/, to a pair of independent /normally distributed/ random numbers, of standardized /mean/=0, and /variance/=1. * . -} boxMullerTransform :: ( Floating f, Ord f, Show f ) => (f, f) -- ^ Independent, /uniformly distributed/ random numbers, which must be within the /semi-closed unit interval/, /(0,1]/. -> (f, f) -- ^ Independent, /normally distributed/ random numbers, with standardized /mean/=0 and /variance/=1. boxMullerTransform cartesian | not . uncurry (&&) $ ToolShed.Data.Pair.mirror inSemiClosedUnitInterval cartesian = error $ "Factory.Math.Probability.boxMullerTransform:\tspecified Cartesian coordinates, must be within semi-closed unit-interval (0, 1]; " ++ show cartesian | otherwise = polarToCartesianTransform $ (sqrt . negate . (* 2) . log *** (* 2) . (* pi)) cartesian where inSemiClosedUnitInterval :: (Num n, Ord n) => n -> Bool inSemiClosedUnitInterval = uncurry (&&) . ((> 0) &&& (<= 1)) polarToCartesianTransform :: Floating f => (f, f) -> (f, f) polarToCartesianTransform = uncurry (*) . Control.Arrow.second cos &&& uncurry (*) . Control.Arrow.second sin {- | * Uses the supplied random-number generator, to generate a conceptually infinite list, of /normally distributed/ random numbers, with standardized /mean/=0, and /variance/=1. * , . -} generateStandardizedNormalDistribution :: ( RealFloat f, Show f, System.Random.Random f, System.Random.RandomGen randomGen ) => randomGen -> [f] generateStandardizedNormalDistribution = ToolShed.Data.List.linearise . uncurry (zipWith $ curry boxMullerTransform) . ToolShed.Data.Pair.mirror ( System.Random.randomRs (minPositiveFloat undefined, 1) ) . System.Random.split -- | Stretches and shifts a /distribution/ to achieve the required /mean/ and /standard-deviation/. reProfile :: (Distribution distribution, Floating n) => distribution -> [n] -> [n] reProfile distribution = map ((+ getMean distribution) . (* getStandardDeviation distribution)) -- | Uses the supplied random-number generator, to generate a conceptually infinite population, with the specified continuous probability-distribution. generateContinuousPopulation :: ( RealFloat f, Show f, System.Random.Random f, System.Random.RandomGen randomGen ) => ContinuousDistribution f -> randomGen -- ^ A generator of /uniformly distributed/ random numbers. -> [f] generateContinuousPopulation probabilityDistribution randomGen | not $ ToolShed.SelfValidate.isValid probabilityDistribution = error $ "Factory.Math.Probability.generateContinuousPopulation:\t" ++ ToolShed.SelfValidate.getFirstError probabilityDistribution | otherwise = ( case probabilityDistribution of ExponentialDistribution lambda -> let quantile = (/ lambda) . negate . log . (1 -) -- . in map quantile . System.Random.randomRs (0, 1) LogNormalDistribution location scale2 -> map ( exp . (+ location) . (* sqrt scale2) --Stretch the standard-deviation & re-locate the mean to that specified for the log-space, then return to the original coordinates. ) . generateStandardizedNormalDistribution NormalDistribution _ _ -> reProfile probabilityDistribution . generateStandardizedNormalDistribution UniformDistribution interval -> System.Random.randomRs interval ) randomGen {- | * Uses the supplied random-number generator, to generate a conceptually infinite population, of random integers conforming to the /Poisson distribution/; . * CAVEAT: uses an algorithm by Knuth, which having a /linear time-complexity/ in /lambda/, can be intolerably slow; also, the term @exp $ negate lambda@, underflows for large /lambda/; so for large /lambda/, this implementation returns the appropriate 'NormalDistribution'. -} generatePoissonDistribution :: ( Integral sample, RealFloat lambda, Show lambda, System.Random.Random lambda, System.Random.RandomGen randomGen ) => lambda -- ^ Defines the required approximate value of both /mean/ and /variance/. -> randomGen -> [sample] generatePoissonDistribution lambda | lambda <= 0 = error $ "Factory.Math.Probability.generatePoissonDistribution:\tlambda must exceed zero " ++ show lambda | lambda > ( negate . log $ minPositiveFloat lambda --Guard against underflow, in the user-defined type for lambda. ) = filter (>= 0) . map round . (reProfile (PoissonDistribution lambda) :: [Double] -> [Double]) . generateStandardizedNormalDistribution | otherwise = generator where generator = uncurry (:) . ( fst . head . dropWhile ( (> exp (negate lambda)) . snd --CAVEAT: underflows if lambda > (103 :: Float, 745 :: Double). ) . scanl ( \accumulator random -> succ *** (* random) $ accumulator ) (negate 1, 1) . System.Random.randomRs (0, 1) *** generator {-recurse-} ) . System.Random.split -- | Uses the supplied random-number generator, to generate a conceptually infinite population, with the specified discrete probability-distribution. generateDiscretePopulation :: ( Integral sample, Ord parameter, RealFloat parameter, Show parameter, System.Random.Random parameter, System.Random.RandomGen randomGen ) => DiscreteDistribution parameter -> randomGen -- ^ A generator of /uniformly distributed/ random numbers. -> [sample] generateDiscretePopulation probabilityDistribution randomGen | not $ ToolShed.SelfValidate.isValid probabilityDistribution = error $ "Factory.Math.Probability.generateDiscretePopulation:\t" ++ ToolShed.SelfValidate.getFirstError probabilityDistribution | otherwise = ( case probabilityDistribution of PoissonDistribution lambda -> generatePoissonDistribution lambda ShiftedGeometricDistribution probability | probability == 1 -> const $ repeat 1 --The first Bernoulli Trial is guaranteed to succeed. | otherwise -> map ceiling {-minimum 1-} . (\x -> x :: [Rational]) . generatePopulation (ExponentialDistribution . negate $ log (1 - probability)) --The geometric distribution is a discrete version of the exponential distribution. ) randomGen