{-
	Copyright (C) 2011-2015 Dr. Alistair Ward

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <https://www.gnu.org/licenses/>.
-}
{- |
 [@AUTHOR@]	Dr. Alistair Ward

 [@DESCRIPTION@]	Functions for probability-distributions.

 [@CAVEAT@]	Because data-constructors are exposed, 'ToolShed.SelfValidate.isValid' need not be called.
-}

module Factory.Math.Probability(
-- * Type-classes
        Distribution(..),
-- * Types
-- ** Data-types
        ContinuousDistribution(..),
        DiscreteDistribution(..),
-- * Functions
        maxPreciseInteger,
--	minPositiveFloat,
        boxMullerTransform,
--	reProfile,
        generateStandardizedNormalDistribution,
        generateContinuousPopulation,
--	generatePoissonDistribution,
        generateDiscretePopulation
) where

import qualified        Control.Arrow
import                  Control.Arrow((***), (&&&))
import qualified        Factory.Data.Interval   as Data.Interval
import qualified        Factory.Math.Power      as Math.Power
import qualified        System.Random
import qualified        ToolShed.Data.List
import qualified        ToolShed.Data.Pair
import qualified        ToolShed.SelfValidate

-- | The maximum integer which can be accurately represented as a Double.
maxPreciseInteger :: RealFloat a => a -> Integer
maxPreciseInteger       = (2 ^) . floatDigits

{- |
	* Determines the minimum positive floating-point number, which can be represented by using the parameter's type.

	* Only the type of the parameter is relevant, not its value.
-}
minPositiveFloat :: RealFloat a => a -> a
minPositiveFloat        = encodeFloat 1 . uncurry (-) . (fst . floatRange &&& floatDigits)

-- | Describes /continuous probability-distributions/; <https://en.wikipedia.org/wiki/List_of_probability_distributions#Continuous_distributions>.
data ContinuousDistribution parameter
        = ExponentialDistribution parameter {-lambda-}                          -- ^ Defines an /Exponential/-distribution with a particular /lambda/; <https://en.wikipedia.org/wiki/Exponential_distribution>.
        | LogNormalDistribution parameter {-location-} parameter {-scale2-}     -- ^ Defines a distribution whose logarithm is normally distributed with a particular /mean/ & /variance/; <https://en.wikipedia.org/wiki/Lognormal>.
        | NormalDistribution parameter {-mean-} parameter {-variance-}          -- ^ Defines a /Normal/-distribution with a particular /mean/ & /variance/; <https://en.wikipedia.org/wiki/Normal_distribution>.
        | UniformDistribution (Data.Interval.Interval parameter)                -- ^ Defines a /Uniform/-distribution within a /closed interval/; <https://en.wikipedia.org/wiki/Uniform_distribution>.
        deriving (Eq, Read, Show)

instance (Floating parameter, Ord parameter, Show parameter) => ToolShed.SelfValidate.SelfValidator (ContinuousDistribution parameter)  where
        getErrors probabilityDistribution       = ToolShed.SelfValidate.extractErrors $ case probabilityDistribution of
                ExponentialDistribution lambda          -> [(lambda <= 0, "'lambda' must exceed zero; " ++ show probabilityDistribution ++ ".")]
                LogNormalDistribution location scale2   -> let
                        maxParameter    = log . fromInteger $ maxPreciseInteger (undefined :: Double)
                 in [
                        (scale2 <= 0,                                           "'scale' must exceed zero; " ++ show probabilityDistribution ++ "."),
                        (location > maxParameter || scale2 > maxParameter,      "loss of precision will result from either 'location' or 'scale^2' exceeding '" ++ show maxParameter ++ "'; " ++ show probabilityDistribution ++ ".")
                 ]
                NormalDistribution _ variance           -> [(variance <= 0, "variance must exceed zero; " ++ show probabilityDistribution ++ ".")]
                UniformDistribution interval            -> [(Data.Interval.isReversed interval, "reversed interval='" ++ show probabilityDistribution ++ "'.")]

-- | Describes /discrete probability-distributions/; <https://en.wikipedia.org/wiki/List_of_probability_distributions#Discrete_distributions>.
data DiscreteDistribution parameter
        = PoissonDistribution parameter {-lambda-}                      -- ^ Defines an /Poisson/-distribution with a particular /lambda/; <https://en.wikipedia.org/wiki/Poisson_distribution>.
        | ShiftedGeometricDistribution parameter {-probability-}        -- ^ Defines an /Geometric/-distribution with a particular probability of success; <https://en.wikipedia.org/wiki/Geometric_distribution>.
        deriving (Eq, Read, Show)

instance (Num parameter, Ord parameter, Show parameter) => ToolShed.SelfValidate.SelfValidator (DiscreteDistribution parameter) where
        getErrors probabilityDistribution       = ToolShed.SelfValidate.extractErrors $ case probabilityDistribution of
                PoissonDistribution lambda                      -> [(lambda <= 0, "'lambda' must exceed zero; " ++ show probabilityDistribution ++ ".")]
                ShiftedGeometricDistribution probability        -> [(any ($ probability) [(<= 0), (> 1)], "probability must be in the semi-closed unit-interval (0, 1]; " ++ show probabilityDistribution ++ ".")]

-- | Defines a common interface for probability-distributions.
class Distribution probabilityDistribution      where
        generatePopulation
                :: (Fractional sample, System.Random.RandomGen randomGen)
                => probabilityDistribution
                -> randomGen    -- ^ A generator of /uniformly distributed/ random numbers.
                -> [sample]     -- ^ CAVEAT: the integers generated for discrete distributions are represented by a fractional type; use 'generateDiscretePopulation' if this is a problem.

        getMean :: Fractional mean => probabilityDistribution -> mean   -- ^ The theoretical mean.

        getStandardDeviation :: Floating standardDeviation => probabilityDistribution -> standardDeviation-- ^ The theoretical standard-deviation.
        getStandardDeviation    = sqrt . getVariance    -- Default implementation.

        getVariance :: Floating variance => probabilityDistribution -> variance -- ^ The theoretical variance.
        getVariance     = Math.Power.square . getStandardDeviation      -- Default implementation.

instance (RealFloat parameter, Show parameter, System.Random.Random parameter) => Distribution (ContinuousDistribution parameter)       where
        generatePopulation probabilityDistribution      = map realToFrac {-parameter -> sample-} . generateContinuousPopulation probabilityDistribution

        getMean (ExponentialDistribution lambda)                        = realToFrac $ recip lambda
        getMean (LogNormalDistribution location scale2)                 = realToFrac . exp . (+ location) $ scale2 / 2
        getMean (NormalDistribution mean _)                             = realToFrac mean
        getMean (UniformDistribution (minParameter, maxParameter))      = realToFrac $ (minParameter + maxParameter) / 2

        getVariance (ExponentialDistribution lambda)                    = realToFrac . recip $ Math.Power.square lambda
        getVariance (LogNormalDistribution location scale2)             = realToFrac $ (exp scale2 - 1) * exp (2 * location + scale2)   -- N.B.: for standard-deviation == mean, use scale^2 == ln 2.
        getVariance (NormalDistribution _ variance)                     = realToFrac variance
        getVariance (UniformDistribution (minParameter, maxParameter))  = realToFrac $ Math.Power.square (maxParameter - minParameter) / 12

instance (RealFloat parameter, Show parameter, System.Random.Random parameter) => Distribution (DiscreteDistribution parameter) where
        generatePopulation probabilityDistribution              = map fromInteger . generateDiscretePopulation probabilityDistribution

        getMean (PoissonDistribution lambda)                    = realToFrac lambda
        getMean (ShiftedGeometricDistribution probability)      = realToFrac $ recip probability

        getVariance (PoissonDistribution lambda)                = realToFrac lambda
        getVariance (ShiftedGeometricDistribution probability)  = realToFrac $ (1 - probability) / Math.Power.square probability

{- |
	* Converts a pair of independent /uniformly distributed/ random numbers, within the /semi-closed unit interval/ /(0,1]/,
	to a pair of independent /normally distributed/ random numbers, of standardized /mean/=0, and /variance/=1.

	* <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>.
-}
boxMullerTransform :: (
        Floating        f,
        Ord             f,
        Show            f
 )
        => (f, f)       -- ^ Independent, /uniformly distributed/ random numbers, which must be within the /semi-closed unit interval/, /(0,1]/.
        -> (f, f)       -- ^ Independent, /normally distributed/ random numbers, with standardized /mean/=0 and /variance/=1.
boxMullerTransform cartesian
        | not . uncurry (&&) $ ToolShed.Data.Pair.mirror inSemiClosedUnitInterval cartesian     = error $ "Factory.Math.Probability.boxMullerTransform:\tspecified Cartesian coordinates, must be within semi-closed unit-interval (0, 1]; " ++ show cartesian
        | otherwise                                                                             = polarToCartesianTransform $ (sqrt . negate . (* 2) . log *** (* 2) . (* pi)) cartesian
        where
                inSemiClosedUnitInterval :: (Num n, Ord n) => n -> Bool
                inSemiClosedUnitInterval        = uncurry (&&) . ((> 0) &&& (<= 1))

                polarToCartesianTransform :: Floating f => (f, f) -> (f, f)
                polarToCartesianTransform       = uncurry (*) . Control.Arrow.second cos &&& uncurry (*) . Control.Arrow.second sin

{- |
	* Uses the supplied random-number generator,
	to generate a conceptually infinite list, of /normally distributed/ random numbers, with standardized /mean/=0, and /variance/=1.

	* <https://en.wikipedia.org/wiki/Normal_distribution>, <https://mathworld.wolfram.com/NormalDistribution.html>.
-}
generateStandardizedNormalDistribution :: (
        RealFloat               f,
        Show                    f,
        System.Random.Random    f,
        System.Random.RandomGen randomGen
 ) => randomGen -> [f]
generateStandardizedNormalDistribution  = ToolShed.Data.List.linearise . uncurry (zipWith $ curry boxMullerTransform) . ToolShed.Data.Pair.mirror (
        System.Random.randomRs (minPositiveFloat undefined, 1)
 ) . System.Random.split

-- | Stretches and shifts a /distribution/ to achieve the required /mean/ and /standard-deviation/.
reProfile :: (Distribution distribution, Floating n) => distribution -> [n] -> [n]
reProfile distribution  = map ((+ getMean distribution) . (* getStandardDeviation distribution))

-- | Uses the supplied random-number generator, to generate a conceptually infinite population, with the specified continuous probability-distribution.
generateContinuousPopulation :: (
        RealFloat               f,
        Show                    f,
        System.Random.Random    f,
        System.Random.RandomGen randomGen
 )
        => ContinuousDistribution f
        -> randomGen    -- ^ A generator of /uniformly distributed/ random numbers.
        -> [f]
generateContinuousPopulation probabilityDistribution randomGen
        | not $ ToolShed.SelfValidate.isValid probabilityDistribution   = error $ "Factory.Math.Probability.generateContinuousPopulation:\t" ++ ToolShed.SelfValidate.getFirstError probabilityDistribution
        | otherwise                                                     = (
                case probabilityDistribution of
                        ExponentialDistribution lambda          -> let
                                quantile        = (/ lambda) . negate . log . (1 -)     -- <https://en.wikipedia.org/wiki/Quantile_function>.
                         in map quantile . System.Random.randomRs (0, 1)
                        LogNormalDistribution location scale2   -> map (
                                exp . (+ location) . (* sqrt scale2)    -- Stretch the standard-deviation & re-locate the mean to that specified for the log-space, then return to the original coordinates.
                         ) . generateStandardizedNormalDistribution
                        NormalDistribution _ _                  -> reProfile probabilityDistribution . generateStandardizedNormalDistribution
                        UniformDistribution interval            -> System.Random.randomRs interval
        ) randomGen

{- |
	* Uses the supplied random-number generator,
	to generate a conceptually infinite population, of random integers conforming to the /Poisson distribution/; <https://en.wikipedia.org/wiki/Poisson_distribution>.

	* CAVEAT:
		uses an algorithm by Knuth, which having a /linear time-complexity/ in /lambda/, can be intolerably slow;
		also, the term @exp $ negate lambda@, underflows for large /lambda/;
		so for large /lambda/, this implementation returns the appropriate 'NormalDistribution'.
-}
generatePoissonDistribution :: (
        Integral                sample,
        RealFloat               lambda,
        Show                    lambda,
        System.Random.Random    lambda,
        System.Random.RandomGen randomGen
 )
        => lambda       -- ^ Defines the required approximate value of both /mean/ and /variance/.
        -> randomGen
        -> [sample]
generatePoissonDistribution lambda
        | lambda <= 0   = error $ "Factory.Math.Probability.generatePoissonDistribution:\tlambda must exceed zero " ++ show lambda
        | lambda > (
                negate . log $ minPositiveFloat lambda  -- Guard against underflow, in the user-defined type for lambda.
        )               = filter (>= 0) . map round . (reProfile (PoissonDistribution lambda) :: [Double] -> [Double]) . generateStandardizedNormalDistribution
        | otherwise     = generator
        where
                generator       = uncurry (:) . (
                        fst . head . dropWhile (
                                (> exp (negate lambda)) . snd   -- CAVEAT: underflows if lambda > (103 :: Float, 745 :: Double).
                        ) . scanl (
                                \accumulator random     -> succ *** (* random) $ accumulator
                        ) (negate 1, 1) . System.Random.randomRs (0, 1) *** generator {-recurse-}
                 ) . System.Random.split

-- | Uses the supplied random-number generator, to generate a conceptually infinite population, with the specified discrete probability-distribution.
generateDiscretePopulation :: (
        Integral                sample,
        Ord                     parameter,
        RealFloat               parameter,
        Show                    parameter,
        System.Random.Random    parameter,
        System.Random.RandomGen randomGen
 )
        => DiscreteDistribution parameter
        -> randomGen    -- ^ A generator of /uniformly distributed/ random numbers.
        -> [sample]
generateDiscretePopulation probabilityDistribution randomGen
        | not $ ToolShed.SelfValidate.isValid probabilityDistribution   = error $ "Factory.Math.Probability.generateDiscretePopulation:\t" ++ ToolShed.SelfValidate.getFirstError probabilityDistribution
        | otherwise                                                     = (
                case probabilityDistribution of
                        PoissonDistribution lambda      -> generatePoissonDistribution lambda
                        ShiftedGeometricDistribution probability
                                | probability == 1      -> const $ repeat 1     -- The first Bernoulli Trial is guaranteed to succeed.
                                | otherwise             -> map ceiling {-minimum 1-} . (\x -> x :: [Rational]) . generatePopulation (ExponentialDistribution . negate $ log (1 - probability))  -- The geometric distribution is a discrete version of the exponential distribution.
        ) randomGen