-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A library of statistical types, data, and functions -- -- This library provides a number of common functions and types useful in -- statistics. Our focus is on high performance, numerical robustness, -- and use of good algorithms. Where possible, we provide references to -- the statistical literature. -- -- The library's facilities can be divided into three broad categories: -- -- Working with widely used discrete and continuous probability -- distributions. (There are dozens of exotic distributions in use; we -- focus on the most common.) -- -- Computing with sample data: quantile estimation, kernel density -- estimation, bootstrap methods, and autocorrelation analysis. -- -- Random variate generation under several different distributions. @package statistics @version 0.4.0 -- | Pseudo-random variate generation. module Statistics.RandomVariate -- | State of the pseudo-random number generator. data Gen s -- | An immutable snapshot of the state of a Gen. data Seed -- | The class of types for which we can generate uniformly distributed -- random variates. -- -- The uniform PRNG uses Marsaglia's MWC256 (also known as MWC8222) -- multiply-with-carry generator, which has a period of 2^8222 and fares -- well in tests of randomness. It is also extremely fast, between 2 and -- 3 times faster than the Mersenne Twister. -- -- Note: Marsaglia's PRNG is not known to be cryptographically -- secure, so you should not use it for cryptographic operations. class Variate a uniform :: (Variate a) => Gen s -> ST s a -- | Generate a normally distributed random variate. -- -- The implementation uses Doornik's modified ziggurat algorithm. -- Compared to the ziggurat algorithm usually used, this is slower, but -- generates more independent variates that pass stringent tests of -- randomness. normal :: Gen s -> ST s Double -- | Create a generator for variates using a fixed seed. 
create :: ST s (Gen s) -- | Create a generator for variates using the given seed, of which up to -- 256 elements will be used. For arrays of fewer than 256 elements, part -- of the default seed will be used to finish initializing the -- generator's state. -- -- Examples: -- --
-- initialize (singletonU 42) ---- --
-- initialize (toU [4, 8, 15, 16, 23, 42]) ---- -- If a seed contains fewer than 256 elements, it is first used verbatim, -- then its elements are xored against elements of the default -- seed until 256 elements are reached. initialize :: UArr Word32 -> ST s (Gen s) -- | Seed a PRNG with data from the system's fast source of pseudo-random -- numbers ("/dev/urandom" on Unix-like systems), then run the given -- action. -- -- Note: on Windows, this code does not yet use the native -- Cryptographic API as a source of random numbers (it uses the system -- clock instead). As a result, the sequences it generates may not be -- highly independent. withSystemRandom :: (forall s. Gen s -> ST s a) -> IO a -- | Save the state of a Gen, for later use by restore. save :: Gen s -> ST s Seed -- | Create a new Gen that mirrors the state of a saved Seed. restore :: Seed -> ST s (Gen s) -- | Generate an array of pseudo-random variates. This is not necessarily -- faster than invoking uniform repeatedly in a loop, but it may -- be more convenient to use in some situations. uniformArray :: (UA a, Variate a) => Gen s -> Int -> ST s (UArr a) instance Typeable Seed instance Eq Seed instance Read Seed instance Show Seed instance (Variate a, Variate b, Variate c, Variate d) => Variate (a, b, c, d) instance (Variate a, Variate b, Variate c) => Variate (a, b, c) instance (Variate a, Variate b) => Variate (a, b) instance Variate Integer instance Variate Word instance Variate Int instance Variate Double instance Variate Float instance Variate Bool instance Variate Word64 instance Variate Word32 instance Variate Word16 instance Variate Word8 instance Variate Int64 instance Variate Int32 instance Variate Int16 instance Variate Int8 -- | Useful functions. module Statistics.Function -- | Compute the minimum and maximum of an array in one pass. minMax :: UArr Double -> Double :*: Double -- | Sort an array. 
sort :: (UA e, Ord e) => UArr e -> UArr e -- | Partially sort an array, such that the least k elements will be -- at the front. partialSort :: (UA e, Ord e) => Int -> UArr e -> UArr e -- | Return the indices of an array. indices :: (UA a) => UArr a -> UArr Int -- | Create an array, using the given ST action to populate each -- element. createU :: (UA e) => forall s. Int -> (Int -> ST s e) -> ST s (UArr e) -- | Create an array, using the given IO action to populate each -- element. createIO :: (UA e) => Int -> (Int -> IO e) -> IO (UArr e) -- | Types for working with statistics. module Statistics.Types -- | A function that estimates a property of a sample, such as its -- mean. type Estimator = Sample -> Double -- | Sample data. type Sample = UArr Double -- | Weights for affecting the importance of elements of a sample. type Weights = UArr Double -- | Resampling statistics. module Statistics.Resampling -- | A resample drawn randomly, with replacement, from a set of data -- points. Distinct from a normal array to make it harder for your humble -- author's brain to go wrong. newtype Resample Resample :: UArr Double -> Resample fromResample :: Resample -> UArr Double -- | Compute a statistical estimate repeatedly over a sample, each time -- omitting a successive element. jackknife :: Estimator -> Sample -> UArr Double -- | Resample a data set repeatedly, with replacement, computing each -- estimate over the resampled data. resample :: Gen s -> [Estimator] -> Int -> Sample -> ST s [Resample] instance Eq Resample instance Show Resample -- | Types and functions common to many probability distributions. module Statistics.Distribution -- | The interface shared by all probability distributions. 
class Distribution d density :: (Distribution d) => d -> Double -> Double cumulative :: (Distribution d) => d -> Double -> Double quantile :: (Distribution d) => d -> Double -> Double class (Distribution d) => Mean d mean :: (Mean d) => d -> Double class (Mean d) => Variance d variance :: (Variance d) => d -> Double -- | Approximate the value of X for which -- P(x>X)=p. -- -- This method uses a combination of Newton-Raphson iteration and -- bisection with the given guess as a starting point. The upper and -- lower bounds specify the interval in which the probability -- distribution reaches the value p. findRoot :: (Distribution d) => d -> Double -> Double -> Double -> Double -> Double -- | The Geometric distribution. This is the probability distribution of -- the number of Bernoulli trials needed to get one success, supported on -- the set [1,2..]. -- -- This distribution is sometimes referred to as the shifted -- geometric distribution, to distinguish it from a variant measuring the -- number of failures before the first success, defined over the set -- [0,1..]. module Statistics.Distribution.Geometric data GeometricDistribution fromSuccess :: Double -> GeometricDistribution pdSuccess :: GeometricDistribution -> Double instance Typeable GeometricDistribution instance Eq GeometricDistribution instance Read GeometricDistribution instance Show GeometricDistribution instance Mean GeometricDistribution instance Variance GeometricDistribution instance Distribution GeometricDistribution -- | Constant values common to much statistics code. module Statistics.Constants -- | The smallest Double larger than 1. m_epsilon :: Double -- | A very large number. m_huge :: Double -- |
-- 1 / sqrt 2 -- m_1_sqrt_2 :: Double -- |
-- 2 / sqrt pi -- m_2_sqrt_pi :: Double -- | The largest Int x such that 2**(x-1) is -- approximately representable as a Double. m_max_exp :: Int -- |
-- sqrt 2 -- m_sqrt_2 :: Double -- |
-- sqrt (2 * pi) --m_sqrt_2_pi :: Double -- | Functions for approximating quantiles, i.e. points taken at regular -- intervals from the cumulative distribution function of a random -- variable. -- -- The number of quantiles is described below by the variable q, -- so with q=4, a 4-quantile (also known as a quartile) has -- 4 intervals, and contains 5 points. The parameter k describes -- the desired point, where 0 ≤ k ≤ q. module Statistics.Quantile -- | O(n log n). Estimate the kth q-quantile of -- a sample, using the weighted average method. weightedAvg :: Int -> Int -> Sample -> Double -- | Parameters a and b to the continuousBy function. data ContParam ContParam :: !!Double -> !!Double -> ContParam -- | O(n log n). Estimate the kth q-quantile of -- a sample x, using the continuous sample method with the given -- parameters. This is the method used by most statistical software, such -- as R, Mathematica, SPSS, and S. continuousBy :: ContParam -> Int -> Int -> Sample -> Double -- | O(n log n). Estimate the range between -- q-quantiles 1 and q-1 of a sample x, using the -- continuous sample method with the given parameters. -- -- For instance, the interquartile range (IQR) can be estimated as -- follows: -- --
-- midspread medianUnbiased 4 (toU [1,1,2,2,3]) -- ==> 1.333333 --midspread :: ContParam -> Int -> Sample -> Double -- | California Department of Public Works definition, a=0, -- b=1. Gives a linear interpolation of the empirical CDF. This -- corresponds to method 4 in R and Mathematica. cadpw :: ContParam -- | Hazen's definition, a=0.5, b=0.5. This is claimed to be -- popular among hydrologists. This corresponds to method 5 in R and -- Mathematica. hazen :: ContParam -- | Definition used by the S statistics application, with a=1, -- b=1. The interpolation points divide the sample range into -- n-1 intervals. This corresponds to method 7 in R and -- Mathematica. s :: ContParam -- | Definition used by the SPSS statistics application, with a=0, -- b=0 (also known as Weibull's definition). This corresponds to -- method 6 in R and Mathematica. spss :: ContParam -- | Median unbiased definition, a=1/3, b=1/3. The resulting -- quantile estimates are approximately median unbiased regardless of the -- distribution of x. This corresponds to method 8 in R and -- Mathematica. medianUnbiased :: ContParam -- | Normal unbiased definition, a=3/8, b=3/8. An -- approximately unbiased estimate if the empirical distribution -- approximates the normal distribution. This corresponds to method 9 in -- R and Mathematica. normalUnbiased :: ContParam -- | Commonly used sample statistics, also known as descriptive statistics. module Statistics.Sample -- | Sample data. type Sample = UArr Double range :: Sample -> Double -- | Arithmetic mean. This uses Welford's algorithm to provide numerical -- stability, using a single pass over the sample data. mean :: Sample -> Double -- | Harmonic mean. This algorithm performs a single pass over the sample. harmonicMean :: Sample -> Double -- | Geometric mean of a sample containing no negative values. geometricMean :: Sample -> Double -- | Compute the kth central moment of a sample. The central moment -- is also known as the moment about the mean. 
-- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoment :: Int -> Sample -> Double -- | Compute the kth and jth central moments of a sample. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoments :: Int -> Int -> Sample -> Double :*: Double -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
-- skewness $ toU [1,100,101,102,103] -- ==> -1.497681449918257 ---- -- A sample with positive skew is said to be right-skewed. -- --
-- skewness $ toU [1,2,3,4,100] -- ==> 1.4975367033335198 ---- -- A sample's skewness is not defined if its variance is zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. skewness :: Sample -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that more -- of the sample's variance is due to infrequent severe deviations, -- rather than more frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. kurtosis :: Sample -> Double -- | Maximum likelihood estimate of a sample's variance. Also known as the -- population variance, where the denominator is n. variance :: Sample -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. varianceUnbiased :: Sample -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. stdDev :: Sample -> Double -- | Maximum likelihood estimate of a sample's variance. fastVariance :: Sample -> Double -- | Unbiased estimate of a sample's variance. fastVarianceUnbiased :: Sample -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. fastStdDev :: Sample -> Double -- | The normal distribution. This is a continuous probability distribution -- that describes data that cluster around a mean. module Statistics.Distribution.Normal -- | The normal distribution. 
data NormalDistribution fromParams :: Double -> Double -> NormalDistribution fromSample :: Sample -> NormalDistribution standard :: NormalDistribution instance Typeable NormalDistribution instance Eq NormalDistribution instance Read NormalDistribution instance Show NormalDistribution instance Mean NormalDistribution instance Variance NormalDistribution instance Distribution NormalDistribution -- | Mathematical functions for statistics. module Statistics.Math -- | Evaluate a series of Chebyshev polynomials. Uses Clenshaw's algorithm. chebyshev :: Double -> UArr Double -> Double -- | The binomial coefficient. -- --
-- 7 `choose` 3 == 35 --choose :: Int -> Int -> Double -- | Compute the factorial function n!. Returns ∞ if the input is -- above 170 (above which the result cannot be represented by a 64-bit -- Double). factorial :: Int -> Double -- | Compute the natural logarithm of the factorial function. Gives 16 -- decimal digits of precision. logFactorial :: Int -> Double -- | Compute the incomplete gamma integral function γ(s,x). -- Uses Algorithm AS 239 by Shea. incompleteGamma :: Double -> Double -> Double -- | Compute the logarithm of the gamma function Γ(x). Uses -- Algorithm AS 245 by Macleod. -- -- Gives an accuracy of 10–12 significant decimal digits, except for -- small regions around x = 1 and x = 2, where the function -- goes to zero. For greater accuracy, use logGammaL. -- -- Returns ∞ if the input is outside of the range (0 < x ≤ -- 1e305). logGamma :: Double -> Double -- | Compute the logarithm of the gamma function, Γ(x). Uses a -- Lanczos approximation. -- -- This function is slower than logGamma, but gives 14 or more -- significant decimal digits of accuracy, except around x = 1 and -- x = 2, where the function goes to zero. -- -- Returns ∞ if the input is outside of the range (0 < x ≤ -- 1e305). logGammaL :: Double -> Double -- | The binomial distribution. This is the discrete probability -- distribution of the number of successes in a sequence of n -- independent yes/no experiments, each of which yields success with -- probability p. module Statistics.Distribution.Binomial -- | The binomial distribution. data BinomialDistribution binomial :: Int -> Double -> BinomialDistribution -- | Number of trials. bdTrials :: BinomialDistribution -> Int -- | Probability. 
bdProbability :: BinomialDistribution -> Double instance Typeable BinomialDistribution instance Eq BinomialDistribution instance Read BinomialDistribution instance Show BinomialDistribution instance Mean BinomialDistribution instance Variance BinomialDistribution instance Distribution BinomialDistribution -- | The gamma distribution. This is a continuous probability distribution -- with two parameters, k and ϑ. If k is integral, the -- distribution represents the sum of k independent exponentially -- distributed random variables, each of which has a mean of ϑ. module Statistics.Distribution.Gamma -- | The gamma distribution. data GammaDistribution -- | Shape parameter, k. gdShape :: GammaDistribution -> Double -- | Scale parameter, ϑ. gdScale :: GammaDistribution -> Double instance Typeable GammaDistribution instance Eq GammaDistribution instance Read GammaDistribution instance Show GammaDistribution instance Mean GammaDistribution instance Variance GammaDistribution instance Distribution GammaDistribution -- | The Hypergeometric distribution. This is the discrete probability -- distribution that measures the probability of k successes in -- l trials, without replacement, from a finite population. -- -- The parameters of the distribution describe k elements chosen -- from a population of l, with m elements of one type, and -- l-m of the other (all are positive integers). module Statistics.Distribution.Hypergeometric data HypergeometricDistribution fromParams :: Int -> Int -> Int -> HypergeometricDistribution hdM :: HypergeometricDistribution -> Int hdL :: HypergeometricDistribution -> Int hdK :: HypergeometricDistribution -> Int instance Typeable HypergeometricDistribution instance Eq HypergeometricDistribution instance Read HypergeometricDistribution instance Show HypergeometricDistribution instance Mean HypergeometricDistribution instance Variance HypergeometricDistribution instance Distribution HypergeometricDistribution -- | The Poisson distribution. 
This is the discrete probability -- distribution of a number of events occurring in a fixed interval if -- these events occur with a known average rate, and occur independently -- from each other within that interval. module Statistics.Distribution.Poisson data PoissonDistribution fromLambda :: Double -> PoissonDistribution instance Typeable PoissonDistribution instance Eq PoissonDistribution instance Read PoissonDistribution instance Show PoissonDistribution instance Mean PoissonDistribution instance Variance PoissonDistribution instance Distribution PoissonDistribution -- | Very fast statistics over simple powers of a sample. These can all be -- computed efficiently in just a single pass over a sample, with that -- pass subject to stream fusion. -- -- The tradeoff is that some of these functions are less numerically -- robust than their counterparts in the Statistics.Sample -- module. Where this is the case, the alternatives are noted. module Statistics.Sample.Powers -- | Sample data. type Sample = UArr Double data Powers -- | O(n) Collect the n simple powers of a sample. -- -- Functions computed over a sample's simple powers require at least a -- certain number (or order) of powers to be collected. -- --
-- skewness . powers 3 $ toU [1,100,101,102,103] -- ==> -1.497681449918257 ---- -- A sample with positive skew is said to be right-skewed. -- --
-- skewness . powers 3 $ toU [1,2,3,4,100] -- ==> 1.4975367033335198 ---- -- A sample's skewness is not defined if its variance is zero. -- -- Requires Powers with order at least 3. skewness :: Powers -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that the -- sample's variance is due more to infrequent severe deviations than to -- frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- Requires Powers with order at least 4. kurtosis :: Powers -> Double instance Eq Powers instance Read Powers instance Show Powers -- | The exponential distribution. This is the continuous probability -- distribution of the times between events in a Poisson process, in -- which events occur continuously and independently at a constant -- average rate. module Statistics.Distribution.Exponential data ExponentialDistribution fromLambda :: Double -> ExponentialDistribution fromSample :: Sample -> ExponentialDistribution edLambda :: ExponentialDistribution -> Double instance Typeable ExponentialDistribution instance Eq ExponentialDistribution instance Read ExponentialDistribution instance Show ExponentialDistribution instance Mean ExponentialDistribution instance Variance ExponentialDistribution instance Distribution ExponentialDistribution -- | Kernel density estimation code, providing non-parametric ways to -- estimate the probability density function of a sample. module Statistics.KernelDensity -- | Simple Epanechnikov kernel density estimator. Returns the uniformly -- spaced points from the sample range at which the density function was -- estimated, and the estimates at those points. epanechnikovPDF :: Int -> Sample -> (Points, UArr Double) -- | Simple Gaussian kernel density estimator. Returns the uniformly spaced -- points from the sample range at which the density function was -- estimated, and the estimates at those points. 
gaussianPDF :: Int -> Sample -> (Points, UArr Double) -- | Points from the range of a Sample. newtype Points Points :: UArr Double -> Points fromPoints :: Points -> UArr Double -- | Choose a uniform range of points at which to estimate a sample's -- probability density function. -- -- If you are using a Gaussian kernel, multiply the sample's bandwidth by -- 3 before passing it to this function. -- -- If this function is passed an empty vector, it returns values of -- positive and negative infinity. choosePoints :: Int -> Double -> Sample -> Points -- | The width of the convolution kernel used. type Bandwidth = Double -- | Compute the optimal bandwidth from the observed data for the given -- kernel. bandwidth :: (Double -> Bandwidth) -> Sample -> Bandwidth -- | Bandwidth estimator for an Epanechnikov kernel. epanechnikovBW :: Double -> Bandwidth -- | Bandwidth estimator for a Gaussian kernel. gaussianBW :: Double -> Bandwidth -- | The convolution kernel. Its parameters are as follows: -- --