-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A library of statistical types, data, and functions -- -- This library provides a number of common functions and types useful in -- statistics. Our focus is on high performance, numerical robustness, -- and use of good algorithms. Where possible, we provide references to -- the statistical literature. -- -- The library's facilities can be divided into three broad categories: -- -- Working with widely used discrete and continuous probability -- distributions. (There are dozens of exotic distributions in use; we -- focus on the most common.) -- -- Computing with sample data: quantile estimation, kernel density -- estimation, bootstrap methods, and autocorrelation analysis. -- -- Random variate generation under several different distributions. @package statistics @version 0.4.1 module Statistics.RandomVariate -- | Useful functions. module Statistics.Function -- | Compute the minimum and maximum of an array in one pass. minMax :: UArr Double -> Double :*: Double -- | Sort an array. sort :: (UA e, Ord e) => UArr e -> UArr e -- | Partially sort an array, such that the least k elements will be -- at the front. partialSort :: (UA e, Ord e) => Int -> UArr e -> UArr e -- | Return the indices of an array. indices :: (UA a) => UArr a -> UArr Int -- | Create an array, using the given ST action to populate each -- element. createU :: (UA e) => forall s. Int -> (Int -> ST s e) -> ST s (UArr e) -- | Create an array, using the given IO action to populate each -- element. createIO :: (UA e) => Int -> (Int -> IO e) -> IO (UArr e) -- | Types for working with statistics. module Statistics.Types -- | A function that estimates a property of a sample, such as its -- mean. type Estimator = Sample -> Double -- | Sample data. type Sample = UArr Double -- | Weights for affecting the importance of elements of a sample. type Weights = UArr Double -- | Resampling statistics. 
module Statistics.Resampling -- | A resample drawn randomly, with replacement, from a set of data -- points. Distinct from a normal array to make it harder for your humble -- author's brain to go wrong. newtype Resample Resample :: UArr Double -> Resample fromResample :: Resample -> UArr Double -- | Compute a statistical estimate repeatedly over a sample, each time -- omitting a successive element. jackknife :: Estimator -> Sample -> UArr Double -- | Resample a data set repeatedly, with replacement, computing each -- estimate over the resampled data. resample :: Gen s -> [Estimator] -> Int -> Sample -> ST s [Resample] instance Eq Resample instance Show Resample -- | Types and functions common to many probability distributions. module Statistics.Distribution -- | The interface shared by all probability distributions. class Distribution d density :: (Distribution d) => d -> Double -> Double cumulative :: (Distribution d) => d -> Double -> Double quantile :: (Distribution d) => d -> Double -> Double class (Distribution d) => Mean d mean :: (Mean d) => d -> Double class (Mean d) => Variance d variance :: (Variance d) => d -> Double -- | Approximate the value of X for which -- P(x>X)=p. -- -- This method uses a combination of Newton-Raphson iteration and -- bisection with the given guess as a starting point. The upper and -- lower bounds specify the interval in which the probability -- distribution reaches the value p. findRoot :: (Distribution d) => d -> Double -> Double -> Double -> Double -> Double -- | The Geometric distribution. This is the probability distribution of -- the number of Bernoulli trials needed to get one success, supported on -- the set [1,2..]. -- -- This distribution is sometimes referred to as the shifted -- geometric distribution, to distinguish it from a variant measuring the -- number of failures before the first success, defined over the set -- [0,1..]. 
module Statistics.Distribution.Geometric data GeometricDistribution fromSuccess :: Double -> GeometricDistribution pdSuccess :: GeometricDistribution -> Double instance Typeable GeometricDistribution instance Eq GeometricDistribution instance Read GeometricDistribution instance Show GeometricDistribution instance Mean GeometricDistribution instance Variance GeometricDistribution instance Distribution GeometricDistribution -- | Constant values common to much statistics code. module Statistics.Constants -- | The smallest Double larger than 1. m_epsilon :: Double -- | A very large number. m_huge :: Double -- |
--   1 / sqrt 2
--   
m_1_sqrt_2 :: Double -- |
--   2 / sqrt pi
--   
m_2_sqrt_pi :: Double -- | The largest Int x such that 2**(x-1) is -- approximately representable as a Double. m_max_exp :: Int -- |
--   sqrt 2
--   
m_sqrt_2 :: Double -- |
--   sqrt (2 * pi)
--   
m_sqrt_2_pi :: Double -- | Functions for approximating quantiles, i.e. points taken at regular -- intervals from the cumulative distribution function of a random -- variable. -- -- The number of quantiles is described below by the variable q, -- so with q=4, a 4-quantile (also known as a quartile) has -- 4 intervals, and contains 5 points. The parameter k describes -- the desired point, where 0 ≤ k ≤ q. module Statistics.Quantile -- | O(n log n). Estimate the kth q-quantile of -- a sample, using the weighted average method. weightedAvg :: Int -> Int -> Sample -> Double -- | Parameters a and b to the continuousBy function. data ContParam ContParam :: !!Double -> !!Double -> ContParam -- | O(n log n). Estimate the kth q-quantile of -- a sample x, using the continuous sample method with the given -- parameters. This is the method used by most statistical software, such -- as R, Mathematica, SPSS, and S. continuousBy :: ContParam -> Int -> Int -> Sample -> Double -- | O(n log n). Estimate the range between -- q-quantiles 1 and q-1 of a sample x, using the -- continuous sample method with the given parameters. -- -- For instance, the interquartile range (IQR) can be estimated as -- follows: -- --
--   midspread medianUnbiased 4 (toU [1,1,2,2,3])
--   ==> 1.333333
--   
midspread :: ContParam -> Int -> Sample -> Double -- | California Department of Public Works definition, a=0, -- b=1. Gives a linear interpolation of the empirical CDF. This -- corresponds to method 4 in R and Mathematica. cadpw :: ContParam -- | Hazen's definition, a=0.5, b=0.5. This is claimed to be -- popular among hydrologists. This corresponds to method 5 in R and -- Mathematica. hazen :: ContParam -- | Definition used by the S statistics application, with a=1, -- b=1. The interpolation points divide the sample range into -- n-1 intervals. This corresponds to method 7 in R and -- Mathematica. s :: ContParam -- | Definition used by the SPSS statistics application, with a=0, -- b=0 (also known as Weibull's definition). This corresponds to -- method 6 in R and Mathematica. spss :: ContParam -- | Median unbiased definition, a=1/3, b=1/3. The resulting -- quantile estimates are approximately median unbiased regardless of the -- distribution of x. This corresponds to method 8 in R and -- Mathematica. medianUnbiased :: ContParam -- | Normal unbiased definition, a=3/8, b=3/8. An -- approximately unbiased estimate if the empirical distribution -- approximates the normal distribution. This corresponds to method 9 in -- R and Mathematica. normalUnbiased :: ContParam -- | Commonly used sample statistics, also known as descriptive statistics. module Statistics.Sample -- | Sample data. type Sample = UArr Double range :: Sample -> Double -- | Arithmetic mean. This uses Welford's algorithm to provide numerical -- stability, using a single pass over the sample data. mean :: Sample -> Double -- | Harmonic mean. This algorithm performs a single pass over the sample. harmonicMean :: Sample -> Double -- | Geometric mean of a sample containing no negative values. geometricMean :: Sample -> Double -- | Compute the kth central moment of a sample. The central moment -- is also known as the moment about the mean. 
-- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoment :: Int -> Sample -> Double -- | Compute the kth and jth central moments of a sample. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoments :: Int -> Int -> Sample -> Double :*: Double -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
--   skewness $ toU [1,100,101,102,103]
--   ==> -1.497681449918257
--   
-- -- A sample with positive skew is said to be right-skewed. -- --
--   skewness $ toU [1,2,3,4,100]
--   ==> 1.4975367033335198
--   
-- -- A sample's skewness is not defined if its variance is zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. skewness :: Sample -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that more -- of the sample's variance is due to infrequent severe deviations, -- rather than more frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. kurtosis :: Sample -> Double -- | Maximum likelihood estimate of a sample's variance. Also known as the -- population variance, where the denominator is n. variance :: Sample -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. varianceUnbiased :: Sample -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. stdDev :: Sample -> Double -- | Maximum likelihood estimate of a sample's variance. fastVariance :: Sample -> Double -- | Unbiased estimate of a sample's variance. fastVarianceUnbiased :: Sample -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. fastStdDev :: Sample -> Double -- | The normal distribution. This is a continuous probability distribution -- that describes data that cluster around a mean. module Statistics.Distribution.Normal -- | The normal distribution. 
data NormalDistribution fromParams :: Double -> Double -> NormalDistribution fromSample :: Sample -> NormalDistribution standard :: NormalDistribution instance Typeable NormalDistribution instance Eq NormalDistribution instance Read NormalDistribution instance Show NormalDistribution instance Mean NormalDistribution instance Variance NormalDistribution instance Distribution NormalDistribution -- | Mathematical functions for statistics. module Statistics.Math -- | Evaluate a series of Chebyshev polynomials. Uses Clenshaw's algorithm. chebyshev :: Double -> UArr Double -> Double -- | The binomial coefficient. -- --
--   7 `choose` 3 == 35
--   
choose :: Int -> Int -> Double -- | Compute the factorial function n!. Returns ∞ if the input is -- above 170 (above which the result cannot be represented by a 64-bit -- Double). factorial :: Int -> Double -- | Compute the natural logarithm of the factorial function. Gives 16 -- decimal digits of precision. logFactorial :: Int -> Double -- | Compute the incomplete gamma integral function γ(s,x). -- Uses Algorithm AS 239 by Shea. incompleteGamma :: Double -> Double -> Double -- | Compute the logarithm of the gamma function Γ(x). Uses -- Algorithm AS 245 by Macleod. -- -- Gives an accuracy of 10–12 significant decimal digits, except for -- small regions around x = 1 and x = 2, where the function -- goes to zero. For greater accuracy, use logGammaL. -- -- Returns ∞ if the input is outside of the range (0 < x ≤ -- 1e305). logGamma :: Double -> Double -- | Compute the logarithm of the gamma function, Γ(x). Uses a -- Lanczos approximation. -- -- This function is slower than logGamma, but gives 14 or more -- significant decimal digits of accuracy, except around x = 1 and -- x = 2, where the function goes to zero. -- -- Returns ∞ if the input is outside of the range (0 < x ≤ -- 1e305). logGammaL :: Double -> Double -- | The binomial distribution. This is the discrete probability -- distribution of the number of successes in a sequence of n -- independent yes/no experiments, each of which yields success with -- probability p. module Statistics.Distribution.Binomial -- | The binomial distribution. data BinomialDistribution binomial :: Int -> Double -> BinomialDistribution -- | Number of trials. bdTrials :: BinomialDistribution -> Int -- | Probability. 
bdProbability :: BinomialDistribution -> Double instance Typeable BinomialDistribution instance Eq BinomialDistribution instance Read BinomialDistribution instance Show BinomialDistribution instance Mean BinomialDistribution instance Variance BinomialDistribution instance Distribution BinomialDistribution -- | The gamma distribution. This is a continuous probability distribution -- with two parameters, k and ϑ. If k is integral, the -- distribution represents the sum of k independent exponentially -- distributed random variables, each of which has a mean of ϑ. module Statistics.Distribution.Gamma -- | The gamma distribution. data GammaDistribution -- | Shape parameter, k. gdShape :: GammaDistribution -> Double -- | Scale parameter, ϑ. gdScale :: GammaDistribution -> Double instance Typeable GammaDistribution instance Eq GammaDistribution instance Read GammaDistribution instance Show GammaDistribution instance Mean GammaDistribution instance Variance GammaDistribution instance Distribution GammaDistribution -- | The Hypergeometric distribution. This is the discrete probability -- distribution that measures the probability of k successes in -- l trials, without replacement, from a finite population. -- -- The parameters of the distribution describe k elements chosen -- from a population of l, with m elements of one type, and -- l-m of the other (all are positive integers). module Statistics.Distribution.Hypergeometric data HypergeometricDistribution fromParams :: Int -> Int -> Int -> HypergeometricDistribution hdM :: HypergeometricDistribution -> Int hdL :: HypergeometricDistribution -> Int hdK :: HypergeometricDistribution -> Int instance Typeable HypergeometricDistribution instance Eq HypergeometricDistribution instance Read HypergeometricDistribution instance Show HypergeometricDistribution instance Mean HypergeometricDistribution instance Variance HypergeometricDistribution instance Distribution HypergeometricDistribution -- | The Poisson distribution. 
This is the discrete probability -- distribution of a number of events occurring in a fixed interval if -- these events occur with a known average rate, and occur independently -- from each other within that interval. module Statistics.Distribution.Poisson data PoissonDistribution fromLambda :: Double -> PoissonDistribution instance Typeable PoissonDistribution instance Eq PoissonDistribution instance Read PoissonDistribution instance Show PoissonDistribution instance Mean PoissonDistribution instance Variance PoissonDistribution instance Distribution PoissonDistribution -- | Very fast statistics over simple powers of a sample. These can all be -- computed efficiently in just a single pass over a sample, with that -- pass subject to stream fusion. -- -- The tradeoff is that some of these functions are less numerically -- robust than their counterparts in the Statistics.Sample -- module. Where this is the case, the alternatives are noted. module Statistics.Sample.Powers -- | Sample data. type Sample = UArr Double data Powers -- | O(n) Collect the n simple powers of a sample. -- -- Functions computed over a sample's simple powers require at least a -- certain number (or order) of powers to be collected. -- -- * variance and varianceUnbiased require order at least 2. -- * skewness requires order at least 3. -- * kurtosis requires order at least 4. -- -- This function is subject to stream fusion. powers :: Int -> Sample -> Powers -- | The order (number) of simple powers collected from a Sample. order :: Powers -> Int -- | The number of elements in the original Sample. This is the -- sample's zeroth simple power. count :: Powers -> Int -- | The sum of elements in the original Sample. This is the -- sample's first simple power. sum :: Powers -> Double -- | The arithmetic mean of elements in the original Sample. -- -- This is less numerically robust than the mean function in the -- Statistics.Sample module, but the number is essentially free -- to compute if you have already collected a sample's simple powers. mean :: Powers -> Double -- | Maximum likelihood estimate of a sample's variance. 
Also known as the -- population variance, where the denominator is n. This is the -- second central moment of the sample. -- -- This is less numerically robust than the variance function in the -- Statistics.Sample module, but the number is essentially free -- to compute if you have already collected a sample's simple powers. -- -- Requires Powers with order at least 2. variance :: Powers -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. stdDev :: Powers -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. -- -- Requires Powers with order at least 2. varianceUnbiased :: Powers -> Double -- | Compute the kth central moment of a Sample. The central -- moment is also known as the moment about the mean. centralMoment :: Int -> Powers -> Double -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
--   skewness . powers 3 $ toU [1,100,101,102,103]
--   ==> -1.497681449918257
--   
-- -- A sample with positive skew is said to be right-skewed. -- --
--   skewness . powers 3 $ toU [1,2,3,4,100]
--   ==> 1.4975367033335198
--   
-- -- A sample's skewness is not defined if its variance is zero. -- -- Requires Powers with order at least 3. skewness :: Powers -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that the -- sample's variance is due more to infrequent severe deviations than to -- frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- Requires Powers with order at least 4. kurtosis :: Powers -> Double instance Eq Powers instance Read Powers instance Show Powers -- | The exponential distribution. This is the continuous probability -- distribution of the times between events in a Poisson process, in -- which events occur continuously and independently at a constant -- average rate. module Statistics.Distribution.Exponential data ExponentialDistribution fromLambda :: Double -> ExponentialDistribution fromSample :: Sample -> ExponentialDistribution edLambda :: ExponentialDistribution -> Double instance Typeable ExponentialDistribution instance Eq ExponentialDistribution instance Read ExponentialDistribution instance Show ExponentialDistribution instance Mean ExponentialDistribution instance Variance ExponentialDistribution instance Distribution ExponentialDistribution -- | Kernel density estimation code, providing non-parametric ways to -- estimate the probability density function of a sample. module Statistics.KernelDensity -- | Simple Epanechnikov kernel density estimator. Returns the uniformly -- spaced points from the sample range at which the density function was -- estimated, and the estimates at those points. epanechnikovPDF :: Int -> Sample -> (Points, UArr Double) -- | Simple Gaussian kernel density estimator. Returns the uniformly spaced -- points from the sample range at which the density function was -- estimated, and the estimates at those points. 
gaussianPDF :: Int -> Sample -> (Points, UArr Double) -- | Points from the range of a Sample. newtype Points Points :: UArr Double -> Points fromPoints :: Points -> UArr Double -- | Choose a uniform range of points at which to estimate a sample's -- probability density function. -- -- If you are using a Gaussian kernel, multiply the sample's bandwidth by -- 3 before passing it to this function. -- -- If this function is passed an empty vector, it returns values of -- positive and negative infinity. choosePoints :: Int -> Double -> Sample -> Points -- | The width of the convolution kernel used. type Bandwidth = Double -- | Compute the optimal bandwidth from the observed data for the given -- kernel. bandwidth :: (Double -> Bandwidth) -> Sample -> Bandwidth -- | Bandwidth estimator for an Epanechnikov kernel. epanechnikovBW :: Double -> Bandwidth -- | Bandwidth estimator for a Gaussian kernel. gaussianBW :: Double -> Bandwidth -- | The convolution kernel. Its parameters are as follows: -- -- type Kernel = Double -> Double -> Double -> Double -> Double -- | Epanechnikov kernel for probability density function estimation. epanechnikovKernel :: Kernel -- | Gaussian kernel for probability density function estimation. gaussianKernel :: Kernel -- | Kernel density estimator, providing a non-parametric way of estimating -- the PDF of a random variable. estimatePDF :: Kernel -> Bandwidth -> Sample -> Points -> UArr Double -- | A helper for creating a simple kernel density estimation function with -- automatically chosen bandwidth and estimation points. simplePDF :: (Double -> Double) -> Kernel -> Double -> Int -> Sample -> (Points, UArr Double) instance Eq Points instance Show Points -- | The bootstrap method for statistical inference. module Statistics.Resampling.Bootstrap -- | A point and interval estimate computed via an Estimator. data Estimate Estimate :: !!Double -> !!Double -> !!Double -> !!Double -> Estimate -- | Point estimate. 
estPoint :: Estimate -> !!Double -- | Lower bound of the estimate interval (i.e. the lower bound of the -- confidence interval). estLowerBound :: Estimate -> !!Double -- | Upper bound of the estimate interval (i.e. the upper bound of the -- confidence interval). estUpperBound :: Estimate -> !!Double -- | Confidence level of the confidence intervals. estConfidenceLevel :: Estimate -> !!Double -- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both bias -- and skewness in the resampled distribution. bootstrapBCA :: Double -> Sample -> [Estimator] -> [Resample] -> [Estimate] instance Eq Estimate instance Show Estimate -- | Functions for computing autocovariance and autocorrelation of a -- sample. module Statistics.Autocorrelation -- | Compute the autocovariance of a sample, i.e. the covariance of the -- sample against a shifted version of itself. autocovariance :: Sample -> UArr Double -- | Compute the autocorrelation function of a sample, and the upper and -- lower bounds of confidence intervals for each element. -- -- Note: The calculation of the 95% confidence interval assumes a -- stationary Gaussian process. autocorrelation :: Sample -> (UArr Double, UArr Double, UArr Double)