-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A library of statistical types, data, and functions -- -- This library provides a number of common functions and types useful in -- statistics. Our focus is on high performance, numerical robustness, -- and use of good algorithms. Where possible, we provide references to -- the statistical literature. -- -- The library's facilities can be divided into two broad categories: -- -- Working with widely used discrete and continuous probability -- distributions. (There are dozens of exotic distributions in use; we -- focus on the most common.) -- -- Computing with sample data: quantile estimation, kernel density -- estimation, bootstrap methods, and autocorrelation analysis. @package statistics @version 0.2 -- | Useful functions. module Statistics.Function -- | Compute the minimum and maximum of an array in one pass. minMax :: UArr Double -> Double :*: Double -- | Sort an array. sort :: (UA e, Ord e) => UArr e -> UArr e -- | Partially sort an array, such that the least k elements will be -- at the front. partialSort :: (UA e, Ord e) => Int -> UArr e -> UArr e -- | Types for working with statistics. module Statistics.Types -- | Sample data. type Sample = UArr Double -- | A function that estimates a property of a sample, such as its -- mean. type Estimator = Sample -> Double -- | Weights for affecting the importance of elements of a sample. type Weights = UArr Double -- | Resampling statistics. module Statistics.Resampling -- | A resample drawn randomly, with replacement, from a set of data -- points. Distinct from a normal array to make it harder for your humble -- author's brain to go wrong. newtype Resample Resample :: UArr Double -> Resample fromResample :: Resample -> UArr Double -- | Compute a statistical estimate repeatedly over a sample, each time -- omitting a successive element. 
jackknife :: Estimator -> Sample -> UArr Double -- | Resample a data set repeatedly, with replacement, computing each -- estimate over the resampled data. resample :: MTGen -> [Estimator] -> Int -> Sample -> IO [Resample] instance Eq Resample instance Show Resample -- | Types and functions common to many probability distributions. module Statistics.Distribution -- | The interface shared by all probability distributions. class Distribution d probability :: (Distribution d) => d -> Double -> Double cumulative :: (Distribution d) => d -> Double -> Double inverse :: (Distribution d) => d -> Double -> Double class (Distribution d) => Mean d mean :: (Mean d) => d -> Double class (Mean d) => Variance d variance :: (Variance d) => d -> Double -- | Approximate the value of X for which -- P(x>X)=p. -- -- This method uses a combination of Newton-Raphson iteration and -- bisection with the given guess as a starting point. The upper and -- lower bounds specify the interval in which the probability -- distribution reaches the value p. findRoot :: (Distribution d) => d -> Double -> Double -> Double -> Double -> Double -- | The Geometric distribution. This is the discrete probability -- distribution of the number of independent Bernoulli trials needed to -- obtain the first success, where each trial succeeds with the same -- probability. module Statistics.Distribution.Geometric data GeometricDistribution fromSuccess :: Double -> GeometricDistribution pdSuccess :: GeometricDistribution -> Double instance Typeable GeometricDistribution instance Eq GeometricDistribution instance Read GeometricDistribution instance Show GeometricDistribution instance Mean GeometricDistribution instance Variance GeometricDistribution instance Distribution GeometricDistribution -- | Constant values common to much statistics code. module Statistics.Constants -- | The smallest Double ε such that 1 + ε ≠ 1. m_epsilon :: Double -- | A very large number. m_huge :: Double -- |
-- 1 / sqrt 2
m_1_sqrt_2 :: Double -- |
-- 2 / sqrt pi
m_2_sqrt_pi :: Double -- | The largest Int x such that 2**(x-1) is -- approximately representable as a Double. m_max_exp :: Int -- |
-- sqrt 2
m_sqrt_2 :: Double -- |
-- sqrt (2 * pi)
m_sqrt_2_pi :: Double -- | Functions for approximating quantiles, i.e. points taken at regular -- intervals from the cumulative distribution function of a random -- variable. -- -- The number of quantiles is described below by the variable q, -- so with q=4, a 4-quantile (also known as a quartile) has -- 4 intervals, and contains 5 points. The parameter k describes -- the desired point, where 0 ≤ k ≤ q. module Statistics.Quantile -- | Estimate the kth q-quantile of a sample, using the -- weighted average method. weightedAvg :: Int -> Int -> Sample -> Double -- | Parameters a and b to the continuousBy function. data ContParam ContParam :: !!Double -> !!Double -> ContParam -- | Estimate the kth q-quantile of a sample x, using -- the continuous sample method with the given parameters. This is the -- method used by most statistical software, such as R, Mathematica, -- SPSS, and S. continuousBy :: ContParam -> Int -> Int -> Sample -> Double -- | California Department of Public Works definition, a=0, -- b=1. Gives a linear interpolation of the empirical CDF. This -- corresponds to method 4 in R and Mathematica. cadpw :: ContParam -- | Hazen's definition, a=0.5, b=0.5. This is claimed to be -- popular among hydrologists. This corresponds to method 5 in R and -- Mathematica. hazen :: ContParam -- | Definition used by the S statistics application, with a=1, -- b=1. The interpolation points divide the sample range into -- n-1 intervals. This corresponds to method 7 in R and -- Mathematica. s :: ContParam -- | Definition used by the SPSS statistics application, with a=0, -- b=0 (also known as Weibull's definition). This corresponds to -- method 6 in R and Mathematica. spss :: ContParam -- | Median unbiased definition, a=1/3, b=1/3. The resulting -- quantile estimates are approximately median unbiased regardless of the -- distribution of x. This corresponds to method 8 in R and -- Mathematica. 
medianUnbiased :: ContParam -- | Normal unbiased definition, a=3/8, b=3/8. An -- approximately unbiased estimate if the empirical distribution -- approximates the normal distribution. This corresponds to method 9 in -- R and Mathematica. normalUnbiased :: ContParam -- | Commonly used sample statistics, also known as descriptive statistics. module Statistics.Sample -- | Sample data. type Sample = UArr Double -- | Arithmetic mean. This uses Welford's algorithm to provide numerical -- stability, using a single pass over the sample data. mean :: Sample -> Double -- | Harmonic mean. This algorithm performs a single pass over the sample. harmonicMean :: Sample -> Double -- | Geometric mean of a sample containing no negative values. geometricMean :: Sample -> Double -- | Maximum likelihood estimate of a sample's variance. variance :: Sample -> Double -- | Unbiased estimate of a sample's variance. varianceUnbiased :: Sample -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. stdDev :: Sample -> Double -- | Maximum likelihood estimate of a sample's variance. fastVariance :: Sample -> Double -- | Unbiased estimate of a sample's variance. fastVarianceUnbiased :: Sample -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. fastStdDev :: Sample -> Double -- | The exponential distribution. This is the continuous probability -- distribution of the times between events in a Poisson process, in -- which events occur continuously and independently at a constant -- average rate. 
module Statistics.Distribution.Exponential data ExponentialDistribution fromLambda :: Double -> ExponentialDistribution fromSample :: Sample -> ExponentialDistribution instance Typeable ExponentialDistribution instance Eq ExponentialDistribution instance Read ExponentialDistribution instance Show ExponentialDistribution instance Mean ExponentialDistribution instance Variance ExponentialDistribution instance Distribution ExponentialDistribution -- | The normal distribution. This is a continuous probability distribution -- that describes data that cluster around a mean. module Statistics.Distribution.Normal -- | The normal distribution. data NormalDistribution fromParams :: Double -> Double -> NormalDistribution fromSample :: Sample -> NormalDistribution standard :: NormalDistribution instance Typeable NormalDistribution instance Eq NormalDistribution instance Read NormalDistribution instance Show NormalDistribution instance Mean NormalDistribution instance Variance NormalDistribution instance Distribution NormalDistribution -- | Mathematical functions for statistics. module Statistics.Math -- | Evaluate a series of Chebyshev polynomials. Uses Clenshaw's algorithm. chebyshev :: Double -> UArr Double -> Double -- | The binomial coefficient. -- --
-- 7 `choose` 3 == 35
choose :: Int -> Int -> Int -- | Compute the factorial function n!. Returns ∞ if the input is -- above 170 (above which the result cannot be represented by a 64-bit -- Double). factorial :: Int -> Double -- | Compute the natural logarithm of the factorial function. Gives 16 -- decimal digits of precision. logFactorial :: Int -> Double -- | Compute the incomplete gamma integral function γ(s,x). -- Uses Algorithm AS 239 by Shea. incompleteGamma :: Double -> Double -> Double -- | Compute the logarithm of the gamma function Γ(x). Uses -- Algorithm AS 245 by Macleod. -- -- Gives an accuracy of 10–12 significant decimal digits, except for -- small regions around x = 1 and x = 2, where the function -- goes to zero. For greater accuracy, use logGammaL. -- -- Returns ∞ if the input is outside of the range (0 < x ≤ -- 1e305). logGamma :: Double -> Double -- | Compute the logarithm of the gamma function, Γ(x). Uses a -- Lanczos approximation. -- -- This function is slower than logGamma, but gives 14 or more -- significant decimal digits of accuracy, except around x = 1 and -- x = 2, where the function goes to zero. -- -- Returns ∞ if the input is outside of the range (0 < x ≤ -- 1e305). logGammaL :: Double -> Double -- | The binomial distribution. This is the discrete probability -- distribution of the number of successes in a sequence of n -- independent yes/no experiments, each of which yields success with -- probability p. module Statistics.Distribution.Binomial -- | The binomial distribution. data BinomialDistribution binomial :: Int -> Double -> BinomialDistribution -- | Number of trials. bdTrials :: BinomialDistribution -> Int -- | Probability. 
bdProbability :: BinomialDistribution -> Double instance Typeable BinomialDistribution instance Eq BinomialDistribution instance Read BinomialDistribution instance Show BinomialDistribution instance Mean BinomialDistribution instance Variance BinomialDistribution instance Distribution BinomialDistribution -- | The gamma distribution. This is a continuous probability distribution -- with two parameters, k and ϑ. If k is integral, the -- distribution represents the sum of k independent exponentially -- distributed random variables, each of which has a mean of ϑ. module Statistics.Distribution.Gamma -- | The gamma distribution. data GammaDistribution -- | Shape parameter, k. gdShape :: GammaDistribution -> Double -- | Scale parameter, ϑ. gdScale :: GammaDistribution -> Double instance Typeable GammaDistribution instance Eq GammaDistribution instance Read GammaDistribution instance Show GammaDistribution instance Mean GammaDistribution instance Variance GammaDistribution instance Distribution GammaDistribution -- | The Hypergeometric distribution. This is the discrete probability -- distribution that measures the probability of k successes in -- l trials, without replacement, from a finite population. -- -- The parameters of the distribution describe k elements chosen -- from a population of l, with m elements of one type, and -- l-m of the other (all are positive integers). module Statistics.Distribution.Hypergeometric data HypergeometricDistribution fromParams :: Int -> Int -> Int -> HypergeometricDistribution hdM :: HypergeometricDistribution -> Int hdL :: HypergeometricDistribution -> Int hdK :: HypergeometricDistribution -> Int instance Typeable HypergeometricDistribution instance Eq HypergeometricDistribution instance Read HypergeometricDistribution instance Show HypergeometricDistribution instance Mean HypergeometricDistribution instance Variance HypergeometricDistribution instance Distribution HypergeometricDistribution -- | The Poisson distribution. 
This is the discrete probability -- distribution of a number of events occurring in a fixed interval if -- these events occur with a known average rate, and occur independently -- from each other within that interval. module Statistics.Distribution.Poisson data PoissonDistribution fromLambda :: Double -> PoissonDistribution instance Typeable PoissonDistribution instance Eq PoissonDistribution instance Read PoissonDistribution instance Show PoissonDistribution instance Mean PoissonDistribution instance Variance PoissonDistribution instance Distribution PoissonDistribution -- | Kernel density estimation code, providing non-parametric ways to -- estimate the probability density function of a sample. module Statistics.KernelDensity -- | Simple Epanechnikov kernel density estimator. Returns the uniformly -- spaced points from the sample range at which the density function was -- estimated, and the estimates at those points. epanechnikovPDF :: Int -> Sample -> (Points, UArr Double) -- | Simple Gaussian kernel density estimator. Returns the uniformly spaced -- points from the sample range at which the density function was -- estimated, and the estimates at those points. gaussianPDF :: Int -> Sample -> (Points, UArr Double) -- | Points from the range of a Sample. newtype Points Points :: UArr Double -> Points fromPoints :: Points -> UArr Double -- | Choose a uniform range of points at which to estimate a sample's -- probability density function. -- -- If you are using a Gaussian kernel, multiply the sample's bandwidth by -- 3 before passing it to this function. -- -- If this function is passed an empty vector, it returns values of -- positive and negative infinity. choosePoints :: Int -> Double -> Sample -> Points -- | The width of the convolution kernel used. type Bandwidth = Double -- | Compute the optimal bandwidth from the observed data for the given -- kernel. 
bandwidth :: (Double -> Bandwidth) -> Sample -> Bandwidth -- | Bandwidth estimator for an Epanechnikov kernel. epanechnikovBW :: Double -> Bandwidth -- | Bandwidth estimator for a Gaussian kernel. gaussianBW :: Double -> Bandwidth -- | The convolution kernel. Its parameters are as follows: -- --