-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A library of statistical types, data, and functions -- -- This library provides a number of common functions and types useful in -- statistics. We focus on high performance, numerical robustness, and -- use of good algorithms. Where possible, we provide references to the -- statistical literature. -- -- The library's facilities can be divided into four broad categories: -- -- -- -- Changes in 0.10.1.0 -- -- -- -- Changed in 0.10.0.1 -- -- -- -- Changes in 0.10.0.0: -- -- @package statistics @version 0.10.1.0 module Statistics.Test.Types -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Significant if parameter is True, not significant otherwiser significant :: Bool -> TestResult instance Typeable TestType instance Typeable TestResult instance Eq TestType instance Ord TestType instance Show TestType instance Eq TestResult instance Ord TestResult instance Show TestResult -- | Fourier-related transformations of mathematical functions. -- -- These functions are written for simplicity and correctness, not speed. -- If you need a fast FFT implementation for your application, you should -- strongly consider using a library of FFTW bindings instead. module Statistics.Transform type CD = Complex Double -- | Discrete cosine transform (DCT-II). dct :: Vector Double -> Vector Double -- | Discrete cosine transform, with complex coefficients (DCT-II). dct_ :: Vector CD -> Vector Double -- | Inverse discrete cosine transform (DCT-III). It's inverse of -- dct only up to scale parameter: -- --
--   (idct . dct) x = (* lenngth x)
--   
idct :: Vector Double -> Vector Double -- | Inverse discrete cosine transform, with complex coefficients -- (DCT-III). idct_ :: Vector CD -> Vector Double -- | Radix-2 decimation-in-time fast Fourier transform. fft :: Vector CD -> Vector CD -- | Inverse fast Fourier transform. ifft :: Vector CD -> Vector CD -- | Mathematical functions for statistics. -- -- DEPRECATED. Use package math-functions instead. This module is just -- reexports functions from Numeric.SpecFunctions, -- Numeric.SpecFunctions.Extra and -- Numeric.Polynomial.Chebyshev. module Statistics.Math -- | Haskell functions for finding the roots of mathematical functions. module Statistics.Math.RootFinding -- | The result of searching for a root of a mathematical function. data Root a -- | The function does not have opposite signs when evaluated at the lower -- and upper bounds of the search. NotBracketed :: Root a -- | The search failed to converge to within the given error tolerance -- after the given number of iterations. SearchFailed :: Root a -- | A root was successfully found. Root :: a -> Root a -- | Returns either the result of a search for a root, or the default value -- if the search failed. fromRoot :: a -> Root a -> a -- | Use the method of Ridders to compute a root of a function. -- -- The function must have opposite signs when evaluated at the lower and -- upper bounds of the search (i.e. the root must be bracketed). ridders :: Double -> (Double, Double) -> (Double -> Double) -> Root Double instance Typeable1 Root instance Eq a => Eq (Root a) instance Read a => Read (Root a) instance Show a => Show (Root a) instance Alternative Root instance Applicative Root instance MonadPlus Root instance Monad Root instance Functor Root -- | Useful functions. module Statistics.Function -- | Compute the minimum and maximum of a vector in one pass. minMax :: Vector v Double => v Double -> (Double, Double) -- | Sort a vector. sort :: (Ord e, Vector v e) => v e -> v e -- | Sort a vector using a custom ordering. sortBy :: Vector v e => Comparison e -> v e -> v e -- | Partially sort a vector, such that the least k elements will be -- at the front. partialSort :: (Vector v e, Ord e) => Int -> v e -> v e -- | Zip a vector with its indices. indexed :: (Vector v e, Vector v Int, Vector v (Int, e)) => v e -> v (Int, e) -- | Return the indices of a vector. indices :: (Vector v a, Vector v Int) => v a -> v Int -- | Efficiently compute the next highest power of two for a non-negative -- integer. If the given value is already a power of two, it is returned -- unchanged. If negative, zero is returned. nextHighestPowerOfTwo :: Int -> Int -- | Compare two Double values for approximate equality, using -- Dawson's method. -- -- The required accuracy is specified in ULPs (units of least precision). -- If the two numbers differ by the given number of ULPs or less, this -- function returns True. within :: Int -> Double -> Double -> Bool -- | Functions for approximating quantiles, i.e. points taken at regular -- intervals from the cumulative distribution function of a random -- variable. -- -- The number of quantiles is described below by the variable q, -- so with q=4, a 4-quantile (also known as a quartile) has -- 4 intervals, and contains 5 points. The parameter k describes -- the desired point, where 0 ≤ kq. module Statistics.Quantile -- | O(n log n). Estimate the kth q-quantile of -- a sample, using the weighted average method. weightedAvg :: Vector v Double => Int -> Int -> v Double -> Double -- | Parameters a and b to the continuousBy function. data ContParam ContParam :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> ContParam -- | O(n log n). Estimate the kth q-quantile of -- a sample x, using the continuous sample method with the given -- parameters. This is the method used by most statistical software, such -- as R, Mathematica, SPSS, and S. continuousBy :: Vector v Double => ContParam -> Int -> Int -> v Double -> Double -- | O(n log n). Estimate the range between -- q-quantiles 1 and q-1 of a sample x, using the -- continuous sample method with the given parameters. -- -- For instance, the interquartile range (IQR) can be estimated as -- follows: -- --
--   midspread medianUnbiased 4 (U.fromList [1,1,2,2,3])
--   ==> 1.333333
--   
midspread :: Vector v Double => ContParam -> Int -> v Double -> Double -- | California Department of Public Works definition, a=0, -- b=1. Gives a linear interpolation of the empirical CDF. This -- corresponds to method 4 in R and Mathematica. cadpw :: ContParam -- | Hazen's definition, a=0.5, b=0.5. This is claimed to be -- popular among hydrologists. This corresponds to method 5 in R and -- Mathematica. hazen :: ContParam -- | Definition used by the S statistics application, with a=1, -- b=1. The interpolation points divide the sample range into -- n-1 intervals. This corresponds to method 7 in R and -- Mathematica. s :: ContParam -- | Definition used by the SPSS statistics application, with a=0, -- b=0 (also known as Weibull's definition). This corresponds to -- method 6 in R and Mathematica. spss :: ContParam -- | Median unbiased definition, a=1/3, b=1/3. The resulting -- quantile estimates are approximately median unbiased regardless of the -- distribution of x. This corresponds to method 8 in R and -- Mathematica. medianUnbiased :: ContParam -- | Normal unbiased definition, a=3/8, b=3/8. An -- approximately unbiased estimate if the empirical distribution -- approximates the normal distribution. This corresponds to method 9 in -- R and Mathematica. normalUnbiased :: ContParam -- | Functions for computing histograms of sample data. module Statistics.Sample.Histogram -- | O(n) Compute a histogram over a data set. -- -- The result consists of a pair of vectors: -- -- -- -- Interval (bin) sizes are uniform, and the upper and lower bounds are -- chosen automatically using the range function. To specify these -- parameters directly, use the histogram_ function. histogram :: (Vector v0 Double, Vector v1 Double, Num b, Vector v1 b) => Int -> v0 Double -> (v1 Double, v1 b) -- | O(n) Compute a histogram over a data set. -- -- Interval (bin) sizes are uniform, based on the supplied upper and -- lower bounds. histogram_ :: (Num b, RealFrac a, Vector v0 a, Vector v1 b) => Int -> a -> a -> v0 a -> v1 b -- | O(n) Compute decent defaults for the lower and upper bounds of -- a histogram, based on the desired number of bins and the range of the -- sample data. -- -- The upper and lower bounds used are (lo-d, hi+d), where -- --
--   d = (maximum sample - minimum sample) / ((bins - 1) * 2)
--   
range :: Vector v Double => Int -> v Double -> (Double, Double) -- | Kernel density estimation. This module provides a fast, robust, -- non-parametric way to estimate the probability density function of a -- sample. -- -- This estimator does not use the commonly employed "Gaussian rule of -- thumb". As a result, it outperforms many plug-in methods on multimodal -- samples with widely separated modes. module Statistics.Sample.KernelDensity -- | Gaussian kernel density estimator for one-dimensional data, using the -- method of Botev et al. -- -- The result is a pair of vectors, containing: -- -- kde :: Int -> Vector Double -> (Vector Double, Vector Double) -- | Gaussian kernel density estimator for one-dimensional data, using the -- method of Botev et al. -- -- The result is a pair of vectors, containing: -- -- kde_ :: Int -> Double -> Double -> Vector Double -> (Vector Double, Vector Double) -- | Very fast statistics over simple powers of a sample. These can all be -- computed efficiently in just a single pass over a sample, with that -- pass subject to stream fusion. -- -- The tradeoff is that some of these functions are less numerically -- robust than their counterparts in the Statistics.Sample -- module. Where this is the case, the alternatives are noted. module Statistics.Sample.Powers data Powers -- | O(n) Collect the n simple powers of a sample. -- -- Functions computed over a sample's simple powers require at least a -- certain number (or order) of powers to be collected. -- -- -- -- This function is subject to stream fusion. powers :: Vector v Double => Int -> v Double -> Powers -- | The order (number) of simple powers collected from a sample. order :: Powers -> Int -- | The number of elements in the original Sample. This is the -- sample's zeroth simple power. count :: Powers -> Int -- | The sum of elements in the original Sample. This is the -- sample's first simple power. sum :: Powers -> Double -- | The arithmetic mean of elements in the original Sample. -- -- This is less numerically robust than the mean function in the -- Statistics.Sample module, but the number is essentially free -- to compute if you have already collected a sample's simple powers. mean :: Powers -> Double -- | Maximum likelihood estimate of a sample's variance. Also known as the -- population variance, where the denominator is n. This is the -- second central moment of the sample. -- -- This is less numerically robust than the variance function in the -- Statistics.Sample module, but the number is essentially free -- to compute if you have already collected a sample's simple powers. -- -- Requires Powers with order at least 2. variance :: Powers -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. stdDev :: Powers -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. -- -- Requires Powers with order at least 2. varianceUnbiased :: Powers -> Double -- | Compute the kth central moment of a sample. The central moment -- is also known as the moment about the mean. centralMoment :: Int -> Powers -> Double -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
--   skewness . powers 3 $ U.to [1,100,101,102,103]
--   ==> -1.497681449918257
--   
-- -- A sample with positive skew is said to be right-skewed. -- --
--   skewness . powers 3 $ U.to [1,2,3,4,100]
--   ==> 1.4975367033335198
--   
-- -- A sample's skewness is not defined if its variance is zero. -- -- Requires Powers with order at least 3. skewness :: Powers -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that the -- sample's variance is due more to infrequent severe deviations than to -- frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- Requires Powers with order at least 4. kurtosis :: Powers -> Double instance Eq Powers instance Show Powers -- | Types for working with statistics. module Statistics.Types -- | A function that estimates a property of a sample, such as its -- mean. type Estimator = Sample -> Double -- | Sample data. type Sample = Vector Double -- | Sample with weights. First element of sample is data, second is weight type WeightedSample = Vector (Double, Double) -- | Weights for affecting the importance of elements of a sample. type Weights = Vector Double -- | Resampling statistics. module Statistics.Resampling -- | A resample drawn randomly, with replacement, from a set of data -- points. Distinct from a normal array to make it harder for your humble -- author's brain to go wrong. newtype Resample Resample :: Vector Double -> Resample fromResample :: Resample -> Vector Double -- | Compute a statistical estimate repeatedly over a sample, each time -- omitting a successive element. jackknife :: Estimator -> Sample -> Vector Double -- | O(e*r*s) Resample a data set repeatedly, with replacement, -- computing each estimate over the resampled data. -- -- This function is expensive; it has to do work proportional to -- e*r*s, where e is the number of estimation functions, -- r is the number of resamples to compute, and s is the -- number of original samples. -- -- To improve performance, this function will make use of all available -- CPUs. At least with GHC 7.0, parallel performance seems best if the -- parallel garbage collector is disabled (RTS option -qg). resample :: Gen (PrimState IO) -> [Estimator] -> Int -> Sample -> IO [Resample] instance Eq Resample instance Show Resample -- | The Wilcoxon matched-pairs signed-rank test is non-parametric test -- which could be used to whether two related samples have different -- means. -- -- WARNING: current implementation contain critical bugs -- https://github.com/bos/statistics/issues/18 module Statistics.Test.WilcoxonT -- | The Wilcoxon matched-pairs signed-rank test. The samples are zipped -- together: if one is longer than the other, both are truncated to the -- the length of the shorter sample. -- -- For one-tailed test it tests whether first sample is significantly -- greater than the second. For two-tailed it checks whether they -- significantly differ -- -- Check wilcoxonMatchedPairSignedRank and -- wilcoxonMatchedPairSignificant for additional information. wilcoxonMatchedPairTest :: TestType -> Double -> Sample -> Sample -> Maybe TestResult -- | The Wilcoxon matched-pairs signed-rank test. -- -- The value returned is the pair (T+, T-). T+ is the sum of positive -- ranks (the ranks of the differences where the first parameter is -- higher) whereas T- is the sum of negative ranks (the ranks of the -- differences where the second parameter is higher). These values mean -- little by themselves, and should be combined with the -- wilcoxonSignificant function in this module to get a -- meaningful result. -- -- The samples are zipped together: if one is longer than the other, both -- are truncated to the the length of the shorter sample. -- -- Note that: wilcoxonMatchedPairSignedRank == ((x, y) -> (y, x)) . -- flip wilcoxonMatchedPairSignedRank wilcoxonMatchedPairSignedRank :: Sample -> Sample -> (Double, Double) -- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs -- test is significant at the given level. -- -- This function can perform a one-tailed or two-tailed test. If the -- first parameter to this function is TwoTailed, the test is -- performed two-tailed to check if the two samples differ significantly. -- If the first parameter is OneTailed, the check is performed -- one-tailed to decide whether the first sample (i.e. the first sample -- you passed to wilcoxonMatchedPairSignedRank) is greater than -- the second sample (i.e. the second sample you passed to -- wilcoxonMatchedPairSignedRank). If you wish to perform a -- one-tailed test in the opposite direction, you can either pass the -- parameters in a different order to -- wilcoxonMatchedPairSignedRank, or simply swap the values in the -- resulting pair before passing them to this function. wilcoxonMatchedPairSignificant :: TestType -> Int -> Double -> (Double, Double) -> Maybe TestResult -- | Works out the significance level (p-value) of a T value, given a -- sample size and a T value from the Wilcoxon signed-rank matched-pairs -- test. -- -- See the notes on wilcoxonCriticalValue for how this is -- calculated. wilcoxonMatchedPairSignificance :: Int -> Double -> Double -- | Obtains the critical value of T to compare against, given a sample -- size and a p-value (significance level). Your T value must be less -- than or equal to the return of this function in order for the test to -- work out significant. If there is a Nothing return, the sample size is -- too small to make a decision. -- -- wilcoxonSignificant tests the return value of -- wilcoxonMatchedPairSignedRank for you, so you should use -- wilcoxonSignificant for determining test results. However, -- this function is useful, for example, for generating lookup tables for -- Wilcoxon signed rank critical values. -- -- The return values of this function are generated using the method -- detailed in the paper "Critical Values for the Wilcoxon Signed Rank -- Statistic", Peter Mitic, The Mathematica Journal, volume 6, issue 3, -- 1996, which can be found here: -- http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf. -- According to that paper, the results may differ from other published -- lookup tables, but (Mitic claims) the values obtained by this function -- will be the correct ones. wilcoxonMatchedPairCriticalValue :: Int -> Double -> Maybe Int -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Types classes for probability distrubutions module Statistics.Distribution -- | Type class common to all distributions. Only c.d.f. could be defined -- for both discrete and continous distributions. class Distribution d cumulative :: Distribution d => d -> Double -> Double complCumulative :: Distribution d => d -> Double -> Double -- | Discrete probability distribution. class Distribution d => DiscreteDistr d probability :: DiscreteDistr d => d -> Int -> Double -- | Continuous probability distributuion class Distribution d => ContDistr d density :: ContDistr d => d -> Double -> Double quantile :: ContDistr d => d -> Double -> Double -- | Type class for distributions with mean. maybeMean should return -- Nothing if it's undefined for current value of data class Distribution d => MaybeMean d maybeMean :: MaybeMean d => d -> Maybe Double -- | Type class for distributions with mean. If distribution have finite -- mean for all valid values of parameters it should be instance of this -- type class. class MaybeMean d => Mean d mean :: Mean d => d -> Double -- | Type class for distributions with variance. If variance is undefined -- for some parameter values both maybeVariance and -- maybeStdDev should return Nothing. -- -- Minimal complete definition is maybeVariance or -- maybeStdDev class MaybeMean d => MaybeVariance d maybeVariance :: MaybeVariance d => d -> Maybe Double maybeStdDev :: MaybeVariance d => d -> Maybe Double -- | Type class for distributions with variance. If distibution have finite -- variance for all valid parameter values it should be instance of this -- type class. -- -- Minimal complete definition is variance or stdDev class (Mean d, MaybeVariance d) => Variance d variance :: Variance d => d -> Double stdDev :: Variance d => d -> Double -- | Generate discrete random variates which have given distribution. class Distribution d => ContGen d genContVar :: (ContGen d, PrimMonad m) => d -> Gen (PrimState m) -> m Double -- | Generate discrete random variates which have given distribution. -- ContGen is superclass because it's always possible to generate -- real-valued variates from integer values class (DiscreteDistr d, ContGen d) => DiscreteGen d genDiscreteVar :: (DiscreteGen d, PrimMonad m) => d -> Gen (PrimState m) -> m Int -- | Approximate the value of X for which -- P(x>X)=p. -- -- This method uses a combination of Newton-Raphson iteration and -- bisection with the given guess as a starting point. The upper and -- lower bounds specify the interval in which the probability -- distribution reaches the value p. findRoot :: ContDistr d => d -> Double -> Double -> Double -> Double -> Double -- | Sum probabilities in inclusive interval. sumProbabilities :: DiscreteDistr d => d -> Int -> Int -> Double -- | The binomial distribution. This is the discrete probability -- distribution of the number of successes in a sequence of n -- independent yes/no experiments, each of which yields success with -- probability p. module Statistics.Distribution.Binomial -- | The binomial distribution. data BinomialDistribution -- | Construct binomial distribution. Number of trials must be positive and -- probability must be in [0,1] range binomial :: Int -> Double -> BinomialDistribution -- | Number of trials. bdTrials :: BinomialDistribution -> Int -- | Probability. bdProbability :: BinomialDistribution -> Double instance Typeable BinomialDistribution instance Eq BinomialDistribution instance Read BinomialDistribution instance Show BinomialDistribution instance MaybeVariance BinomialDistribution instance MaybeMean BinomialDistribution instance Variance BinomialDistribution instance Mean BinomialDistribution instance DiscreteDistr BinomialDistribution instance Distribution BinomialDistribution -- | The Cauchy-Lorentz distribution. It's also known as Lorentz -- distribution or BreitWigner distribution. -- -- It doesn't have mean and variance. module Statistics.Distribution.CauchyLorentz -- | Cauchy-Lorentz distribution. data CauchyDistribution -- | Central value of Cauchy-Lorentz distribution which is its mode and -- median. Distribution doesn't have mean so function is named after -- median. cauchyDistribMedian :: CauchyDistribution -> Double -- | Scale parameter of Cauchy-Lorentz distribution. It's different from -- variance and specify half width at half maximum (HWHM). cauchyDistribScale :: CauchyDistribution -> Double -- | Cauchy distribution cauchyDistribution :: Double -> Double -> CauchyDistribution standardCauchy :: CauchyDistribution instance Typeable CauchyDistribution instance Eq CauchyDistribution instance Show CauchyDistribution instance Read CauchyDistribution instance ContDistr CauchyDistribution instance Distribution CauchyDistribution -- | The chi-squared distribution. This is a continuous probability -- distribution of sum of squares of k independent standard normal -- distributions. It's commonly used in statistical tests module Statistics.Distribution.ChiSquared -- | Chi-squared distribution data ChiSquared -- | Construct chi-squared distribution. Number of degrees of freedom must -- be positive. chiSquared :: Int -> ChiSquared -- | Get number of degrees of freedom chiSquaredNDF :: ChiSquared -> Int instance Typeable ChiSquared instance Show ChiSquared instance MaybeVariance ChiSquared instance MaybeMean ChiSquared instance Variance ChiSquared instance Mean ChiSquared instance ContDistr ChiSquared instance Distribution ChiSquared -- | Fisher F distribution module Statistics.Distribution.FDistribution -- | Student-T distribution data FDistribution fDistribution :: Int -> Int -> FDistribution fDistributionNDF1 :: FDistribution -> Double fDistributionNDF2 :: FDistribution -> Double instance Typeable FDistribution instance Eq FDistribution instance Show FDistribution instance Read FDistribution instance MaybeVariance FDistribution instance MaybeMean FDistribution instance ContDistr FDistribution instance Distribution FDistribution -- | The gamma distribution. This is a continuous probability distribution -- with two parameters, k and ϑ. If k is integral, the -- distribution represents the sum of k independent exponentially -- distributed random variables, each of which has a mean of ϑ. module Statistics.Distribution.Gamma -- | The gamma distribution. data GammaDistribution -- | Create gamma distribution. Both shape and scale parameters must be -- positive. gammaDistr :: Double -> Double -> GammaDistribution -- | Shape parameter, k. gdShape :: GammaDistribution -> Double -- | Scale parameter, ϑ. gdScale :: GammaDistribution -> Double instance Typeable GammaDistribution instance Eq GammaDistribution instance Read GammaDistribution instance Show GammaDistribution instance MaybeVariance GammaDistribution instance MaybeMean GammaDistribution instance Mean GammaDistribution instance Variance GammaDistribution instance ContDistr GammaDistribution instance Distribution GammaDistribution -- | The Poisson distribution. This is the discrete probability -- distribution of a number of events occurring in a fixed interval if -- these events occur with a known average rate, and occur independently -- from each other within that interval. module Statistics.Distribution.Poisson data PoissonDistribution -- | Create Poisson distribution. poisson :: Double -> PoissonDistribution poissonLambda :: PoissonDistribution -> Double instance Typeable PoissonDistribution instance Eq PoissonDistribution instance Read PoissonDistribution instance Show PoissonDistribution instance MaybeVariance PoissonDistribution instance MaybeMean PoissonDistribution instance Mean PoissonDistribution instance Variance PoissonDistribution instance DiscreteDistr PoissonDistribution instance Distribution PoissonDistribution -- | The Geometric distribution. This is the probability distribution of -- the number of Bernoulli trials needed to get one success, supported on -- the set [1,2..]. -- -- This distribution is sometimes referred to as the shifted -- geometric distribution, to distinguish it from a variant measuring the -- number of failures before the first success, defined over the set -- [0,1..]. module Statistics.Distribution.Geometric data GeometricDistribution -- | Create geometric distribution. geometric :: Double -> GeometricDistribution gdSuccess :: GeometricDistribution -> Double instance Typeable GeometricDistribution instance Eq GeometricDistribution instance Read GeometricDistribution instance Show GeometricDistribution instance MaybeVariance GeometricDistribution instance MaybeMean GeometricDistribution instance Variance GeometricDistribution instance Mean GeometricDistribution instance DiscreteDistr GeometricDistribution instance Distribution GeometricDistribution -- | The Hypergeometric distribution. This is the discrete probability -- distribution that measures the probability of k successes in -- l trials, without replacement, from a finite population. -- -- The parameters of the distribution describe k elements chosen -- from a population of l, with m elements of one type, and -- l-m of the other (all are positive integers). module Statistics.Distribution.Hypergeometric data HypergeometricDistribution hypergeometric :: Int -> Int -> Int -> HypergeometricDistribution hdM :: HypergeometricDistribution -> Int hdL :: HypergeometricDistribution -> Int hdK :: HypergeometricDistribution -> Int instance Typeable HypergeometricDistribution instance Eq HypergeometricDistribution instance Read HypergeometricDistribution instance Show HypergeometricDistribution instance MaybeVariance HypergeometricDistribution instance MaybeMean HypergeometricDistribution instance Variance HypergeometricDistribution instance Mean HypergeometricDistribution instance DiscreteDistr HypergeometricDistribution instance Distribution HypergeometricDistribution -- | Student-T distribution module Statistics.Distribution.StudentT -- | Student-T distribution data StudentT -- | Create Student-T distribution. Number of parameters must be positive. studentT :: Double -> StudentT studentTndf :: StudentT -> Double instance Typeable StudentT instance Eq StudentT instance Show StudentT instance Read StudentT instance MaybeVariance StudentT instance MaybeMean StudentT instance ContDistr StudentT instance Distribution StudentT -- | Variate distributed uniformly in the interval. module Statistics.Distribution.Uniform -- | Uniform distribution data UniformDistribution -- | Create uniform distribution. uniformDistr :: Double -> Double -> UniformDistribution instance Typeable UniformDistribution instance Eq UniformDistribution instance Show UniformDistribution instance Read UniformDistribution instance ContGen UniformDistribution instance MaybeVariance UniformDistribution instance MaybeMean UniformDistribution instance Variance UniformDistribution instance Mean UniformDistribution instance ContDistr UniformDistribution instance Distribution UniformDistribution -- | Pearson's chi squared test. module Statistics.Test.ChiSquared -- | Generic form of Pearson chi squared tests for binned data. Data sample -- is supplied in form of tuples (observed quantity, expected number of -- events). Both must be positive. chi2test :: (Vector v (Int, Double), Vector v Double) => Double -> Int -> v (Int, Double) -> TestResult -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Kolmogov-Smirnov tests are non-parametric tests for assesing whether -- given sample could be described by distribution or whether two samples -- have the same distribution. module Statistics.Test.KolmogorovSmirnov -- | Check that sample could be described by distribution. -- Significant means distribution is not compatible with data for -- given p-value. -- -- This test uses Marsaglia-Tsang-Wang exact alogorithm for calculation -- of p-value. kolmogorovSmirnovTest :: Distribution d => d -> Double -> Sample -> TestResult -- | Variant of kolmogorovSmirnovTest which uses CFD in form of -- function. kolmogorovSmirnovTestCdf :: (Double -> Double) -> Double -> Sample -> TestResult -- | Two sample Kolmogorov-Smirnov test. It tests whether two data samples -- could be described by the same distribution without making any -- assumptions about it. -- -- This test uses approxmate formula for computing p-value. kolmogorovSmirnovTest2 :: Double -> Sample -> Sample -> TestResult -- | Calculate Kolmogorov's statistic D for given cumulative -- distribution function (CDF) and data sample. If sample is empty -- returns 0. kolmogorovSmirnovCdfD :: (Double -> Double) -> Sample -> Double -- | Calculate Kolmogorov's statistic D for given cumulative -- distribution function (CDF) and data sample. If sample is empty -- returns 0. kolmogorovSmirnovD :: Distribution d => d -> Sample -> Double -- | Calculate Kolmogorov's statistic D for two data samples. If -- either of samples is empty returns 0. kolmogorovSmirnov2D :: Sample -> Sample -> Double -- | Calculate cumulative probability function for Kolmogorov's -- distribution with n parameters or probability of getting value -- smaller than d with n-elements sample. -- -- It uses algorithm by Marsgalia et. al. and provide at least 7-digit -- accuracy. kolmogorovSmirnovProbability :: Int -> Double -> Double -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult instance Show Matrix -- | Constant values common to much statistics code. -- -- DEPRECATED: use module Numeric.MathFunctions.Constants from -- math-functions. module Statistics.Constants -- | Commonly used sample statistics, also known as descriptive statistics. module Statistics.Sample -- | Sample data. type Sample = Vector Double -- | Sample with weights. First element of sample is data, second is weight type WeightedSample = Vector (Double, Double) -- | O(n) Range. The difference between the largest and smallest -- elements of a sample. range :: Vector v Double => v Double -> Double -- | O(n) Arithmetic mean. This uses Welford's algorithm to provide -- numerical stability, using a single pass over the sample data. mean :: Vector v Double => v Double -> Double -- | O(n) Arithmetic mean for weighted sample. It uses a single-pass -- algorithm analogous to the one used by mean. meanWeighted :: Vector v (Double, Double) => v (Double, Double) -> Double -- | O(n) Harmonic mean. This algorithm performs a single pass over -- the sample. harmonicMean :: Vector v Double => v Double -> Double -- | O(n) Geometric mean of a sample containing no negative values. geometricMean :: Vector v Double => v Double -> Double -- | Compute the kth central moment of a sample. The central moment -- is also known as the moment about the mean. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoment :: Vector v Double => Int -> v Double -> Double -- | Compute the kth and jth central moments of a sample. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoments :: Vector v Double => Int -> Int -> v Double -> (Double, Double) -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
--   skewness $ U.to [1,100,101,102,103]
--   ==> -1.497681449918257
--   
-- -- A sample with positive skew is said to be right-skewed. -- --
--   skewness $ U.to [1,2,3,4,100]
--   ==> 1.4975367033335198
--   
-- -- A sample's skewness is not defined if its variance is zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. skewness :: Vector v Double => v Double -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that more -- of the sample's variance is due to infrequent severe deviations, -- rather than more frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. kurtosis :: Vector v Double => v Double -> Double -- | Maximum likelihood estimate of a sample's variance. Also known as the -- population variance, where the denominator is n. variance :: Vector v Double => v Double -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. varianceUnbiased :: Vector v Double => v Double -> Double -- | Calculate mean and maximum likelihood estimate of variance. This -- function should be used if both mean and variance are required since -- it will calculate mean only once. meanVariance :: Vector v Double => v Double -> (Double, Double) -- | Calculate mean and unbiased estimate of variance. This function should -- be used if both mean and variance are required since it will calculate -- mean only once. meanVarianceUnb :: Vector v Double => v Double -> (Double, Double) -- | Standard deviation. This is simply the square root of the unbiased -- estimate of the variance. stdDev :: Vector v Double => v Double -> Double -- | Weighted variance. This is biased estimation. varianceWeighted :: Vector v (Double, Double) => v (Double, Double) -> Double -- | Maximum likelihood estimate of a sample's variance. fastVariance :: Vector v Double => v Double -> Double -- | Unbiased estimate of a sample's variance. fastVarianceUnbiased :: Vector v Double => v Double -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. fastStdDev :: Vector v Double => v Double -> Double -- | The exponential distribution. This is the continunous probability -- distribution of the times between events in a poisson process, in -- which events occur continuously and independently at a constant -- average rate. module Statistics.Distribution.Exponential data ExponentialDistribution -- | Create an exponential distribution. exponential :: Double -> ExponentialDistribution -- | Create exponential distribution from sample. No tests are made to -- check whether it truly is exponential. exponentialFromSample :: Sample -> ExponentialDistribution edLambda :: ExponentialDistribution -> Double instance Typeable ExponentialDistribution instance Eq ExponentialDistribution instance Read ExponentialDistribution instance Show ExponentialDistribution instance MaybeVariance ExponentialDistribution instance MaybeMean ExponentialDistribution instance Variance ExponentialDistribution instance Mean ExponentialDistribution instance ContDistr ExponentialDistribution instance Distribution ExponentialDistribution -- | The normal distribution. This is a continuous probability distribution -- that describes data that cluster around a mean. module Statistics.Distribution.Normal -- | The normal distribution. data NormalDistribution -- | Create normal distribution from parameters. -- -- IMPORTANT: prior to 0.10 release second parameter was variance not -- standard deviation. normalDistr :: Double -> Double -> NormalDistribution -- | Create distribution using parameters estimated from sample. Variance -- is estimated using maximum likelihood method (biased estimation). normalFromSample :: Sample -> NormalDistribution -- | Standard normal distribution with mean equal to 0 and variance equal -- to 1 standard :: NormalDistribution instance Typeable NormalDistribution instance Eq NormalDistribution instance Read NormalDistribution instance Show NormalDistribution instance ContGen NormalDistribution instance Variance NormalDistribution instance MaybeVariance NormalDistribution instance Mean NormalDistribution instance MaybeMean NormalDistribution instance ContDistr NormalDistribution instance Distribution NormalDistribution -- | Mann-Whitney U test (also know as Mann-Whitney-Wilcoxon and Wilcoxon -- rank sum test) is a non-parametric test for assesing whether two -- samples of independent observations have different mean. module Statistics.Test.MannWhitneyU -- | Perform Mann-Whitney U Test for two samples and required significance. -- For additional information check documentation of mannWhitneyU -- and mannWhitneyUSignificant. This is just a helper function. -- -- One-tailed test checks whether first sample is significantly larger -- than second. Two-tailed whether they are significantly different. mannWhitneyUtest :: TestType -> Double -> Sample -> Sample -> Maybe TestResult -- | The Mann-Whitney U Test. -- -- This is sometimes known as the Mann-Whitney-Wilcoxon U test, and -- confusingly many sources state that the Mann-Whitney U test is the -- same as the Wilcoxon's rank sum test (which is provided as -- wilcoxonRankSums). The Mann-Whitney U is a simple transform of -- Wilcoxon's rank sum test. -- -- Again confusingly, different sources state reversed definitions for U₁ -- and U₂, so it is worth being explicit about what this function -- returns. Given two samples, the first, xs₁, of size n₁ and the second, -- xs₂, of size n₂, this function returns (U₁, U₂) where U₁ = W₁ - -- (n₁(n₁+1))/2 and U₂ = W₂ - (n₂(n₂+1))/2, where (W₁, W₂) is the return -- value of wilcoxonRankSums xs1 xs2. -- -- Some sources instead state that U₁ and U₂ should be the other way -- round, often expressing this using U₁' = n₁n₂ - U₁ (since U₁ + U₂ = -- n₁n₂). -- -- All of which you probably don't care about if you just feed this into -- mannWhitneyUSignificant. mannWhitneyU :: Sample -> Sample -> (Double, Double) -- | Calculates the critical value of Mann-Whitney U for the given sample -- sizes and significance level. -- -- This function returns the exact calculated value of U for all sample -- sizes; it does not use the normal approximation at all. Above sample -- size 20 it is generally recommended to use the normal approximation -- instead, but this function will calculate the higher critical values -- if you need them. -- -- The algorithm to generate these values is a faster, memoised version -- of the simple unoptimised generating function given in section 2 of -- "The Mann Whitney Wilcoxon Distribution Using Linked Lists" mannWhitneyUCriticalValue :: (Int, Int) -> Double -> Maybe Int -- | Calculates whether the Mann Whitney U test is significant. -- -- If both sample sizes are less than or equal to 20, the exact U -- critical value (as calculated by mannWhitneyUCriticalValue) is -- used. If either sample is larger than 20, the normal approximation is -- used instead. -- -- If you use a one-tailed test, the test indicates whether the first -- sample is significantly larger than the second. If you want the -- opposite, simply reverse the order in both the sample size and the -- (U₁, U₂) pairs. mannWhitneyUSignificant :: TestType -> (Int, Int) -> Double -> (Double, Double) -> Maybe TestResult -- | The Wilcoxon Rank Sums Test. -- -- This test calculates the sum of ranks for the given two samples. The -- samples are ordered, and assigned ranks (ties are given their average -- rank), then these ranks are summed for each sample. -- -- The return value is (W₁, W₂) where W₁ is the sum of ranks of the first -- sample and W₂ is the sum of ranks of the second sample. This test is -- trivially transformed into the Mann-Whitney U test. You will probably -- want to use mannWhitneyU and the related functions for testing -- significance, but this function is exposed for completeness. wilcoxonRankSums :: Sample -> Sample -> (Double, Double) -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Functions for performing non-parametric tests (i.e. tests without an -- assumption of underlying distribution). module Statistics.Test.NonParametric -- | The bootstrap method for statistical inference. module Statistics.Resampling.Bootstrap -- | A point and interval estimate computed via an Estimator. data Estimate Estimate :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> Estimate -- | Point estimate. estPoint :: Estimate -> {-# UNPACK #-} !Double -- | Lower bound of the estimate interval (i.e. the lower bound of the -- confidence interval). estLowerBound :: Estimate -> {-# UNPACK #-} !Double -- | Upper bound of the estimate interval (i.e. the upper bound of the -- confidence interval). estUpperBound :: Estimate -> {-# UNPACK #-} !Double -- | Confidence level of the confidence intervals. estConfidenceLevel :: Estimate -> {-# UNPACK #-} !Double -- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both bias -- and skewness in the resampled distribution. bootstrapBCA :: Double -> Sample -> [Estimator] -> [Resample] -> [Estimate] -- | Multiply the point, lower bound, and upper bound in an Estimate -- by the given value. scale :: Double -> Estimate -> Estimate instance Typeable Estimate instance Eq Estimate instance Show Estimate instance Data Estimate instance NFData Estimate -- | Kernel density estimation code, providing non-parametric ways to -- estimate the probability density function of a sample. -- -- The techniques used by functions in this module are relatively fast, -- but they generally give inferior results to the KDE function in the -- main Statistics.KernelDensity module (due to the -- oversmoothing documented for bandwidth below). module Statistics.Sample.KernelDensity.Simple -- | Simple Epanechnikov kernel density estimator. Returns the uniformly -- spaced points from the sample range at which the density function was -- estimated, and the estimates at those points. epanechnikovPDF :: Vector v Double => Int -> v Double -> (Points, Vector Double) -- | Simple Gaussian kernel density estimator. Returns the uniformly spaced -- points from the sample range at which the density function was -- estimated, and the estimates at those points. gaussianPDF :: Vector v Double => Int -> v Double -> (Points, Vector Double) -- | Points from the range of a Sample. newtype Points Points :: Vector Double -> Points fromPoints :: Points -> Vector Double -- | Choose a uniform range of points at which to estimate a sample's -- probability density function. -- -- If you are using a Gaussian kernel, multiply the sample's bandwidth by -- 3 before passing it to this function. -- -- If this function is passed an empty vector, it returns values of -- positive and negative infinity. choosePoints :: Vector v Double => Int -> Double -> v Double -> Points -- | The width of the convolution kernel used. type Bandwidth = Double -- | Compute the optimal bandwidth from the observed data for the given -- kernel. -- -- This function uses an estimate based on the standard deviation of a -- sample (due to Deheuvels), which performs reasonably well for unimodal -- distributions but leads to oversmoothing for more complex ones. bandwidth :: Vector v Double => (Double -> Bandwidth) -> v Double -> Bandwidth -- | Bandwidth estimator for an Epanechnikov kernel. epanechnikovBW :: Double -> Bandwidth -- | Bandwidth estimator for a Gaussian kernel. gaussianBW :: Double -> Bandwidth -- | The convolution kernel. Its parameters are as follows: -- -- type Kernel = Double -> Double -> Double -> Double -> Double -- | Epanechnikov kernel for probability density function estimation. epanechnikovKernel :: Kernel -- | Gaussian kernel for probability density function estimation. gaussianKernel :: Kernel -- | Kernel density estimator, providing a non-parametric way of estimating -- the PDF of a random variable. estimatePDF :: Vector v Double => Kernel -> Bandwidth -> v Double -> Points -> Vector Double -- | A helper for creating a simple kernel density estimation function with -- automatically chosen bandwidth and estimation points. simplePDF :: Vector v Double => (Double -> Double) -> Kernel -> Double -> Int -> v Double -> (Points, Vector Double) instance Eq Points instance Show Points -- | Functions for computing autocovariance and autocorrelation of a -- sample. module Statistics.Autocorrelation -- | Compute the autocovariance of a sample, i.e. the covariance of the -- sample against a shifted version of itself. autocovariance :: (Vector v Double, Vector v Int) => v Double -> v Double -- | Compute the autocorrelation function of a sample, and the upper and -- lower bounds of confidence intervals for each element. -- -- Note: The calculation of the 95% confidence interval assumes a -- stationary Gaussian process. autocorrelation :: (Vector v Double, Vector v Int) => v Double -> (v Double, v Double, v Double)