-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A library of statistical types, data, and functions -- @package statistics @version 0.13.2.2 module Statistics.Test.Types -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Significant if parameter is True, not significant otherwiser significant :: Bool -> TestResult instance Typeable TestType instance Typeable TestResult instance Eq TestType instance Ord TestType instance Show TestType instance Data TestType instance Generic TestType instance Eq TestResult instance Ord TestResult instance Show TestResult instance Data TestResult instance Generic TestResult instance Datatype D1TestType instance Constructor C1_0TestType instance Constructor C1_1TestType instance Datatype D1TestResult instance Constructor C1_0TestResult instance Constructor C1_1TestResult instance ToJSON TestResult instance FromJSON TestResult instance ToJSON TestType instance FromJSON TestType -- | Fourier-related transformations of mathematical functions. -- -- These functions are written for simplicity and correctness, not speed. -- If you need a fast FFT implementation for your application, you should -- strongly consider using a library of FFTW bindings instead. module Statistics.Transform type CD = Complex Double -- | Discrete cosine transform (DCT-II). dct :: Vector Double -> Vector Double -- | Discrete cosine transform (DCT-II). Only real part of vector is -- transformed, imaginary part is ignored. dct_ :: Vector CD -> Vector Double -- | Inverse discrete cosine transform (DCT-III). 
It's the inverse of -- dct only up to a scale parameter: -- --
-- (idct . dct) x = (* length x) --idct :: Vector Double -> Vector Double -- | Inverse discrete cosine transform (DCT-III). Only real part of vector -- is transformed, imaginary part is ignored. idct_ :: Vector CD -> Vector Double -- | Radix-2 decimation-in-time fast Fourier transform. fft :: Vector CD -> Vector CD -- | Inverse fast Fourier transform. ifft :: Vector CD -> Vector CD -- | Basic matrix operations. -- -- There isn't a widely used matrix package for Haskell yet, so we -- implement the necessary minimum here. module Statistics.Matrix.Types type Vector = Vector Double type MVector s = MVector s Double -- | Two-dimensional matrix, stored in row-major order. data Matrix Matrix :: {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> !Vector -> Matrix -- | Rows of matrix. rows :: Matrix -> {-# UNPACK #-} !Int -- | Columns of matrix. cols :: Matrix -> {-# UNPACK #-} !Int -- | In order to avoid overflows during matrix multiplication, a large -- exponent is stored separately. exponent :: Matrix -> {-# UNPACK #-} !Int -- | Matrix data. _vector :: Matrix -> !Vector -- | Two-dimensional mutable matrix, stored in row-major order. data MMatrix s MMatrix :: {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> !(MVector s) -> MMatrix s debug :: Matrix -> String instance Eq Matrix instance Show Matrix -- | Basic mutable matrix operations. module Statistics.Matrix.Mutable -- | Two-dimensional mutable matrix, stored in row-major order. data MMatrix s MMatrix :: {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> !(MVector s) -> MMatrix s type MVector s = MVector s Double replicate :: Int -> Int -> Double -> ST s (MMatrix s) thaw :: Matrix -> ST s (MMatrix s) -- | Given row and column numbers, calculate the offset into the flat -- row-major vector. 
bounds :: MMatrix s -> Int -> Int -> (MVector s -> Int -> r) -> r unsafeFreeze :: MMatrix s -> ST s Matrix unsafeRead :: MMatrix s -> Int -> Int -> ST s Double unsafeWrite :: MMatrix s -> Int -> Int -> Double -> ST s () unsafeModify :: MMatrix s -> Int -> Int -> (Double -> Double) -> ST s () immutably :: NFData a => MMatrix s -> (Matrix -> a) -> ST s a -- | Given row and column numbers, calculate the offset into the flat -- row-major vector, without checking. unsafeBounds :: MMatrix s -> Int -> Int -> (MVector s -> Int -> r) -> r -- | Haskell functions for finding the roots of mathematical functions. module Statistics.Math.RootFinding -- | The result of searching for a root of a mathematical function. data Root a -- | The function does not have opposite signs when evaluated at the lower -- and upper bounds of the search. NotBracketed :: Root a -- | The search failed to converge to within the given error tolerance -- after the given number of iterations. SearchFailed :: Root a -- | A root was successfully found. Root :: a -> Root a -- | Returns either the result of a search for a root, or the default value -- if the search failed. fromRoot :: a -> Root a -> a -- | Use the method of Ridders to compute a root of a function. -- -- The function must have opposite signs when evaluated at the lower and -- upper bounds of the search (i.e. the root must be bracketed). 
ridders :: Double -> (Double, Double) -> (Double -> Double) -> Root Double instance Typeable Root instance Eq a => Eq (Root a) instance Read a => Read (Root a) instance Show a => Show (Root a) instance Data a => Data (Root a) instance Generic (Root a) instance Datatype D1Root instance Constructor C1_0Root instance Constructor C1_1Root instance Constructor C1_2Root instance Alternative Root instance Applicative Root instance MonadPlus Root instance Monad Root instance Functor Root instance Binary a => Binary (Root a) instance ToJSON a => ToJSON (Root a) instance FromJSON a => FromJSON (Root a) -- | Types for working with statistics. module Statistics.Types -- | An estimator of a property of a sample, such as its mean. -- -- The use of an algebraic data type here allows functions such as -- jackknife and bootstrapBCA to use more efficient -- algorithms when possible. data Estimator Mean :: Estimator Variance :: Estimator VarianceUnbiased :: Estimator StdDev :: Estimator Function :: (Sample -> Double) -> Estimator -- | Sample data. type Sample = Vector Double -- | Sample with weights. First element of sample is data, second is weight type WeightedSample = Vector (Double, Double) -- | Weights for affecting the importance of elements of a sample. type Weights = Vector Double -- | Fast O(NlogN) implementation of Kendall's tau. -- -- This module implements Kendall's tau form b which allows ties in the -- data. This is the same formula used by other statistical packages, -- e.g., R, matlab. -- -- $$\tau = \frac{n_c - n_d}{\sqrt{(n_0 - n_1)(n_0 - n_2)}}$$ -- -- where $n_0 = n(n-1)/2$, $n_1$ = number of pairs tied for the first -- quantity, $n_2$ = number of pairs tied for the second quantity, $n_c$ -- = number of concordant pairs, $n_d$ = number of discordant pairs. module Statistics.Correlation.Kendall -- | O(nlogn) Compute the Kendall's tau from a vector of paired -- data. Return NaN when number of pairs <= 1. 
kendall :: (Ord a, Ord b, Vector v (a, b)) => v (a, b) -> Double -- | Constant values common to much statistics code. -- -- DEPRECATED: use module Constants from math-functions. -- | Deprecated: use module Numeric.MathFunctions.Constants from -- math-functions module Statistics.Constants -- | Useful functions. module Statistics.Function -- | Compute the minimum and maximum of a vector in one pass. minMax :: Vector v Double => v Double -> (Double, Double) -- | Sort a vector. sort :: Vector Double -> Vector Double -- | Sort a vector. gsort :: (Ord e, Vector v e) => v e -> v e -- | Sort a vector using a custom ordering. sortBy :: Vector v e => Comparison e -> v e -> v e -- | Partially sort a vector, such that the least k elements will be -- at the front. partialSort :: (Vector v e, Ord e) => Int -> v e -> v e -- | Zip a vector with its indices. indexed :: (Vector v e, Vector v Int, Vector v (Int, e)) => v e -> v (Int, e) -- | Return the indices of a vector. indices :: (Vector v a, Vector v Int) => v a -> v Int -- | Efficiently compute the next highest power of two for a non-negative -- integer. If the given value is already a power of two, it is returned -- unchanged. If negative, zero is returned. nextHighestPowerOfTwo :: Int -> Int -- | Compare two Double values for approximate equality, using -- Dawson's method. -- -- The required accuracy is specified in ULPs (units of least precision). -- If the two numbers differ by the given number of ULPs or less, this -- function returns True. within :: Int -> Double -> Double -> Bool -- | Multiply a number by itself. square :: Double -> Double unsafeModify :: MVector s Double -> Int -> (Double -> Double) -> ST s () -- | Simple for loop. Counts from start to end-1. for :: Monad m => Int -> Int -> (Int -> m ()) -> m () -- | Simple reverse-for loop. Counts from start-1 to end -- (which must be less than start). 
rfor :: Monad m => Int -> Int -> (Int -> m ()) -> m () -- | Commonly used sample statistics, also known as descriptive statistics. module Statistics.Sample -- | Sample data. type Sample = Vector Double -- | Sample with weights. First element of sample is data, second is weight type WeightedSample = Vector (Double, Double) -- | O(n) Range. The difference between the largest and smallest -- elements of a sample. range :: Vector v Double => v Double -> Double -- | O(n) Arithmetic mean. This uses Kahan-Babuška-Neumaier -- summation, so is more accurate than welfordMean unless the -- input values are very large. mean :: Vector v Double => v Double -> Double -- | O(n) Arithmetic mean. This uses Welford's algorithm to provide -- numerical stability, using a single pass over the sample data. -- -- Compared to mean, this loses a surprising amount of precision -- unless the inputs are very large. welfordMean :: Vector v Double => v Double -> Double -- | O(n) Arithmetic mean for weighted sample. It uses a single-pass -- algorithm analogous to the one used by welfordMean. meanWeighted :: Vector v (Double, Double) => v (Double, Double) -> Double -- | O(n) Harmonic mean. This algorithm performs a single pass over -- the sample. harmonicMean :: Vector v Double => v Double -> Double -- | O(n) Geometric mean of a sample containing no negative values. geometricMean :: Vector v Double => v Double -> Double -- | Compute the kth central moment of a sample. The central moment -- is also known as the moment about the mean. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoment :: Vector v Double => Int -> v Double -> Double -- | Compute the kth and jth central moments of a sample. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. 
-- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoments :: Vector v Double => Int -> Int -> v Double -> (Double, Double) -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
-- skewness $ U.to [1,100,101,102,103] -- ==> -1.497681449918257 ---- -- A sample with positive skew is said to be right-skewed. -- --
-- skewness $ U.to [1,2,3,4,100] -- ==> 1.4975367033335198 ---- -- A sample's skewness is not defined if its variance is zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. skewness :: Vector v Double => v Double -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that more -- of the sample's variance is due to infrequent severe deviations, -- rather than more frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. kurtosis :: Vector v Double => v Double -> Double -- | Maximum likelihood estimate of a sample's variance. Also known as the -- population variance, where the denominator is n. variance :: Vector v Double => v Double -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. varianceUnbiased :: Vector v Double => v Double -> Double -- | Calculate mean and maximum likelihood estimate of variance. This -- function should be used if both mean and variance are required since -- it will calculate mean only once. meanVariance :: Vector v Double => v Double -> (Double, Double) -- | Calculate mean and unbiased estimate of variance. This function should -- be used if both mean and variance are required since it will calculate -- mean only once. meanVarianceUnb :: Vector v Double => v Double -> (Double, Double) -- | Standard deviation. This is simply the square root of the unbiased -- estimate of the variance. 
stdDev :: Vector v Double => v Double -> Double -- | Weighted variance. This is biased estimation. varianceWeighted :: Vector v (Double, Double) => v (Double, Double) -> Double -- | Maximum likelihood estimate of a sample's variance. fastVariance :: Vector v Double => v Double -> Double -- | Unbiased estimate of a sample's variance. fastVarianceUnbiased :: Vector v Double => v Double -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. fastStdDev :: Vector v Double => v Double -> Double -- | Types classes for probability distrubutions module Statistics.Distribution -- | Type class common to all distributions. Only c.d.f. could be defined -- for both discrete and continous distributions. class Distribution d where complCumulative d x = 1 - cumulative d x cumulative :: Distribution d => d -> Double -> Double complCumulative :: Distribution d => d -> Double -> Double -- | Discrete probability distribution. class Distribution d => DiscreteDistr d where probability d = exp . logProbability d logProbability d = log . probability d probability :: DiscreteDistr d => d -> Int -> Double logProbability :: DiscreteDistr d => d -> Int -> Double -- | Continuous probability distributuion. -- -- Minimal complete definition is quantile and either -- density or logDensity. class Distribution d => ContDistr d where density d = exp . logDensity d logDensity d = log . density d density :: ContDistr d => d -> Double -> Double quantile :: ContDistr d => d -> Double -> Double logDensity :: ContDistr d => d -> Double -> Double -- | Type class for distributions with mean. maybeMean should return -- Nothing if it's undefined for current value of data class Distribution d => MaybeMean d maybeMean :: MaybeMean d => d -> Maybe Double -- | Type class for distributions with mean. If distribution have finite -- mean for all valid values of parameters it should be instance of this -- type class. 
class MaybeMean d => Mean d mean :: Mean d => d -> Double -- | Type class for distributions with variance. If variance is undefined -- for some parameter values both maybeVariance and -- maybeStdDev should return Nothing. -- -- Minimal complete definition is maybeVariance or -- maybeStdDev class MaybeMean d => MaybeVariance d where maybeVariance d = (*) <$> x <*> x where x = maybeStdDev d maybeStdDev = fmap sqrt . maybeVariance maybeVariance :: MaybeVariance d => d -> Maybe Double maybeStdDev :: MaybeVariance d => d -> Maybe Double -- | Type class for distributions with variance. If the distribution has finite -- variance for all valid parameter values it should be instance of this -- type class. -- -- Minimal complete definition is variance or stdDev class (Mean d, MaybeVariance d) => Variance d where variance d = square (stdDev d) stdDev = sqrt . variance variance :: Variance d => d -> Double stdDev :: Variance d => d -> Double -- | Type class for distributions with entropy, meaning Shannon entropy in -- the case of a discrete distribution, or differential entropy in the -- case of a continuous one. maybeEntropy should return -- Nothing if entropy is undefined for the chosen parameter -- values. class Distribution d => MaybeEntropy d maybeEntropy :: MaybeEntropy d => d -> Maybe Double -- | Type class for distributions with entropy, meaning Shannon entropy in -- the case of a discrete distribution, or differential entropy in the -- case of a continuous one. If the distribution has well-defined entropy -- for all valid parameter values then it should be an instance of this -- type class. class MaybeEntropy d => Entropy d entropy :: Entropy d => d -> Double -- | Generate continuous random variates which have given distribution. class Distribution d => ContGen d genContVar :: (ContGen d, PrimMonad m) => d -> Gen (PrimState m) -> m Double -- | Generate discrete random variates which have given distribution. 
-- ContGen is a superclass because it's always possible to generate -- real-valued variates from integer values class (DiscreteDistr d, ContGen d) => DiscreteGen d genDiscreteVar :: (DiscreteGen d, PrimMonad m) => d -> Gen (PrimState m) -> m Int -- | Generate variates from continuous distribution using inverse transform -- rule. genContinous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double -- | Approximate the value of X for which -- P(x>X)=p. -- -- This method uses a combination of Newton-Raphson iteration and -- bisection with the given guess as a starting point. The upper and -- lower bounds specify the interval in which the probability -- distribution reaches the value p. findRoot :: ContDistr d => d -> Double -> Double -> Double -> Double -> Double -- | Sum probabilities in inclusive interval. sumProbabilities :: DiscreteDistr d => d -> Int -> Int -> Double module Statistics.Distribution.Beta -- | The beta distribution data BetaDistribution -- | Create beta distribution. Both shape parameters must be positive. betaDistr :: Double -> Double -> BetaDistribution -- | Create beta distribution. This constructor doesn't check parameters. 
improperBetaDistr :: Double -> Double -> BetaDistribution -- | Alpha shape parameter bdAlpha :: BetaDistribution -> Double -- | Beta shape parameter bdBeta :: BetaDistribution -> Double instance Typeable BetaDistribution instance Eq BetaDistribution instance Read BetaDistribution instance Show BetaDistribution instance Data BetaDistribution instance Generic BetaDistribution instance Datatype D1BetaDistribution instance Constructor C1_0BetaDistribution instance Selector S1_0_0BetaDistribution instance Selector S1_0_1BetaDistribution instance ContGen BetaDistribution instance ContDistr BetaDistribution instance MaybeEntropy BetaDistribution instance Entropy BetaDistribution instance MaybeVariance BetaDistribution instance Variance BetaDistribution instance MaybeMean BetaDistribution instance Mean BetaDistribution instance Distribution BetaDistribution instance Binary BetaDistribution instance ToJSON BetaDistribution instance FromJSON BetaDistribution -- | The binomial distribution. This is the discrete probability -- distribution of the number of successes in a sequence of n -- independent yes/no experiments, each of which yields success with -- probability p. module Statistics.Distribution.Binomial -- | The binomial distribution. data BinomialDistribution -- | Construct binomial distribution. Number of trials must be non-negative -- and probability must be in [0,1] range binomial :: Int -> Double -> BinomialDistribution -- | Number of trials. bdTrials :: BinomialDistribution -> Int -- | Probability. 
bdProbability :: BinomialDistribution -> Double instance Typeable BinomialDistribution instance Eq BinomialDistribution instance Read BinomialDistribution instance Show BinomialDistribution instance Data BinomialDistribution instance Generic BinomialDistribution instance Datatype D1BinomialDistribution instance Constructor C1_0BinomialDistribution instance Selector S1_0_0BinomialDistribution instance Selector S1_0_1BinomialDistribution instance MaybeEntropy BinomialDistribution instance Entropy BinomialDistribution instance MaybeVariance BinomialDistribution instance MaybeMean BinomialDistribution instance Variance BinomialDistribution instance Mean BinomialDistribution instance DiscreteDistr BinomialDistribution instance Distribution BinomialDistribution instance Binary BinomialDistribution instance ToJSON BinomialDistribution instance FromJSON BinomialDistribution -- | The Poisson distribution. This is the discrete probability -- distribution of a number of events occurring in a fixed interval if -- these events occur with a known average rate, and occur independently -- from each other within that interval. module Statistics.Distribution.Poisson data PoissonDistribution -- | Create Poisson distribution. 
poisson :: Double -> PoissonDistribution poissonLambda :: PoissonDistribution -> Double instance Typeable PoissonDistribution instance Eq PoissonDistribution instance Read PoissonDistribution instance Show PoissonDistribution instance Data PoissonDistribution instance Generic PoissonDistribution instance Datatype D1PoissonDistribution instance Constructor C1_0PoissonDistribution instance Selector S1_0_0PoissonDistribution instance MaybeEntropy PoissonDistribution instance Entropy PoissonDistribution instance MaybeVariance PoissonDistribution instance MaybeMean PoissonDistribution instance Mean PoissonDistribution instance Variance PoissonDistribution instance DiscreteDistr PoissonDistribution instance Distribution PoissonDistribution instance Binary PoissonDistribution instance ToJSON PoissonDistribution instance FromJSON PoissonDistribution -- | The Cauchy-Lorentz distribution. It's also known as Lorentz -- distribution or Breit–Wigner distribution. -- -- It doesn't have mean and variance. module Statistics.Distribution.CauchyLorentz -- | Cauchy-Lorentz distribution. data CauchyDistribution -- | Central value of Cauchy-Lorentz distribution which is its mode and -- median. Distribution doesn't have mean so function is named after -- median. cauchyDistribMedian :: CauchyDistribution -> Double -- | Scale parameter of Cauchy-Lorentz distribution. It's different from -- variance and specify half width at half maximum (HWHM). 
cauchyDistribScale :: CauchyDistribution -> Double -- | Cauchy distribution cauchyDistribution :: Double -> Double -> CauchyDistribution standardCauchy :: CauchyDistribution instance Typeable CauchyDistribution instance Eq CauchyDistribution instance Show CauchyDistribution instance Read CauchyDistribution instance Data CauchyDistribution instance Generic CauchyDistribution instance Datatype D1CauchyDistribution instance Constructor C1_0CauchyDistribution instance Selector S1_0_0CauchyDistribution instance Selector S1_0_1CauchyDistribution instance MaybeEntropy CauchyDistribution instance Entropy CauchyDistribution instance ContGen CauchyDistribution instance ContDistr CauchyDistribution instance Distribution CauchyDistribution instance Binary CauchyDistribution instance ToJSON CauchyDistribution instance FromJSON CauchyDistribution -- | The chi-squared distribution. This is a continuous probability -- distribution of sum of squares of k independent standard normal -- distributions. It's commonly used in statistical tests module Statistics.Distribution.ChiSquared -- | Chi-squared distribution data ChiSquared -- | Construct chi-squared distribution. Number of degrees of freedom must -- be positive. chiSquared :: Int -> ChiSquared -- | Get number of degrees of freedom chiSquaredNDF :: ChiSquared -> Int instance Typeable ChiSquared instance Eq ChiSquared instance Read ChiSquared instance Show ChiSquared instance Data ChiSquared instance Generic ChiSquared instance Datatype D1ChiSquared instance Constructor C1_0ChiSquared instance ContGen ChiSquared instance MaybeEntropy ChiSquared instance Entropy ChiSquared instance MaybeVariance ChiSquared instance MaybeMean ChiSquared instance Variance ChiSquared instance Mean ChiSquared instance ContDistr ChiSquared instance Distribution ChiSquared instance Binary ChiSquared instance ToJSON ChiSquared instance FromJSON ChiSquared -- | The exponential distribution. 
This is the continuous probability -- distribution of the times between events in a Poisson process, in -- which events occur continuously and independently at a constant -- average rate. module Statistics.Distribution.Exponential data ExponentialDistribution -- | Create an exponential distribution. exponential :: Double -> ExponentialDistribution -- | Create exponential distribution from sample. No tests are made to -- check whether it truly is exponential. exponentialFromSample :: Sample -> ExponentialDistribution edLambda :: ExponentialDistribution -> Double instance Typeable ExponentialDistribution instance Eq ExponentialDistribution instance Read ExponentialDistribution instance Show ExponentialDistribution instance Data ExponentialDistribution instance Generic ExponentialDistribution instance Datatype D1ExponentialDistribution instance Constructor C1_0ExponentialDistribution instance Selector S1_0_0ExponentialDistribution instance ContGen ExponentialDistribution instance MaybeEntropy ExponentialDistribution instance Entropy ExponentialDistribution instance MaybeVariance ExponentialDistribution instance MaybeMean ExponentialDistribution instance Variance ExponentialDistribution instance Mean ExponentialDistribution instance ContDistr ExponentialDistribution instance Distribution ExponentialDistribution instance Binary ExponentialDistribution instance ToJSON ExponentialDistribution instance FromJSON ExponentialDistribution -- | The gamma distribution. This is a continuous probability distribution -- with two parameters, k and ϑ. If k is integral, the -- distribution represents the sum of k independent exponentially -- distributed random variables, each of which has a mean of ϑ. module Statistics.Distribution.Gamma -- | The gamma distribution. data GammaDistribution -- | Create gamma distribution. Both shape and scale parameters must be -- positive. gammaDistr :: Double -> Double -> GammaDistribution -- | Create gamma distribution. 
This constructor does not check whether -- parameters are valid improperGammaDistr :: Double -> Double -> GammaDistribution -- | Shape parameter, k. gdShape :: GammaDistribution -> Double -- | Scale parameter, ϑ. gdScale :: GammaDistribution -> Double instance Typeable GammaDistribution instance Eq GammaDistribution instance Read GammaDistribution instance Show GammaDistribution instance Data GammaDistribution instance Generic GammaDistribution instance Datatype D1GammaDistribution instance Constructor C1_0GammaDistribution instance Selector S1_0_0GammaDistribution instance Selector S1_0_1GammaDistribution instance ContGen GammaDistribution instance MaybeEntropy GammaDistribution instance MaybeVariance GammaDistribution instance MaybeMean GammaDistribution instance Mean GammaDistribution instance Variance GammaDistribution instance ContDistr GammaDistribution instance Distribution GammaDistribution instance Binary GammaDistribution instance ToJSON GammaDistribution instance FromJSON GammaDistribution -- | The Geometric distribution. There are two variants of distribution. -- First is the probability distribution of the number of Bernoulli -- trials needed to get one success, supported on the set [1,2..] -- (GeometricDistribution). Sometimes it's referred to as the -- shifted geometric distribution to distinguish from another one. -- -- Second variant is probability distribution of the number of failures -- before first success, defined over the set [0,1..] -- (GeometricDistribution0). module Statistics.Distribution.Geometric data GeometricDistribution data GeometricDistribution0 -- | Create geometric distribution. 
geometric0 :: Double -> GeometricDistribution0 gdSuccess :: GeometricDistribution -> Double gdSuccess0 :: GeometricDistribution0 -> Double instance Typeable GeometricDistribution instance Typeable GeometricDistribution0 instance Eq GeometricDistribution instance Read GeometricDistribution instance Show GeometricDistribution instance Data GeometricDistribution instance Generic GeometricDistribution instance Eq GeometricDistribution0 instance Read GeometricDistribution0 instance Show GeometricDistribution0 instance Data GeometricDistribution0 instance Generic GeometricDistribution0 instance Datatype D1GeometricDistribution instance Constructor C1_0GeometricDistribution instance Selector S1_0_0GeometricDistribution instance Datatype D1GeometricDistribution0 instance Constructor C1_0GeometricDistribution0 instance Selector S1_0_0GeometricDistribution0 instance ContGen GeometricDistribution0 instance DiscreteGen GeometricDistribution0 instance MaybeEntropy GeometricDistribution0 instance Entropy GeometricDistribution0 instance MaybeVariance GeometricDistribution0 instance MaybeMean GeometricDistribution0 instance Variance GeometricDistribution0 instance Mean GeometricDistribution0 instance DiscreteDistr GeometricDistribution0 instance Distribution GeometricDistribution0 instance Binary GeometricDistribution0 instance ToJSON GeometricDistribution0 instance FromJSON GeometricDistribution0 instance ContGen GeometricDistribution instance DiscreteGen GeometricDistribution instance MaybeEntropy GeometricDistribution instance Entropy GeometricDistribution instance MaybeVariance GeometricDistribution instance MaybeMean GeometricDistribution instance Variance GeometricDistribution instance Mean GeometricDistribution instance DiscreteDistr GeometricDistribution instance Distribution GeometricDistribution instance Binary GeometricDistribution instance ToJSON GeometricDistribution instance FromJSON GeometricDistribution -- | The Hypergeometric distribution. 
This is the discrete probability -- distribution that measures the probability of k successes in -- l trials, without replacement, from a finite population. -- -- The parameters of the distribution describe k elements chosen -- from a population of l, with m elements of one type, and -- l-m of the other (all are positive integers). module Statistics.Distribution.Hypergeometric data HypergeometricDistribution hypergeometric :: Int -> Int -> Int -> HypergeometricDistribution hdM :: HypergeometricDistribution -> Int hdL :: HypergeometricDistribution -> Int hdK :: HypergeometricDistribution -> Int instance Typeable HypergeometricDistribution instance Eq HypergeometricDistribution instance Read HypergeometricDistribution instance Show HypergeometricDistribution instance Data HypergeometricDistribution instance Generic HypergeometricDistribution instance Datatype D1HypergeometricDistribution instance Constructor C1_0HypergeometricDistribution instance Selector S1_0_0HypergeometricDistribution instance Selector S1_0_1HypergeometricDistribution instance Selector S1_0_2HypergeometricDistribution instance MaybeEntropy HypergeometricDistribution instance Entropy HypergeometricDistribution instance MaybeVariance HypergeometricDistribution instance MaybeMean HypergeometricDistribution instance Variance HypergeometricDistribution instance Mean HypergeometricDistribution instance DiscreteDistr HypergeometricDistribution instance Distribution HypergeometricDistribution instance Binary HypergeometricDistribution instance ToJSON HypergeometricDistribution instance FromJSON HypergeometricDistribution -- | The normal distribution. This is a continuous probability distribution -- that describes data that cluster around a mean. module Statistics.Distribution.Normal -- | The normal distribution. data NormalDistribution -- | Create normal distribution from parameters. -- -- IMPORTANT: prior to 0.10 release second parameter was variance not -- standard deviation. 
normalDistr :: Double -> Double -> NormalDistribution -- | Create distribution using parameters estimated from sample. Variance -- is estimated using maximum likelihood method (biased estimation). normalFromSample :: Sample -> NormalDistribution -- | Standard normal distribution with mean equal to 0 and variance equal -- to 1 standard :: NormalDistribution instance Typeable NormalDistribution instance Eq NormalDistribution instance Read NormalDistribution instance Show NormalDistribution instance Data NormalDistribution instance Generic NormalDistribution instance Datatype D1NormalDistribution instance Constructor C1_0NormalDistribution instance Selector S1_0_0NormalDistribution instance Selector S1_0_1NormalDistribution instance Selector S1_0_2NormalDistribution instance Selector S1_0_3NormalDistribution instance ContGen NormalDistribution instance MaybeEntropy NormalDistribution instance Entropy NormalDistribution instance Variance NormalDistribution instance MaybeVariance NormalDistribution instance Mean NormalDistribution instance MaybeMean NormalDistribution instance ContDistr NormalDistribution instance Distribution NormalDistribution instance Binary NormalDistribution instance ToJSON NormalDistribution instance FromJSON NormalDistribution -- | Transformations over distributions module Statistics.Distribution.Transform -- | Linear transformation applied to distribution. -- --
-- LinearTransform μ σ _ -- x' = μ + σ·x --data LinearTransform d LinearTransform :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> d -> LinearTransform d -- | Location parameter. linTransLocation :: LinearTransform d -> {-# UNPACK #-} !Double -- | Scale parameter. linTransScale :: LinearTransform d -> {-# UNPACK #-} !Double -- | Distribution being transformed. linTransDistr :: LinearTransform d -> d -- | Get fixed point of linear transformation linTransFixedPoint :: LinearTransform d -> Double -- | Apply linear transformation to distribution. scaleAround :: Double -> Double -> d -> LinearTransform d instance Typeable LinearTransform instance Eq d => Eq (LinearTransform d) instance Show d => Show (LinearTransform d) instance Read d => Read (LinearTransform d) instance Data d => Data (LinearTransform d) instance Generic (LinearTransform d) instance Datatype D1LinearTransform instance Constructor C1_0LinearTransform instance Selector S1_0_0LinearTransform instance Selector S1_0_1LinearTransform instance Selector S1_0_2LinearTransform instance ContGen d => ContGen (LinearTransform d) instance (Entropy d, DiscreteDistr d) => Entropy (LinearTransform d) instance (MaybeEntropy d, DiscreteDistr d) => MaybeEntropy (LinearTransform d) instance Variance d => Variance (LinearTransform d) instance MaybeVariance d => MaybeVariance (LinearTransform d) instance Mean d => Mean (LinearTransform d) instance MaybeMean d => MaybeMean (LinearTransform d) instance ContDistr d => ContDistr (LinearTransform d) instance Distribution d => Distribution (LinearTransform d) instance Functor LinearTransform instance Binary d => Binary (LinearTransform d) instance ToJSON d => ToJSON (LinearTransform d) instance FromJSON d => FromJSON (LinearTransform d) -- | Student-T distribution module Statistics.Distribution.StudentT -- | Student-T distribution data StudentT -- | Create Student-T distribution. Number of parameters must be positive. 
studentT :: Double -> StudentT studentTndf :: StudentT -> Double -- | Create an unstandardized Student-t distribution. studentTUnstandardized :: Double -> Double -> Double -> LinearTransform StudentT instance Typeable StudentT instance Eq StudentT instance Show StudentT instance Read StudentT instance Data StudentT instance Generic StudentT instance Datatype D1StudentT instance Constructor C1_0StudentT instance Selector S1_0_0StudentT instance ContGen StudentT instance MaybeEntropy StudentT instance Entropy StudentT instance MaybeVariance StudentT instance MaybeMean StudentT instance ContDistr StudentT instance Distribution StudentT instance Binary StudentT instance ToJSON StudentT instance FromJSON StudentT -- | Variate distributed uniformly in the interval. module Statistics.Distribution.Uniform -- | Uniform distribution from A to B data UniformDistribution -- | Create uniform distribution. uniformDistr :: Double -> Double -> UniformDistribution -- | Low boundary of distribution uniformA :: UniformDistribution -> Double -- | Upper boundary of distribution uniformB :: UniformDistribution -> Double instance Typeable UniformDistribution instance Eq UniformDistribution instance Read UniformDistribution instance Show UniformDistribution instance Data UniformDistribution instance Generic UniformDistribution instance Datatype D1UniformDistribution instance Constructor C1_0UniformDistribution instance Selector S1_0_0UniformDistribution instance Selector S1_0_1UniformDistribution instance ContGen UniformDistribution instance MaybeEntropy UniformDistribution instance Entropy UniformDistribution instance MaybeVariance UniformDistribution instance MaybeMean UniformDistribution instance Variance UniformDistribution instance Mean UniformDistribution instance ContDistr UniformDistribution instance Distribution UniformDistribution instance Binary UniformDistribution instance ToJSON UniformDistribution instance FromJSON UniformDistribution -- | Fisher F distribution module 
Statistics.Distribution.FDistribution -- | F distribution data FDistribution fDistribution :: Int -> Int -> FDistribution fDistributionNDF1 :: FDistribution -> Double fDistributionNDF2 :: FDistribution -> Double instance Typeable FDistribution instance Eq FDistribution instance Show FDistribution instance Read FDistribution instance Data FDistribution instance Generic FDistribution instance Datatype D1FDistribution instance Constructor C1_0FDistribution instance Selector S1_0_0FDistribution instance Selector S1_0_1FDistribution instance Selector S1_0_2FDistribution instance ContGen FDistribution instance MaybeEntropy FDistribution instance Entropy FDistribution instance MaybeVariance FDistribution instance MaybeMean FDistribution instance ContDistr FDistribution instance Distribution FDistribution instance Binary FDistribution instance ToJSON FDistribution instance FromJSON FDistribution -- | Basic matrix operations. -- -- There isn't a widely used matrix package for Haskell yet, so we -- implement the necessary minimum here. module Statistics.Matrix -- | Two-dimensional matrix, stored in row-major order. data Matrix Matrix :: {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> !Vector -> Matrix -- | Rows of matrix. rows :: Matrix -> {-# UNPACK #-} !Int -- | Columns of matrix. cols :: Matrix -> {-# UNPACK #-} !Int -- | In order to avoid overflows during matrix multiplication, a large -- exponent is stored separately. exponent :: Matrix -> {-# UNPACK #-} !Int -- | Matrix data. _vector :: Matrix -> !Vector type Vector = Vector Double -- | Convert from a row-major list. fromList :: Int -> Int -> [Double] -> Matrix -- | Convert from a row-major vector. fromVector :: Int -> Int -> Vector Double -> Matrix -- | Convert to a row-major flat vector. toVector :: Matrix -> Vector Double -- | Convert to a row-major flat list. toList :: Matrix -> [Double] -- | Return the dimensions of this matrix, as a (row,column) pair. 
dimension :: Matrix -> (Int, Int) -- | Element in the center of matrix (not corrected for exponent). center :: Matrix -> Double -- | Matrix-matrix multiplication. Matrices must be of compatible sizes -- (note: not checked). multiply :: Matrix -> Matrix -> Matrix -- | Matrix-vector multiplication. multiplyV :: Matrix -> Vector -> Vector transpose :: Matrix -> Matrix -- | Raise matrix to nth power. Power must be positive (note: not -- checked). power :: Matrix -> Int -> Matrix -- | Calculate the Euclidean norm of a vector. norm :: Vector -> Double -- | Return the given column. column :: Matrix -> Int -> Vector -- | Return the given row. row :: Matrix -> Int -> Vector map :: (Double -> Double) -> Matrix -> Matrix -- | Simple for loop. Counts from start to end-1. for :: Monad m => Int -> Int -> (Int -> m ()) -> m () unsafeIndex :: Matrix -> Int -> Int -> Double -- | Indicate whether any element of the matrix is NaN. hasNaN :: Matrix -> Bool -- | Given row and column numbers, calculate the offset into the flat -- row-major vector. bounds :: (Vector -> Int -> r) -> Matrix -> Int -> Int -> r -- | Given row and column numbers, calculate the offset into the flat -- row-major vector, without checking. unsafeBounds :: (Vector -> Int -> r) -> Matrix -> Int -> Int -> r -- | Useful matrix functions. module Statistics.Matrix.Algorithms -- | O(r*c) Compute the QR decomposition of a matrix. The result -- returned is the matrices (q,r). qr :: Matrix -> (Matrix, Matrix) -- | Functions for approximating quantiles, i.e. points taken at regular -- intervals from the cumulative distribution function of a random -- variable. -- -- The number of quantiles is described below by the variable q, -- so with q=4, a 4-quantile (also known as a quartile) has -- 4 intervals, and contains 5 points. The parameter k describes -- the desired point, where 0 ≤ k ≤ q. module Statistics.Quantile -- | O(n log n). Estimate the kth q-quantile of -- a sample, using the weighted average method.
weightedAvg :: Vector v Double => Int -> Int -> v Double -> Double -- | Parameters a and b to the continuousBy function. data ContParam ContParam :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> ContParam -- | O(n log n). Estimate the kth q-quantile of -- a sample x, using the continuous sample method with the given -- parameters. This is the method used by most statistical software, such -- as R, Mathematica, SPSS, and S. continuousBy :: Vector v Double => ContParam -> Int -> Int -> v Double -> Double -- | O(n log n). Estimate the range between -- q-quantiles 1 and q-1 of a sample x, using the -- continuous sample method with the given parameters. -- -- For instance, the interquartile range (IQR) can be estimated as -- follows: -- --
-- midspread medianUnbiased 4 (U.fromList [1,1,2,2,3]) -- ==> 1.333333 --midspread :: Vector v Double => ContParam -> Int -> v Double -> Double -- | California Department of Public Works definition, a=0, -- b=1. Gives a linear interpolation of the empirical CDF. This -- corresponds to method 4 in R and Mathematica. cadpw :: ContParam -- | Hazen's definition, a=0.5, b=0.5. This is claimed to be -- popular among hydrologists. This corresponds to method 5 in R and -- Mathematica. hazen :: ContParam -- | Definition used by the S statistics application, with a=1, -- b=1. The interpolation points divide the sample range into -- n-1 intervals. This corresponds to method 7 in R and -- Mathematica. s :: ContParam -- | Definition used by the SPSS statistics application, with a=0, -- b=0 (also known as Weibull's definition). This corresponds to -- method 6 in R and Mathematica. spss :: ContParam -- | Median unbiased definition, a=1/3, b=1/3. The resulting -- quantile estimates are approximately median unbiased regardless of the -- distribution of x. This corresponds to method 8 in R and -- Mathematica. medianUnbiased :: ContParam -- | Normal unbiased definition, a=3/8, b=3/8. An -- approximately unbiased estimate if the empirical distribution -- approximates the normal distribution. This corresponds to method 9 in -- R and Mathematica. normalUnbiased :: ContParam -- | Resampling statistics. module Statistics.Resampling -- | A resample drawn randomly, with replacement, from a set of data -- points. Distinct from a normal array to make it harder for your humble -- author's brain to go wrong. newtype Resample Resample :: Vector Double -> Resample fromResample :: Resample -> Vector Double -- | O(n) or O(n^2) Compute a statistical estimate repeatedly over a -- sample, each time omitting a successive element. jackknife :: Estimator -> Sample -> Vector Double -- | O(n) Compute the jackknife mean of a sample. 
jackknifeMean :: Sample -> Vector Double -- | O(n) Compute the jackknife variance of a sample. jackknifeVariance :: Sample -> Vector Double -- | O(n) Compute the unbiased jackknife variance of a sample. jackknifeVarianceUnb :: Sample -> Vector Double -- | O(n) Compute the jackknife standard deviation of a sample. jackknifeStdDev :: Sample -> Vector Double -- | O(e*r*s) Resample a data set repeatedly, with replacement, -- computing each estimate over the resampled data. -- -- This function is expensive; it has to do work proportional to -- e*r*s, where e is the number of estimation functions, -- r is the number of resamples to compute, and s is the -- number of original samples. -- -- To improve performance, this function will make use of all available -- CPUs. At least with GHC 7.0, parallel performance seems best if the -- parallel garbage collector is disabled (RTS option -qg). resample :: GenIO -> [Estimator] -> Int -> Sample -> IO [Resample] -- | Run an Estimator over a sample. estimate :: Estimator -> Sample -> Double -- | Split a generator into several that can run independently. splitGen :: Int -> GenIO -> IO [GenIO] instance Typeable Resample instance Eq Resample instance Read Resample instance Show Resample instance Data Resample instance Generic Resample instance Datatype D1Resample instance Constructor C1_0Resample instance Selector S1_0_0Resample instance Binary Resample instance ToJSON Resample instance FromJSON Resample -- | The bootstrap method for statistical inference. module Statistics.Resampling.Bootstrap -- | A point and interval estimate computed via an Estimator. data Estimate Estimate :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> Estimate -- | Point estimate. estPoint :: Estimate -> {-# UNPACK #-} !Double -- | Lower bound of the estimate interval (i.e. the lower bound of the -- confidence interval). 
estLowerBound :: Estimate -> {-# UNPACK #-} !Double -- | Upper bound of the estimate interval (i.e. the upper bound of the -- confidence interval). estUpperBound :: Estimate -> {-# UNPACK #-} !Double -- | Confidence level of the confidence intervals. estConfidenceLevel :: Estimate -> {-# UNPACK #-} !Double -- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both bias -- and skewness in the resampled distribution. bootstrapBCA :: Double -> Sample -> [Estimator] -> [Resample] -> [Estimate] -- | Multiply the point, lower bound, and upper bound in an Estimate -- by the given value. scale :: Double -> Estimate -> Estimate instance Typeable Estimate instance Eq Estimate instance Read Estimate instance Show Estimate instance Data Estimate instance Generic Estimate instance Datatype D1Estimate instance Constructor C1_0Estimate instance Selector S1_0_0Estimate instance Selector S1_0_1Estimate instance Selector S1_0_2Estimate instance Selector S1_0_3Estimate instance NFData Estimate instance Binary Estimate instance ToJSON Estimate instance FromJSON Estimate -- | Functions for regression analysis. module Statistics.Regression -- | Perform an ordinary least-squares regression on a set of predictors, -- and calculate the goodness-of-fit of the regression. -- -- The returned pair consists of: -- --
-- d = (maximum sample - minimum sample) / ((bins - 1) * 2) ---- -- If all elements in the sample are the same and equal to x -- range is set to (x - |x|/10, x + |x|/10). And if -- x is equal to 0 range is set to (-1,1). This is -- needed to avoid creating histogram with zero bin size. range :: Vector v Double => Int -> v Double -> (Double, Double) -- | Kernel density estimation. This module provides a fast, robust, -- non-parametric way to estimate the probability density function of a -- sample. -- -- This estimator does not use the commonly employed "Gaussian rule of -- thumb". As a result, it outperforms many plug-in methods on multimodal -- samples with widely separated modes. module Statistics.Sample.KernelDensity -- | Gaussian kernel density estimator for one-dimensional data, using the -- method of Botev et al. -- -- The result is a pair of vectors, containing: -- --
-- skewness . powers 3 $ U.to [1,100,101,102,103] -- ==> -1.497681449918257 ---- -- A sample with positive skew is said to be right-skewed. -- --
-- skewness . powers 3 $ U.to [1,2,3,4,100] -- ==> 1.4975367033335198 ---- -- A sample's skewness is not defined if its variance is zero. -- -- Requires Powers with order at least 3. skewness :: Powers -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that the -- sample's variance is due more to infrequent severe deviations than to -- frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- Requires Powers with order at least 4. kurtosis :: Powers -> Double instance Typeable Powers instance Eq Powers instance Read Powers instance Show Powers instance Data Powers instance Generic Powers instance Datatype D1Powers instance Constructor C1_0Powers instance Binary Powers instance ToJSON Powers instance FromJSON Powers -- | Pearson's chi squared test. module Statistics.Test.ChiSquared -- | Generic form of Pearson chi squared tests for binned data. Data sample -- is supplied in form of tuples (observed quantity, expected number of -- events). Both must be positive. chi2test :: (Vector v (Int, Double), Vector v Double) => Double -> Int -> v (Int, Double) -> TestResult -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistic is too big (small) for -- OneTailed or whether it is too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Kolmogorov-Smirnov tests are non-parametric tests for assessing whether -- given sample could be described by distribution or whether two samples -- have the same distribution. It's only applicable to continuous -- distributions. module Statistics.Test.KolmogorovSmirnov -- | Check that sample could be described by distribution.
-- Significant means distribution is not compatible with data for -- given p-value. -- -- This test uses Marsaglia-Tsang-Wang exact algorithm for calculation -- of p-value. kolmogorovSmirnovTest :: Distribution d => d -> Double -> Sample -> TestResult -- | Variant of kolmogorovSmirnovTest which uses CDF in the form of a -- function. kolmogorovSmirnovTestCdf :: (Double -> Double) -> Double -> Sample -> TestResult -- | Two sample Kolmogorov-Smirnov test. It tests whether two data samples -- could be described by the same distribution without making any -- assumptions about it. -- -- This test uses an approximate formula for computing p-value. kolmogorovSmirnovTest2 :: Double -> Sample -> Sample -> TestResult -- | Calculate Kolmogorov's statistic D for given cumulative -- distribution function (CDF) and data sample. If sample is empty -- returns 0. kolmogorovSmirnovCdfD :: (Double -> Double) -> Sample -> Double -- | Calculate Kolmogorov's statistic D for given cumulative -- distribution function (CDF) and data sample. If sample is empty -- returns 0. kolmogorovSmirnovD :: Distribution d => d -> Sample -> Double -- | Calculate Kolmogorov's statistic D for two data samples. If -- either of samples is empty returns 0. kolmogorovSmirnov2D :: Sample -> Sample -> Double -- | Calculate cumulative probability function for Kolmogorov's -- distribution with n parameters or probability of getting value -- smaller than d with n-elements sample. -- -- It uses algorithm by Marsaglia et al. and provides at least 7-digit -- accuracy. kolmogorovSmirnovProbability :: Int -> Double -> Double -- | Test type. Exact meaning depends on a specific test. 
But generally -- it's tested whether some statistic is too big (small) for -- OneTailed or whether it is too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult module Statistics.Test.KruskalWallis -- | Kruskal-Wallis ranking. -- -- All values are replaced by the absolute rank in the combined samples. -- -- The samples and values need not be ordered but the values in the -- result are ordered. Assigned ranks (ties are given their average -- rank). kruskalWallisRank :: [Sample] -> [Sample] -- | The Kruskal-Wallis Test. -- -- In textbooks the output value is usually represented by K or -- H. This function already does the ranking. kruskalWallis :: [Sample] -> Double -- | Calculates whether the Kruskal-Wallis test is significant. -- -- It uses Chi-Squared distribution for approximation as long as -- the sizes are larger than 5. Otherwise the test returns -- Nothing. kruskalWallisSignificant :: [Int] -> Double -> Double -> Maybe TestResult -- | Perform Kruskal-Wallis Test for the given samples and required -- significance. For additional information check kruskalWallis. -- This is just a helper function. kruskalWallisTest :: Double -> [Sample] -> Maybe TestResult -- | Mann-Whitney U test (also known as Mann-Whitney-Wilcoxon and Wilcoxon -- rank sum test) is a non-parametric test for assessing whether two -- samples of independent observations have different means. module Statistics.Test.MannWhitneyU -- | Perform Mann-Whitney U Test for two samples and required significance. -- For additional information check documentation of mannWhitneyU -- and mannWhitneyUSignificant. This is just a helper function. -- -- One-tailed test checks whether first sample is significantly larger -- than second. Two-tailed whether they are significantly different. 
mannWhitneyUtest :: TestType -> Double -> Sample -> Sample -> Maybe TestResult -- | The Mann-Whitney U Test. -- -- This is sometimes known as the Mann-Whitney-Wilcoxon U test, and -- confusingly many sources state that the Mann-Whitney U test is the -- same as the Wilcoxon's rank sum test (which is provided as -- wilcoxonRankSums). The Mann-Whitney U is a simple transform of -- Wilcoxon's rank sum test. -- -- Again confusingly, different sources state reversed definitions for U₁ -- and U₂, so it is worth being explicit about what this function -- returns. Given two samples, the first, xs₁, of size n₁ and the second, -- xs₂, of size n₂, this function returns (U₁, U₂) where U₁ = W₁ - -- (n₁(n₁+1))/2 and U₂ = W₂ - (n₂(n₂+1))/2, where (W₁, W₂) is the return -- value of wilcoxonRankSums xs1 xs2. -- -- Some sources instead state that U₁ and U₂ should be the other way -- round, often expressing this using U₁' = n₁n₂ - U₁ (since U₁ + U₂ = -- n₁n₂). -- -- All of which you probably don't care about if you just feed this into -- mannWhitneyUSignificant. mannWhitneyU :: Sample -> Sample -> (Double, Double) -- | Calculates the critical value of Mann-Whitney U for the given sample -- sizes and significance level. -- -- This function returns the exact calculated value of U for all sample -- sizes; it does not use the normal approximation at all. Above sample -- size 20 it is generally recommended to use the normal approximation -- instead, but this function will calculate the higher critical values -- if you need them. -- -- The algorithm to generate these values is a faster, memoised version -- of the simple unoptimised generating function given in section 2 of -- "The Mann Whitney Wilcoxon Distribution Using Linked Lists" mannWhitneyUCriticalValue :: (Int, Int) -> Double -> Maybe Int -- | Calculates whether the Mann Whitney U test is significant. 
-- -- If both sample sizes are less than or equal to 20, the exact U -- critical value (as calculated by mannWhitneyUCriticalValue) is -- used. If either sample is larger than 20, the normal approximation is -- used instead. -- -- If you use a one-tailed test, the test indicates whether the first -- sample is significantly larger than the second. If you want the -- opposite, simply reverse the order in both the sample size and the -- (U₁, U₂) pairs. mannWhitneyUSignificant :: TestType -> (Int, Int) -> Double -> (Double, Double) -> Maybe TestResult -- | The Wilcoxon Rank Sums Test. -- -- This test calculates the sum of ranks for the given two samples. The -- samples are ordered, and assigned ranks (ties are given their average -- rank), then these ranks are summed for each sample. -- -- The return value is (W₁, W₂) where W₁ is the sum of ranks of the first -- sample and W₂ is the sum of ranks of the second sample. This test is -- trivially transformed into the Mann-Whitney U test. You will probably -- want to use mannWhitneyU and the related functions for testing -- significance, but this function is exposed for completeness. wilcoxonRankSums :: Sample -> Sample -> (Double, Double) -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistic is too big (small) for -- OneTailed or whether it is too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | The Wilcoxon matched-pairs signed-rank test is non-parametric test -- which could be used to test whether two related samples have different -- means. -- -- WARNING: the current implementation contains a serious bug and cannot be -- used with samples larger than 1023. 
-- https://github.com/bos/statistics/issues/18 module Statistics.Test.WilcoxonT -- | The Wilcoxon matched-pairs signed-rank test. The samples are zipped -- together: if one is longer than the other, both are truncated to -- the length of the shorter sample. -- -- For one-tailed test it tests whether first sample is significantly -- greater than the second. For two-tailed it checks whether they -- significantly differ. -- -- Check wilcoxonMatchedPairSignedRank and -- wilcoxonMatchedPairSignificant for additional information. wilcoxonMatchedPairTest :: TestType -> Double -> Sample -> Sample -> Maybe TestResult wilcoxonMatchedPairSignedRank :: Sample -> Sample -> (Double, Double) -- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs -- test is significant at the given level. -- -- This function can perform a one-tailed or two-tailed test. If the -- first parameter to this function is TwoTailed, the test is -- performed two-tailed to check if the two samples differ significantly. -- If the first parameter is OneTailed, the check is performed -- one-tailed to decide whether the first sample (i.e. the first sample -- you passed to wilcoxonMatchedPairSignedRank) is greater than -- the second sample (i.e. the second sample you passed to -- wilcoxonMatchedPairSignedRank). If you wish to perform a -- one-tailed test in the opposite direction, you can either pass the -- parameters in a different order to -- wilcoxonMatchedPairSignedRank, or simply swap the values in the -- resulting pair before passing them to this function. wilcoxonMatchedPairSignificant :: TestType -> Int -> Double -> (Double, Double) -> Maybe TestResult -- | Works out the significance level (p-value) of a T value, given a -- sample size and a T value from the Wilcoxon signed-rank matched-pairs -- test. -- -- See the notes on wilcoxonMatchedPairCriticalValue for how this is -- calculated. 
wilcoxonMatchedPairSignificance :: Int -> Double -> Double -- | Obtains the critical value of T to compare against, given a sample -- size and a p-value (significance level). Your T value must be less -- than or equal to the return of this function in order for the test to -- work out significant. If there is a Nothing return, the sample size is -- too small to make a decision. -- -- wilcoxonMatchedPairSignificant tests the return value of -- wilcoxonMatchedPairSignedRank for you, so you should use -- wilcoxonMatchedPairSignificant for determining test results. However, -- this function is useful, for example, for generating lookup tables for -- Wilcoxon signed rank critical values. -- -- The return values of this function are generated using the method -- detailed in the paper "Critical Values for the Wilcoxon Signed Rank -- Statistic", Peter Mitic, The Mathematica Journal, volume 6, issue 3, -- 1996, which can be found here: -- http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf. -- According to that paper, the results may differ from other published -- lookup tables, but (Mitic claims) the values obtained by this function -- will be the correct ones. wilcoxonMatchedPairCriticalValue :: Int -> Double -> Maybe Int -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistic is too big (small) for -- OneTailed or whether it is too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Functions for computing autocovariance and autocorrelation of a -- sample. module Statistics.Autocorrelation -- | Compute the autocovariance of a sample, i.e. the covariance of the -- sample against a shifted version of itself. 
autocovariance :: (Vector v Double, Vector v Int) => v Double -> v Double -- | Compute the autocorrelation function of a sample, and the upper and -- lower bounds of confidence intervals for each element. -- -- Note: The calculation of the 95% confidence interval assumes a -- stationary Gaussian process. autocorrelation :: (Vector v Double, Vector v Int) => v Double -> (v Double, v Double, v Double)