-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A library of statistical types, data, and functions -- -- This library provides a number of common functions and types useful in -- statistics. We focus on high performance, numerical robustness, and -- use of good algorithms. Where possible, we provide references to the -- statistical literature. -- -- The library's facilities can be divided into four broad categories: -- -- -- -- Changes in 0.10.4.0 -- -- -- -- Changes in 0.10.3.0 -- -- -- -- Changes in 0.10.2.0 -- -- -- -- Changes in 0.10.1.0 -- -- @package statistics @version 0.10.4.1 module Statistics.Test.Types -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Significant if parameter is True, not significant otherwiser significant :: Bool -> TestResult instance Typeable TestType instance Typeable TestResult instance Eq TestType instance Ord TestType instance Show TestType instance Eq TestResult instance Ord TestResult instance Show TestResult -- | Fourier-related transformations of mathematical functions. -- -- These functions are written for simplicity and correctness, not speed. -- If you need a fast FFT implementation for your application, you should -- strongly consider using a library of FFTW bindings instead. module Statistics.Transform type CD = Complex Double -- | Discrete cosine transform (DCT-II). dct :: Vector Double -> Vector Double -- | Discrete cosine transform (DCT-II). Only real part of vector is -- transformed, imaginary part is ignored. dct_ :: Vector CD -> Vector Double -- | Inverse discrete cosine transform (DCT-III). It's inverse of -- dct only up to scale parameter: -- --
--   (idct . dct) x = (* length x)
--   
idct :: Vector Double -> Vector Double -- | Inverse discrete cosine transform (DCT-III). Only real part of vector -- is transformed, imaginary part is ignored. idct_ :: Vector CD -> Vector Double -- | Radix-2 decimation-in-time fast Fourier transform. fft :: Vector CD -> Vector CD -- | Inverse fast Fourier transform. ifft :: Vector CD -> Vector CD -- | Mathematical functions for statistics. -- -- DEPRECATED. Use package math-functions instead. This module is just -- reexports functions from SpecFunctions, Extra and -- Chebyshev. -- | Deprecated: Use package math-function module Statistics.Math -- | Haskell functions for finding the roots of mathematical functions. module Statistics.Math.RootFinding -- | The result of searching for a root of a mathematical function. data Root a -- | The function does not have opposite signs when evaluated at the lower -- and upper bounds of the search. NotBracketed :: Root a -- | The search failed to converge to within the given error tolerance -- after the given number of iterations. SearchFailed :: Root a -- | A root was successfully found. Root :: a -> Root a -- | Returns either the result of a search for a root, or the default value -- if the search failed. fromRoot :: a -> Root a -> a -- | Use the method of Ridders to compute a root of a function. -- -- The function must have opposite signs when evaluated at the lower and -- upper bounds of the search (i.e. the root must be bracketed). ridders :: Double -> (Double, Double) -> (Double -> Double) -> Root Double instance Typeable1 Root instance Eq a => Eq (Root a) instance Read a => Read (Root a) instance Show a => Show (Root a) instance Data a => Data (Root a) instance Generic (Root a) instance Datatype D1Root instance Constructor C1_0Root instance Constructor C1_1Root instance Constructor C1_2Root instance Alternative Root instance Applicative Root instance MonadPlus Root instance Monad Root instance Functor Root instance Binary a => Binary (Root a) -- | Useful functions. module Statistics.Function -- | Compute the minimum and maximum of a vector in one pass. minMax :: Vector v Double => v Double -> (Double, Double) -- | Sort a vector. sort :: (Ord e, Vector v e) => v e -> v e -- | Sort a vector using a custom ordering. sortBy :: Vector v e => Comparison e -> v e -> v e -- | Partially sort a vector, such that the least k elements will be -- at the front. partialSort :: (Vector v e, Ord e) => Int -> v e -> v e -- | Zip a vector with its indices. indexed :: (Vector v e, Vector v Int, Vector v (Int, e)) => v e -> v (Int, e) -- | Return the indices of a vector. indices :: (Vector v a, Vector v Int) => v a -> v Int -- | Efficiently compute the next highest power of two for a non-negative -- integer. If the given value is already a power of two, it is returned -- unchanged. If negative, zero is returned. nextHighestPowerOfTwo :: Int -> Int -- | Compare two Double values for approximate equality, using -- Dawson's method. -- -- The required accuracy is specified in ULPs (units of least precision). -- If the two numbers differ by the given number of ULPs or less, this -- function returns True. within :: Int -> Double -> Double -> Bool -- | Functions for approximating quantiles, i.e. points taken at regular -- intervals from the cumulative distribution function of a random -- variable. -- -- The number of quantiles is described below by the variable q, -- so with q=4, a 4-quantile (also known as a quartile) has -- 4 intervals, and contains 5 points. The parameter k describes -- the desired point, where 0 ≤ kq. module Statistics.Quantile -- | O(n log n). Estimate the kth q-quantile of -- a sample, using the weighted average method. weightedAvg :: Vector v Double => Int -> Int -> v Double -> Double -- | Parameters a and b to the continuousBy function. data ContParam ContParam :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> ContParam -- | O(n log n). Estimate the kth q-quantile of -- a sample x, using the continuous sample method with the given -- parameters. This is the method used by most statistical software, such -- as R, Mathematica, SPSS, and S. continuousBy :: Vector v Double => ContParam -> Int -> Int -> v Double -> Double -- | O(n log n). Estimate the range between -- q-quantiles 1 and q-1 of a sample x, using the -- continuous sample method with the given parameters. -- -- For instance, the interquartile range (IQR) can be estimated as -- follows: -- --
--   midspread medianUnbiased 4 (U.fromList [1,1,2,2,3])
--   ==> 1.333333
--   
midspread :: Vector v Double => ContParam -> Int -> v Double -> Double -- | California Department of Public Works definition, a=0, -- b=1. Gives a linear interpolation of the empirical CDF. This -- corresponds to method 4 in R and Mathematica. cadpw :: ContParam -- | Hazen's definition, a=0.5, b=0.5. This is claimed to be -- popular among hydrologists. This corresponds to method 5 in R and -- Mathematica. hazen :: ContParam -- | Definition used by the S statistics application, with a=1, -- b=1. The interpolation points divide the sample range into -- n-1 intervals. This corresponds to method 7 in R and -- Mathematica. s :: ContParam -- | Definition used by the SPSS statistics application, with a=0, -- b=0 (also known as Weibull's definition). This corresponds to -- method 6 in R and Mathematica. spss :: ContParam -- | Median unbiased definition, a=1/3, b=1/3. The resulting -- quantile estimates are approximately median unbiased regardless of the -- distribution of x. This corresponds to method 8 in R and -- Mathematica. medianUnbiased :: ContParam -- | Normal unbiased definition, a=3/8, b=3/8. An -- approximately unbiased estimate if the empirical distribution -- approximates the normal distribution. This corresponds to method 9 in -- R and Mathematica. normalUnbiased :: ContParam -- | Functions for computing histograms of sample data. module Statistics.Sample.Histogram -- | O(n) Compute a histogram over a data set. -- -- The result consists of a pair of vectors: -- -- -- -- Interval (bin) sizes are uniform, and the upper and lower bounds are -- chosen automatically using the range function. To specify these -- parameters directly, use the histogram_ function. histogram :: (Vector v0 Double, Vector v1 Double, Num b, Vector v1 b) => Int -> v0 Double -> (v1 Double, v1 b) -- | O(n) Compute a histogram over a data set. -- -- Interval (bin) sizes are uniform, based on the supplied upper and -- lower bounds. histogram_ :: (Num b, RealFrac a, Vector v0 a, Vector v1 b) => Int -> a -> a -> v0 a -> v1 b -- | O(n) Compute decent defaults for the lower and upper bounds of -- a histogram, based on the desired number of bins and the range of the -- sample data. -- -- The upper and lower bounds used are (lo-d, hi+d), where -- --
--   d = (maximum sample - minimum sample) / ((bins - 1) * 2)
--   
range :: Vector v Double => Int -> v Double -> (Double, Double) -- | Kernel density estimation. This module provides a fast, robust, -- non-parametric way to estimate the probability density function of a -- sample. -- -- This estimator does not use the commonly employed "Gaussian rule of -- thumb". As a result, it outperforms many plug-in methods on multimodal -- samples with widely separated modes. module Statistics.Sample.KernelDensity -- | Gaussian kernel density estimator for one-dimensional data, using the -- method of Botev et al. -- -- The result is a pair of vectors, containing: -- -- kde :: Int -> Vector Double -> (Vector Double, Vector Double) -- | Gaussian kernel density estimator for one-dimensional data, using the -- method of Botev et al. -- -- The result is a pair of vectors, containing: -- -- kde_ :: Int -> Double -> Double -> Vector Double -> (Vector Double, Vector Double) -- | Very fast statistics over simple powers of a sample. These can all be -- computed efficiently in just a single pass over a sample, with that -- pass subject to stream fusion. -- -- The tradeoff is that some of these functions are less numerically -- robust than their counterparts in the Sample module. Where this -- is the case, the alternatives are noted. module Statistics.Sample.Powers data Powers -- | O(n) Collect the n simple powers of a sample. -- -- Functions computed over a sample's simple powers require at least a -- certain number (or order) of powers to be collected. -- -- -- -- This function is subject to stream fusion. powers :: Vector v Double => Int -> v Double -> Powers -- | The order (number) of simple powers collected from a sample. order :: Powers -> Int -- | The number of elements in the original Sample. This is the -- sample's zeroth simple power. count :: Powers -> Int -- | The sum of elements in the original Sample. This is the -- sample's first simple power. sum :: Powers -> Double -- | The arithmetic mean of elements in the original Sample. -- -- This is less numerically robust than the mean function in the -- Sample module, but the number is essentially free to compute if -- you have already collected a sample's simple powers. mean :: Powers -> Double -- | Maximum likelihood estimate of a sample's variance. Also known as the -- population variance, where the denominator is n. This is the -- second central moment of the sample. -- -- This is less numerically robust than the variance function in the -- Sample module, but the number is essentially free to compute if -- you have already collected a sample's simple powers. -- -- Requires Powers with order at least 2. variance :: Powers -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. stdDev :: Powers -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. -- -- Requires Powers with order at least 2. varianceUnbiased :: Powers -> Double -- | Compute the kth central moment of a sample. The central moment -- is also known as the moment about the mean. centralMoment :: Int -> Powers -> Double -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
--   skewness . powers 3 $ U.to [1,100,101,102,103]
--   ==> -1.497681449918257
--   
-- -- A sample with positive skew is said to be right-skewed. -- --
--   skewness . powers 3 $ U.to [1,2,3,4,100]
--   ==> 1.4975367033335198
--   
-- -- A sample's skewness is not defined if its variance is zero. -- -- Requires Powers with order at least 3. skewness :: Powers -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that the -- sample's variance is due more to infrequent severe deviations than to -- frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- Requires Powers with order at least 4. kurtosis :: Powers -> Double instance Typeable Powers instance Eq Powers instance Read Powers instance Show Powers instance Data Powers instance Generic Powers instance Datatype D1Powers instance Constructor C1_0Powers instance Binary Powers -- | Types for working with statistics. module Statistics.Types -- | A function that estimates a property of a sample, such as its -- mean. type Estimator = Sample -> Double -- | Sample data. type Sample = Vector Double -- | Sample with weights. First element of sample is data, second is weight type WeightedSample = Vector (Double, Double) -- | Weights for affecting the importance of elements of a sample. type Weights = Vector Double -- | Resampling statistics. module Statistics.Resampling -- | A resample drawn randomly, with replacement, from a set of data -- points. Distinct from a normal array to make it harder for your humble -- author's brain to go wrong. newtype Resample Resample :: Vector Double -> Resample fromResample :: Resample -> Vector Double -- | Compute a statistical estimate repeatedly over a sample, each time -- omitting a successive element. jackknife :: Estimator -> Sample -> Vector Double -- | O(e*r*s) Resample a data set repeatedly, with replacement, -- computing each estimate over the resampled data. -- -- This function is expensive; it has to do work proportional to -- e*r*s, where e is the number of estimation functions, -- r is the number of resamples to compute, and s is the -- number of original samples. -- -- To improve performance, this function will make use of all available -- CPUs. At least with GHC 7.0, parallel performance seems best if the -- parallel garbage collector is disabled (RTS option -qg). resample :: Gen (PrimState IO) -> [Estimator] -> Int -> Sample -> IO [Resample] instance Typeable Resample instance Eq Resample instance Read Resample instance Show Resample instance Data Resample instance Generic Resample instance Datatype D1Resample instance Constructor C1_0Resample instance Selector S1_0_0Resample instance Binary Resample -- | The Wilcoxon matched-pairs signed-rank test is non-parametric test -- which could be used to whether two related samples have different -- means. -- -- WARNING: current implementation contain critical bugs -- https://github.com/bos/statistics/issues/18 module Statistics.Test.WilcoxonT -- | The Wilcoxon matched-pairs signed-rank test. The samples are zipped -- together: if one is longer than the other, both are truncated to the -- the length of the shorter sample. -- -- For one-tailed test it tests whether first sample is significantly -- greater than the second. For two-tailed it checks whether they -- significantly differ -- -- Check wilcoxonMatchedPairSignedRank and -- wilcoxonMatchedPairSignificant for additional information. wilcoxonMatchedPairTest :: TestType -> Double -> Sample -> Sample -> Maybe TestResult -- | The Wilcoxon matched-pairs signed-rank test. -- -- The value returned is the pair (T+, T-). T+ is the sum of positive -- ranks (the ranks of the differences where the first parameter is -- higher) whereas T- is the sum of negative ranks (the ranks of the -- differences where the second parameter is higher). These values mean -- little by themselves, and should be combined with the -- wilcoxonSignificant function in this module to get a -- meaningful result. -- -- The samples are zipped together: if one is longer than the other, both -- are truncated to the the length of the shorter sample. -- -- Note that: wilcoxonMatchedPairSignedRank == ((x, y) -> (y, x)) . -- flip wilcoxonMatchedPairSignedRank wilcoxonMatchedPairSignedRank :: Sample -> Sample -> (Double, Double) -- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs -- test is significant at the given level. -- -- This function can perform a one-tailed or two-tailed test. If the -- first parameter to this function is TwoTailed, the test is -- performed two-tailed to check if the two samples differ significantly. -- If the first parameter is OneTailed, the check is performed -- one-tailed to decide whether the first sample (i.e. the first sample -- you passed to wilcoxonMatchedPairSignedRank) is greater than -- the second sample (i.e. the second sample you passed to -- wilcoxonMatchedPairSignedRank). If you wish to perform a -- one-tailed test in the opposite direction, you can either pass the -- parameters in a different order to -- wilcoxonMatchedPairSignedRank, or simply swap the values in the -- resulting pair before passing them to this function. wilcoxonMatchedPairSignificant :: TestType -> Int -> Double -> (Double, Double) -> Maybe TestResult -- | Works out the significance level (p-value) of a T value, given a -- sample size and a T value from the Wilcoxon signed-rank matched-pairs -- test. -- -- See the notes on wilcoxonCriticalValue for how this is -- calculated. wilcoxonMatchedPairSignificance :: Int -> Double -> Double -- | Obtains the critical value of T to compare against, given a sample -- size and a p-value (significance level). Your T value must be less -- than or equal to the return of this function in order for the test to -- work out significant. If there is a Nothing return, the sample size is -- too small to make a decision. -- -- wilcoxonSignificant tests the return value of -- wilcoxonMatchedPairSignedRank for you, so you should use -- wilcoxonSignificant for determining test results. However, -- this function is useful, for example, for generating lookup tables for -- Wilcoxon signed rank critical values. -- -- The return values of this function are generated using the method -- detailed in the paper "Critical Values for the Wilcoxon Signed Rank -- Statistic", Peter Mitic, The Mathematica Journal, volume 6, issue 3, -- 1996, which can be found here: -- http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf. -- According to that paper, the results may differ from other published -- lookup tables, but (Mitic claims) the values obtained by this function -- will be the correct ones. wilcoxonMatchedPairCriticalValue :: Int -> Double -> Maybe Int -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Types classes for probability distrubutions module Statistics.Distribution -- | Type class common to all distributions. Only c.d.f. could be defined -- for both discrete and continous distributions. class Distribution d where complCumulative d x = 1 - cumulative d x cumulative :: Distribution d => d -> Double -> Double complCumulative :: Distribution d => d -> Double -> Double -- | Discrete probability distribution. class Distribution d => DiscreteDistr d probability :: DiscreteDistr d => d -> Int -> Double -- | Continuous probability distributuion class Distribution d => ContDistr d density :: ContDistr d => d -> Double -> Double quantile :: ContDistr d => d -> Double -> Double -- | Type class for distributions with mean. maybeMean should return -- Nothing if it's undefined for current value of data class Distribution d => MaybeMean d maybeMean :: MaybeMean d => d -> Maybe Double -- | Type class for distributions with mean. If distribution have finite -- mean for all valid values of parameters it should be instance of this -- type class. class MaybeMean d => Mean d mean :: Mean d => d -> Double -- | Type class for distributions with variance. If variance is undefined -- for some parameter values both maybeVariance and -- maybeStdDev should return Nothing. -- -- Minimal complete definition is maybeVariance or -- maybeStdDev class MaybeMean d => MaybeVariance d where maybeVariance d = (*) <$> x <*> x where x = maybeStdDev d maybeStdDev = fmap sqrt . maybeVariance maybeVariance :: MaybeVariance d => d -> Maybe Double maybeStdDev :: MaybeVariance d => d -> Maybe Double -- | Type class for distributions with variance. If distibution have finite -- variance for all valid parameter values it should be instance of this -- type class. -- -- Minimal complete definition is variance or stdDev class (Mean d, MaybeVariance d) => Variance d where variance d = x * x where x = stdDev d stdDev = sqrt . variance variance :: Variance d => d -> Double stdDev :: Variance d => d -> Double -- | Generate discrete random variates which have given distribution. class Distribution d => ContGen d genContVar :: (ContGen d, PrimMonad m) => d -> Gen (PrimState m) -> m Double -- | Generate discrete random variates which have given distribution. -- ContGen is superclass because it's always possible to generate -- real-valued variates from integer values class (DiscreteDistr d, ContGen d) => DiscreteGen d genDiscreteVar :: (DiscreteGen d, PrimMonad m) => d -> Gen (PrimState m) -> m Int -- | Generate variates from continous distribution using inverse transform -- rule. genContinous :: (ContDistr d, PrimMonad m) => d -> Gen (PrimState m) -> m Double -- | Approximate the value of X for which -- P(x>X)=p. -- -- This method uses a combination of Newton-Raphson iteration and -- bisection with the given guess as a starting point. The upper and -- lower bounds specify the interval in which the probability -- distribution reaches the value p. findRoot :: ContDistr d => d -> Double -> Double -> Double -> Double -> Double -- | Sum probabilities in inclusive interval. sumProbabilities :: DiscreteDistr d => d -> Int -> Int -> Double module Statistics.Distribution.Beta -- | The beta distribution data BetaDistribution -- | Create beta distribution. Both shape parameters must be positive. betaDistr :: Double -> Double -> BetaDistribution -- | Create beta distribution. This construtor doesn't check parameters. improperBetaDistr :: Double -> Double -> BetaDistribution -- | Alpha shape parameter bdAlpha :: BetaDistribution -> Double -- | Beta shape parameter bdBeta :: BetaDistribution -> Double instance Typeable BetaDistribution instance Eq BetaDistribution instance Read BetaDistribution instance Show BetaDistribution instance Data BetaDistribution instance Generic BetaDistribution instance Datatype D1BetaDistribution instance Constructor C1_0BetaDistribution instance Selector S1_0_0BetaDistribution instance Selector S1_0_1BetaDistribution instance ContGen BetaDistribution instance ContDistr BetaDistribution instance MaybeVariance BetaDistribution instance Variance BetaDistribution instance MaybeMean BetaDistribution instance Mean BetaDistribution instance Distribution BetaDistribution instance Binary BetaDistribution -- | The binomial distribution. This is the discrete probability -- distribution of the number of successes in a sequence of n -- independent yes/no experiments, each of which yields success with -- probability p. module Statistics.Distribution.Binomial -- | The binomial distribution. data BinomialDistribution -- | Construct binomial distribution. Number of trials must be non-negative -- and probability must be in [0,1] range binomial :: Int -> Double -> BinomialDistribution -- | Number of trials. bdTrials :: BinomialDistribution -> Int -- | Probability. bdProbability :: BinomialDistribution -> Double instance Typeable BinomialDistribution instance Eq BinomialDistribution instance Read BinomialDistribution instance Show BinomialDistribution instance Data BinomialDistribution instance Generic BinomialDistribution instance Datatype D1BinomialDistribution instance Constructor C1_0BinomialDistribution instance Selector S1_0_0BinomialDistribution instance Selector S1_0_1BinomialDistribution instance MaybeVariance BinomialDistribution instance MaybeMean BinomialDistribution instance Variance BinomialDistribution instance Mean BinomialDistribution instance DiscreteDistr BinomialDistribution instance Distribution BinomialDistribution instance Binary BinomialDistribution -- | The Cauchy-Lorentz distribution. It's also known as Lorentz -- distribution or Breit–Wigner distribution. -- -- It doesn't have mean and variance. module Statistics.Distribution.CauchyLorentz -- | Cauchy-Lorentz distribution. data CauchyDistribution -- | Central value of Cauchy-Lorentz distribution which is its mode and -- median. Distribution doesn't have mean so function is named after -- median. cauchyDistribMedian :: CauchyDistribution -> Double -- | Scale parameter of Cauchy-Lorentz distribution. It's different from -- variance and specify half width at half maximum (HWHM). cauchyDistribScale :: CauchyDistribution -> Double -- | Cauchy distribution cauchyDistribution :: Double -> Double -> CauchyDistribution standardCauchy :: CauchyDistribution instance Typeable CauchyDistribution instance Eq CauchyDistribution instance Show CauchyDistribution instance Read CauchyDistribution instance Data CauchyDistribution instance Generic CauchyDistribution instance Datatype D1CauchyDistribution instance Constructor C1_0CauchyDistribution instance Selector S1_0_0CauchyDistribution instance Selector S1_0_1CauchyDistribution instance ContGen CauchyDistribution instance ContDistr CauchyDistribution instance Distribution CauchyDistribution instance Binary CauchyDistribution -- | The chi-squared distribution. This is a continuous probability -- distribution of sum of squares of k independent standard normal -- distributions. It's commonly used in statistical tests module Statistics.Distribution.ChiSquared -- | Chi-squared distribution data ChiSquared -- | Construct chi-squared distribution. Number of degrees of freedom must -- be positive. chiSquared :: Int -> ChiSquared -- | Get number of degrees of freedom chiSquaredNDF :: ChiSquared -> Int instance Typeable ChiSquared instance Eq ChiSquared instance Read ChiSquared instance Show ChiSquared instance Data ChiSquared instance Generic ChiSquared instance Datatype D1ChiSquared instance Constructor C1_0ChiSquared instance ContGen ChiSquared instance MaybeVariance ChiSquared instance MaybeMean ChiSquared instance Variance ChiSquared instance Mean ChiSquared instance ContDistr ChiSquared instance Distribution ChiSquared instance Binary ChiSquared -- | Fisher F distribution module Statistics.Distribution.FDistribution -- | F distribution data FDistribution fDistribution :: Int -> Int -> FDistribution fDistributionNDF1 :: FDistribution -> Double fDistributionNDF2 :: FDistribution -> Double instance Typeable FDistribution instance Eq FDistribution instance Show FDistribution instance Read FDistribution instance Data FDistribution instance Generic FDistribution instance Datatype D1FDistribution instance Constructor C1_0FDistribution instance Selector S1_0_0FDistribution instance Selector S1_0_1FDistribution instance Selector S1_0_2FDistribution instance ContGen FDistribution instance MaybeVariance FDistribution instance MaybeMean FDistribution instance ContDistr FDistribution instance Distribution FDistribution instance Binary FDistribution -- | The gamma distribution. This is a continuous probability distribution -- with two parameters, k and ϑ. If k is integral, the -- distribution represents the sum of k independent exponentially -- distributed random variables, each of which has a mean of ϑ. module Statistics.Distribution.Gamma -- | The gamma distribution. data GammaDistribution -- | Create gamma distribution. Both shape and scale parameters must be -- positive. gammaDistr :: Double -> Double -> GammaDistribution -- | Create gamma distribution. This constructor do not check whether -- parameters are valid improperGammaDistr :: Double -> Double -> GammaDistribution -- | Shape parameter, k. gdShape :: GammaDistribution -> Double -- | Scale parameter, ϑ. gdScale :: GammaDistribution -> Double instance Typeable GammaDistribution instance Eq GammaDistribution instance Read GammaDistribution instance Show GammaDistribution instance Data GammaDistribution instance Generic GammaDistribution instance Datatype D1GammaDistribution instance Constructor C1_0GammaDistribution instance Selector S1_0_0GammaDistribution instance Selector S1_0_1GammaDistribution instance ContGen GammaDistribution instance MaybeVariance GammaDistribution instance MaybeMean GammaDistribution instance Mean GammaDistribution instance Variance GammaDistribution instance ContDistr GammaDistribution instance Distribution GammaDistribution instance Binary GammaDistribution -- | The Poisson distribution. This is the discrete probability -- distribution of a number of events occurring in a fixed interval if -- these events occur with a known average rate, and occur independently -- from each other within that interval. module Statistics.Distribution.Poisson data PoissonDistribution -- | Create Poisson distribution. poisson :: Double -> PoissonDistribution poissonLambda :: PoissonDistribution -> Double instance Typeable PoissonDistribution instance Eq PoissonDistribution instance Read PoissonDistribution instance Show PoissonDistribution instance Data PoissonDistribution instance Generic PoissonDistribution instance Datatype D1PoissonDistribution instance Constructor C1_0PoissonDistribution instance Selector S1_0_0PoissonDistribution instance MaybeVariance PoissonDistribution instance MaybeMean PoissonDistribution instance Mean PoissonDistribution instance Variance PoissonDistribution instance DiscreteDistr PoissonDistribution instance Distribution PoissonDistribution instance Binary PoissonDistribution -- | The Geometric distribution. This is the probability distribution of -- the number of Bernoulli trials needed to get one success, supported on -- the set [1,2..]. -- -- This distribution is sometimes referred to as the shifted -- geometric distribution, to distinguish it from a variant measuring the -- number of failures before the first success, defined over the set -- [0,1..]. module Statistics.Distribution.Geometric data GeometricDistribution -- | Create geometric distribution. geometric :: Double -> GeometricDistribution gdSuccess :: GeometricDistribution -> Double instance Typeable GeometricDistribution instance Eq GeometricDistribution instance Read GeometricDistribution instance Show GeometricDistribution instance Data GeometricDistribution instance Generic GeometricDistribution instance Datatype D1GeometricDistribution instance Constructor C1_0GeometricDistribution instance Selector S1_0_0GeometricDistribution instance MaybeVariance GeometricDistribution instance MaybeMean GeometricDistribution instance Variance GeometricDistribution instance Mean GeometricDistribution instance DiscreteDistr GeometricDistribution instance Distribution GeometricDistribution instance Binary GeometricDistribution -- | The Hypergeometric distribution. This is the discrete probability -- distribution that measures the probability of k successes in -- l trials, without replacement, from a finite population. -- -- The parameters of the distribution describe k elements chosen -- from a population of l, with m elements of one type, and -- l-m of the other (all are positive integers). module Statistics.Distribution.Hypergeometric data HypergeometricDistribution hypergeometric :: Int -> Int -> Int -> HypergeometricDistribution hdM :: HypergeometricDistribution -> Int hdL :: HypergeometricDistribution -> Int hdK :: HypergeometricDistribution -> Int instance Typeable HypergeometricDistribution instance Eq HypergeometricDistribution instance Read HypergeometricDistribution instance Show HypergeometricDistribution instance Data HypergeometricDistribution instance Generic HypergeometricDistribution instance Datatype D1HypergeometricDistribution instance Constructor C1_0HypergeometricDistribution instance Selector S1_0_0HypergeometricDistribution instance Selector S1_0_1HypergeometricDistribution instance Selector S1_0_2HypergeometricDistribution instance MaybeVariance HypergeometricDistribution instance MaybeMean HypergeometricDistribution instance Variance HypergeometricDistribution instance Mean HypergeometricDistribution instance DiscreteDistr HypergeometricDistribution instance Distribution HypergeometricDistribution instance Binary HypergeometricDistribution -- | Transformations over distributions module Statistics.Distribution.Transform -- | Linear transformation applied to distribution. -- --
--   LinearTransform μ σ _
--   x' = μ + σ·x
--   
data LinearTransform d LinearTransform :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> d -> LinearTransform d -- | Location parameter. linTransLocation :: LinearTransform d -> {-# UNPACK #-} !Double -- | Scale parameter. linTransScale :: LinearTransform d -> {-# UNPACK #-} !Double -- | Distribution being transformed. linTransDistr :: LinearTransform d -> d -- | Get fixed point of linear transformation linTransFixedPoint :: LinearTransform d -> Double -- | Apply linear transformation to distribution. scaleAround :: Double -> Double -> d -> LinearTransform d instance Typeable1 LinearTransform instance Eq d => Eq (LinearTransform d) instance Show d => Show (LinearTransform d) instance Read d => Read (LinearTransform d) instance Data d => Data (LinearTransform d) instance Generic (LinearTransform d) instance Datatype D1LinearTransform instance Constructor C1_0LinearTransform instance Selector S1_0_0LinearTransform instance Selector S1_0_1LinearTransform instance Selector S1_0_2LinearTransform instance ContGen d => ContGen (LinearTransform d) instance Variance d => Variance (LinearTransform d) instance MaybeVariance d => MaybeVariance (LinearTransform d) instance Mean d => Mean (LinearTransform d) instance MaybeMean d => MaybeMean (LinearTransform d) instance ContDistr d => ContDistr (LinearTransform d) instance Distribution d => Distribution (LinearTransform d) instance Functor LinearTransform instance Binary d => Binary (LinearTransform d) -- | Student-T distribution module Statistics.Distribution.StudentT -- | Student-T distribution data StudentT -- | Create Student-T distribution. Number of parameters must be positive. studentT :: Double -> StudentT studentTndf :: StudentT -> Double -- | Create an unstandardized Student-t distribution. studentTUnstandardized :: Double -> Double -> Double -> LinearTransform StudentT instance Typeable StudentT instance Eq StudentT instance Show StudentT instance Read StudentT instance Data StudentT instance Generic StudentT instance Datatype D1StudentT instance Constructor C1_0StudentT instance Selector S1_0_0StudentT instance ContGen StudentT instance MaybeVariance StudentT instance MaybeMean StudentT instance ContDistr StudentT instance Distribution StudentT instance Binary StudentT -- | Variate distributed uniformly in the interval. module Statistics.Distribution.Uniform -- | Uniform distribution from A to B data UniformDistribution -- | Create uniform distribution. uniformDistr :: Double -> Double -> UniformDistribution -- | Low boundary of distribution uniformA :: UniformDistribution -> Double -- | Upper boundary of distribution uniformB :: UniformDistribution -> Double instance Typeable UniformDistribution instance Eq UniformDistribution instance Read UniformDistribution instance Show UniformDistribution instance Data UniformDistribution instance Generic UniformDistribution instance Datatype D1UniformDistribution instance Constructor C1_0UniformDistribution instance Selector S1_0_0UniformDistribution instance Selector S1_0_1UniformDistribution instance ContGen UniformDistribution instance MaybeVariance UniformDistribution instance MaybeMean UniformDistribution instance Variance UniformDistribution instance Mean UniformDistribution instance ContDistr UniformDistribution instance Distribution UniformDistribution instance Binary UniformDistribution -- | Pearson's chi squared test. module Statistics.Test.ChiSquared -- | Generic form of Pearson chi squared tests for binned data. Data sample -- is supplied in form of tuples (observed quantity, expected number of -- events). Both must be positive. chi2test :: (Vector v (Int, Double), Vector v Double) => Double -> Int -> v (Int, Double) -> TestResult -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Kolmogov-Smirnov tests are non-parametric tests for assesing whether -- given sample could be described by distribution or whether two samples -- have the same distribution. It's only applicable to continous -- distributions. module Statistics.Test.KolmogorovSmirnov -- | Check that sample could be described by distribution. -- Significant means distribution is not compatible with data for -- given p-value. -- -- This test uses Marsaglia-Tsang-Wang exact alogorithm for calculation -- of p-value. kolmogorovSmirnovTest :: Distribution d => d -> Double -> Sample -> TestResult -- | Variant of kolmogorovSmirnovTest which uses CFD in form of -- function. kolmogorovSmirnovTestCdf :: (Double -> Double) -> Double -> Sample -> TestResult -- | Two sample Kolmogorov-Smirnov test. It tests whether two data samples -- could be described by the same distribution without making any -- assumptions about it. -- -- This test uses approxmate formula for computing p-value. kolmogorovSmirnovTest2 :: Double -> Sample -> Sample -> TestResult -- | Calculate Kolmogorov's statistic D for given cumulative -- distribution function (CDF) and data sample. If sample is empty -- returns 0. kolmogorovSmirnovCdfD :: (Double -> Double) -> Sample -> Double -- | Calculate Kolmogorov's statistic D for given cumulative -- distribution function (CDF) and data sample. If sample is empty -- returns 0. kolmogorovSmirnovD :: Distribution d => d -> Sample -> Double -- | Calculate Kolmogorov's statistic D for two data samples. If -- either of samples is empty returns 0. kolmogorovSmirnov2D :: Sample -> Sample -> Double -- | Calculate cumulative probability function for Kolmogorov's -- distribution with n parameters or probability of getting value -- smaller than d with n-elements sample. -- -- It uses algorithm by Marsgalia et. al. and provide at least 7-digit -- accuracy. kolmogorovSmirnovProbability :: Int -> Double -> Double -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult instance Show Matrix -- | Constant values common to much statistics code. -- -- DEPRECATED: use module Constants from math-functions. -- | Deprecated: use module Numeric.MathFunctions.Constants from -- math-functions module Statistics.Constants -- | Commonly used sample statistics, also known as descriptive statistics. module Statistics.Sample -- | Sample data. type Sample = Vector Double -- | Sample with weights. First element of sample is data, second is weight type WeightedSample = Vector (Double, Double) -- | O(n) Range. The difference between the largest and smallest -- elements of a sample. range :: Vector v Double => v Double -> Double -- | O(n) Arithmetic mean. This uses Welford's algorithm to provide -- numerical stability, using a single pass over the sample data. mean :: Vector v Double => v Double -> Double -- | O(n) Arithmetic mean for weighted sample. It uses a single-pass -- algorithm analogous to the one used by mean. meanWeighted :: Vector v (Double, Double) => v (Double, Double) -> Double -- | O(n) Harmonic mean. This algorithm performs a single pass over -- the sample. harmonicMean :: Vector v Double => v Double -> Double -- | O(n) Geometric mean of a sample containing no negative values. geometricMean :: Vector v Double => v Double -> Double -- | Compute the kth central moment of a sample. The central moment -- is also known as the moment about the mean. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoment :: Vector v Double => Int -> v Double -> Double -- | Compute the kth and jth central moments of a sample. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoments :: Vector v Double => Int -> Int -> v Double -> (Double, Double) -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
--   skewness $ U.to [1,100,101,102,103]
--   ==> -1.497681449918257
--   
-- -- A sample with positive skew is said to be right-skewed. -- --
--   skewness $ U.to [1,2,3,4,100]
--   ==> 1.4975367033335198
--   
-- -- A sample's skewness is not defined if its variance is zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. skewness :: Vector v Double => v Double -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that more -- of the sample's variance is due to infrequent severe deviations, -- rather than more frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. kurtosis :: Vector v Double => v Double -> Double -- | Maximum likelihood estimate of a sample's variance. Also known as the -- population variance, where the denominator is n. variance :: Vector v Double => v Double -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. varianceUnbiased :: Vector v Double => v Double -> Double -- | Calculate mean and maximum likelihood estimate of variance. This -- function should be used if both mean and variance are required since -- it will calculate mean only once. meanVariance :: Vector v Double => v Double -> (Double, Double) -- | Calculate mean and unbiased estimate of variance. This function should -- be used if both mean and variance are required since it will calculate -- mean only once. meanVarianceUnb :: Vector v Double => v Double -> (Double, Double) -- | Standard deviation. This is simply the square root of the unbiased -- estimate of the variance. stdDev :: Vector v Double => v Double -> Double -- | Weighted variance. This is biased estimation. varianceWeighted :: Vector v (Double, Double) => v (Double, Double) -> Double -- | Maximum likelihood estimate of a sample's variance. fastVariance :: Vector v Double => v Double -> Double -- | Unbiased estimate of a sample's variance. fastVarianceUnbiased :: Vector v Double => v Double -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. fastStdDev :: Vector v Double => v Double -> Double -- | The exponential distribution. This is the continunous probability -- distribution of the times between events in a poisson process, in -- which events occur continuously and independently at a constant -- average rate. module Statistics.Distribution.Exponential data ExponentialDistribution -- | Create an exponential distribution. exponential :: Double -> ExponentialDistribution -- | Create exponential distribution from sample. No tests are made to -- check whether it truly is exponential. exponentialFromSample :: Sample -> ExponentialDistribution edLambda :: ExponentialDistribution -> Double instance Typeable ExponentialDistribution instance Eq ExponentialDistribution instance Read ExponentialDistribution instance Show ExponentialDistribution instance Data ExponentialDistribution instance Generic ExponentialDistribution instance Datatype D1ExponentialDistribution instance Constructor C1_0ExponentialDistribution instance Selector S1_0_0ExponentialDistribution instance ContGen ExponentialDistribution instance MaybeVariance ExponentialDistribution instance MaybeMean ExponentialDistribution instance Variance ExponentialDistribution instance Mean ExponentialDistribution instance ContDistr ExponentialDistribution instance Distribution ExponentialDistribution instance Binary ExponentialDistribution -- | The normal distribution. This is a continuous probability distribution -- that describes data that cluster around a mean. module Statistics.Distribution.Normal -- | The normal distribution. data NormalDistribution -- | Create normal distribution from parameters. -- -- IMPORTANT: prior to 0.10 release second parameter was variance not -- standard deviation. normalDistr :: Double -> Double -> NormalDistribution -- | Create distribution using parameters estimated from sample. Variance -- is estimated using maximum likelihood method (biased estimation). normalFromSample :: Sample -> NormalDistribution -- | Standard normal distribution with mean equal to 0 and variance equal -- to 1 standard :: NormalDistribution instance Typeable NormalDistribution instance Eq NormalDistribution instance Read NormalDistribution instance Show NormalDistribution instance Data NormalDistribution instance Generic NormalDistribution instance Datatype D1NormalDistribution instance Constructor C1_0NormalDistribution instance Selector S1_0_0NormalDistribution instance Selector S1_0_1NormalDistribution instance Selector S1_0_2NormalDistribution instance Selector S1_0_3NormalDistribution instance ContGen NormalDistribution instance Variance NormalDistribution instance MaybeVariance NormalDistribution instance Mean NormalDistribution instance MaybeMean NormalDistribution instance ContDistr NormalDistribution instance Distribution NormalDistribution instance Binary NormalDistribution -- | Mann-Whitney U test (also know as Mann-Whitney-Wilcoxon and Wilcoxon -- rank sum test) is a non-parametric test for assesing whether two -- samples of independent observations have different mean. module Statistics.Test.MannWhitneyU -- | Perform Mann-Whitney U Test for two samples and required significance. -- For additional information check documentation of mannWhitneyU -- and mannWhitneyUSignificant. This is just a helper function. -- -- One-tailed test checks whether first sample is significantly larger -- than second. Two-tailed whether they are significantly different. mannWhitneyUtest :: TestType -> Double -> Sample -> Sample -> Maybe TestResult -- | The Mann-Whitney U Test. -- -- This is sometimes known as the Mann-Whitney-Wilcoxon U test, and -- confusingly many sources state that the Mann-Whitney U test is the -- same as the Wilcoxon's rank sum test (which is provided as -- wilcoxonRankSums). The Mann-Whitney U is a simple transform of -- Wilcoxon's rank sum test. -- -- Again confusingly, different sources state reversed definitions for U₁ -- and U₂, so it is worth being explicit about what this function -- returns. Given two samples, the first, xs₁, of size n₁ and the second, -- xs₂, of size n₂, this function returns (U₁, U₂) where U₁ = W₁ - -- (n₁(n₁+1))/2 and U₂ = W₂ - (n₂(n₂+1))/2, where (W₁, W₂) is the return -- value of wilcoxonRankSums xs1 xs2. -- -- Some sources instead state that U₁ and U₂ should be the other way -- round, often expressing this using U₁' = n₁n₂ - U₁ (since U₁ + U₂ = -- n₁n₂). -- -- All of which you probably don't care about if you just feed this into -- mannWhitneyUSignificant. mannWhitneyU :: Sample -> Sample -> (Double, Double) -- | Calculates the critical value of Mann-Whitney U for the given sample -- sizes and significance level. -- -- This function returns the exact calculated value of U for all sample -- sizes; it does not use the normal approximation at all. Above sample -- size 20 it is generally recommended to use the normal approximation -- instead, but this function will calculate the higher critical values -- if you need them. -- -- The algorithm to generate these values is a faster, memoised version -- of the simple unoptimised generating function given in section 2 of -- "The Mann Whitney Wilcoxon Distribution Using Linked Lists" mannWhitneyUCriticalValue :: (Int, Int) -> Double -> Maybe Int -- | Calculates whether the Mann Whitney U test is significant. -- -- If both sample sizes are less than or equal to 20, the exact U -- critical value (as calculated by mannWhitneyUCriticalValue) is -- used. If either sample is larger than 20, the normal approximation is -- used instead. -- -- If you use a one-tailed test, the test indicates whether the first -- sample is significantly larger than the second. If you want the -- opposite, simply reverse the order in both the sample size and the -- (U₁, U₂) pairs. mannWhitneyUSignificant :: TestType -> (Int, Int) -> Double -> (Double, Double) -> Maybe TestResult -- | The Wilcoxon Rank Sums Test. -- -- This test calculates the sum of ranks for the given two samples. The -- samples are ordered, and assigned ranks (ties are given their average -- rank), then these ranks are summed for each sample. -- -- The return value is (W₁, W₂) where W₁ is the sum of ranks of the first -- sample and W₂ is the sum of ranks of the second sample. This test is -- trivially transformed into the Mann-Whitney U test. You will probably -- want to use mannWhitneyU and the related functions for testing -- significance, but this function is exposed for completeness. wilcoxonRankSums :: Sample -> Sample -> (Double, Double) -- | Test type. Exact meaning depends on a specific test. But generally -- it's tested whether some statistics is too big (small) for -- OneTailed or whether it too big or too small for -- TwoTailed data TestType OneTailed :: TestType TwoTailed :: TestType -- | Result of hypothesis testing data TestResult -- | Null hypothesis should be rejected Significant :: TestResult -- | Data is compatible with hypothesis NotSignificant :: TestResult -- | Functions for performing non-parametric tests (i.e. tests without an -- assumption of underlying distribution). -- | Deprecated: Use S.Test.MannWhitneyU and S.Test.WilcoxonT instead -- module Statistics.Test.NonParametric -- | The bootstrap method for statistical inference. module Statistics.Resampling.Bootstrap -- | A point and interval estimate computed via an Estimator. data Estimate Estimate :: {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> {-# UNPACK #-} !Double -> Estimate -- | Point estimate. estPoint :: Estimate -> {-# UNPACK #-} !Double -- | Lower bound of the estimate interval (i.e. the lower bound of the -- confidence interval). estLowerBound :: Estimate -> {-# UNPACK #-} !Double -- | Upper bound of the estimate interval (i.e. the upper bound of the -- confidence interval). estUpperBound :: Estimate -> {-# UNPACK #-} !Double -- | Confidence level of the confidence intervals. estConfidenceLevel :: Estimate -> {-# UNPACK #-} !Double -- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both bias -- and skewness in the resampled distribution. bootstrapBCA :: Double -> Sample -> [Estimator] -> [Resample] -> [Estimate] -- | Multiply the point, lower bound, and upper bound in an Estimate -- by the given value. scale :: Double -> Estimate -> Estimate instance Typeable Estimate instance Eq Estimate instance Read Estimate instance Show Estimate instance Data Estimate instance Generic Estimate instance Datatype D1Estimate instance Constructor C1_0Estimate instance Selector S1_0_0Estimate instance Selector S1_0_1Estimate instance Selector S1_0_2Estimate instance Selector S1_0_3Estimate instance NFData Estimate instance Binary Estimate -- | Kernel density estimation code, providing non-parametric ways to -- estimate the probability density function of a sample. -- -- The techniques used by functions in this module are relatively fast, -- but they generally give inferior results to the KDE function in the -- main KernelDensity module (due to the oversmoothing documented -- for bandwidth below). -- | Deprecated: Use Statistics.Sample.KernelDensity instead. module Statistics.Sample.KernelDensity.Simple -- | Simple Epanechnikov kernel density estimator. Returns the uniformly -- spaced points from the sample range at which the density function was -- estimated, and the estimates at those points. epanechnikovPDF :: Vector v Double => Int -> v Double -> (Points, Vector Double) -- | Simple Gaussian kernel density estimator. Returns the uniformly spaced -- points from the sample range at which the density function was -- estimated, and the estimates at those points. gaussianPDF :: Vector v Double => Int -> v Double -> (Points, Vector Double) -- | Points from the range of a Sample. newtype Points Points :: Vector Double -> Points fromPoints :: Points -> Vector Double -- | Choose a uniform range of points at which to estimate a sample's -- probability density function. -- -- If you are using a Gaussian kernel, multiply the sample's bandwidth by -- 3 before passing it to this function. -- -- If this function is passed an empty vector, it returns values of -- positive and negative infinity. choosePoints :: Vector v Double => Int -> Double -> v Double -> Points -- | The width of the convolution kernel used. type Bandwidth = Double -- | Compute the optimal bandwidth from the observed data for the given -- kernel. -- -- This function uses an estimate based on the standard deviation of a -- sample (due to Deheuvels), which performs reasonably well for unimodal -- distributions but leads to oversmoothing for more complex ones. bandwidth :: Vector v Double => (Double -> Bandwidth) -> v Double -> Bandwidth -- | Bandwidth estimator for an Epanechnikov kernel. epanechnikovBW :: Double -> Bandwidth -- | Bandwidth estimator for a Gaussian kernel. gaussianBW :: Double -> Bandwidth -- | The convolution kernel. Its parameters are as follows: -- -- type Kernel = Double -> Double -> Double -> Double -> Double -- | Epanechnikov kernel for probability density function estimation. epanechnikovKernel :: Kernel -- | Gaussian kernel for probability density function estimation. gaussianKernel :: Kernel -- | Kernel density estimator, providing a non-parametric way of estimating -- the PDF of a random variable. estimatePDF :: Vector v Double => Kernel -> Bandwidth -> v Double -> Points -> Vector Double -- | A helper for creating a simple kernel density estimation function with -- automatically chosen bandwidth and estimation points. simplePDF :: Vector v Double => (Double -> Double) -> Kernel -> Double -> Int -> v Double -> (Points, Vector Double) instance Typeable Points instance Eq Points instance Read Points instance Show Points instance Data Points instance Generic Points instance Datatype D1Points instance Constructor C1_0Points instance Selector S1_0_0Points instance Binary Points -- | Functions for computing autocovariance and autocorrelation of a -- sample. module Statistics.Autocorrelation -- | Compute the autocovariance of a sample, i.e. the covariance of the -- sample against a shifted version of itself. autocovariance :: (Vector v Double, Vector v Int) => v Double -> v Double -- | Compute the autocorrelation function of a sample, and the upper and -- lower bounds of confidence intervals for each element. -- -- Note: The calculation of the 95% confidence interval assumes a -- stationary Gaussian process. autocorrelation :: (Vector v Double, Vector v Int) => v Double -> (v Double, v Double, v Double)