-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A library of statistical types, data, and functions -- -- This library provides a number of common functions and types useful in -- statistics. Our focus is on high performance, numerical robustness, -- and use of good algorithms. Where possible, we provide references to -- the statistical literature. -- -- The library's facilities can be divided into four broad categories: -- -- Working with widely used discrete and continuous probability -- distributions. (There are dozens of exotic distributions in use; we -- focus on the most common.) -- -- Computing with sample data: quantile estimation, kernel density -- estimation, bootstrap methods, significance testing, and -- autocorrelation analysis. -- -- Random variate generation under several different distributions. -- -- Common statistical tests for significant differences between samples. @package statistics @version 0.8.0.1 -- | Useful functions. module Statistics.Function -- | Compute the minimum and maximum of a vector in one pass. minMax :: (Vector v Double) => v Double -> (Double, Double) -- | Sort a vector. sort :: (Ord e, Vector v e) => v e -> v e -- | Partially sort a vector, such that the least k elements will be -- at the front. partialSort :: (Vector v e, Ord e) => Int -> v e -> v e -- | Zip a vector with its indices. indexed :: (Vector v e, Vector v Int, Vector v (Int, e)) => v e -> v (Int, e) -- | Return the indices of a vector. indices :: (Vector v a, Vector v Int) => v a -> v Int -- | Create a vector, using the given action to populate each element. create :: (PrimMonad m, Vector v e) => Int -> (Int -> m e) -> m (v e) -- | Types for working with statistics. module Statistics.Types -- | A function that estimates a property of a sample, such as its -- mean. type Estimator = Sample -> Double -- | Sample data. type Sample = Vector Double -- | Sample with weights. 
First element of sample is data, second is weight type WeightedSample = Vector (Double, Double) -- | Weights for affecting the importance of elements of a sample. type Weights = Vector Double -- | Resampling statistics. module Statistics.Resampling -- | A resample drawn randomly, with replacement, from a set of data -- points. Distinct from a normal array to make it harder for your humble -- author's brain to go wrong. newtype Resample Resample :: Vector Double -> Resample fromResample :: Resample -> Vector Double -- | Compute a statistical estimate repeatedly over a sample, each time -- omitting a successive element. jackknife :: Estimator -> Sample -> Vector Double -- | Resample a data set repeatedly, with replacement, computing each -- estimate over the resampled data. resample :: (PrimMonad m) => Gen (PrimState m) -> [Estimator] -> Int -> Sample -> m [Resample] instance Eq Resample instance Show Resample -- | Type classes for probability distributions. module Statistics.Distribution -- | Type class common to all distributions. Only the c.d.f. can be defined -- for both discrete and continuous distributions. class Distribution d cumulative :: (Distribution d) => d -> Double -> Double -- | Discrete probability distribution. class (Distribution d) => DiscreteDistr d probability :: (DiscreteDistr d) => d -> Int -> Double -- | Continuous probability distribution. class (Distribution d) => ContDistr d density :: (ContDistr d) => d -> Double -> Double quantile :: (ContDistr d) => d -> Double -> Double -- | Type class for distributions with mean. class (Distribution d) => Mean d mean :: (Mean d) => d -> Double -- | Type class for distributions with variance. class (Mean d) => Variance d variance :: (Variance d) => d -> Double -- | Approximate the value of X for which -- P(x>X)=p. -- -- This method uses a combination of Newton-Raphson iteration and -- bisection with the given guess as a starting point. 
The upper and -- lower bounds specify the interval in which the probability -- distribution reaches the value p. findRoot :: (ContDistr d) => d -> Double -> Double -> Double -> Double -> Double -- | Sum probabilities in inclusive interval. sumProbabilities :: (DiscreteDistr d) => d -> Int -> Int -> Double -- | The Geometric distribution. This is the probability distribution of -- the number of Bernoulli trials needed to get one success, supported on -- the set [1,2..]. -- -- This distribution is sometimes referred to as the shifted -- geometric distribution, to distinguish it from a variant measuring the -- number of failures before the first success, defined over the set -- [0,1..]. module Statistics.Distribution.Geometric data GeometricDistribution -- | Create geometric distribution geometric :: Double -> GeometricDistribution gdSuccess :: GeometricDistribution -> Double instance Typeable GeometricDistribution instance Eq GeometricDistribution instance Read GeometricDistribution instance Show GeometricDistribution instance Mean GeometricDistribution instance Variance GeometricDistribution instance DiscreteDistr GeometricDistribution instance Distribution GeometricDistribution -- | Constant values common to much statistics code. module Statistics.Constants -- | The smallest Double ε such that 1 + ε ≠ 1. m_epsilon :: Double -- | A very large number. m_huge :: Double -- |
-- 1 / sqrt 2 -- m_1_sqrt_2 :: Double -- |
-- 2 / sqrt pi -- m_2_sqrt_pi :: Double -- |
-- log(sqrt(2*pi)) -- m_ln_sqrt_2_pi :: Double -- | The largest Int x such that 2**(x-1) is -- approximately representable as a Double. m_max_exp :: Int -- |
-- sqrt 2 -- m_sqrt_2 :: Double -- |
-- sqrt (2 * pi) --m_sqrt_2_pi :: Double -- | Positive infinity. m_pos_inf :: Double -- | Negative infinity. m_neg_inf :: Double -- | Not a number. m_NaN :: Double -- | Functions for approximating quantiles, i.e. points taken at regular -- intervals from the cumulative distribution function of a random -- variable. -- -- The number of quantiles is described below by the variable q, -- so with q=4, a 4-quantile (also known as a quartile) has -- 4 intervals, and contains 5 points. The parameter k describes -- the desired point, where 0 ≤ k ≤ q. module Statistics.Quantile -- | O(n log n). Estimate the kth q-quantile of -- a sample, using the weighted average method. weightedAvg :: (Vector v Double) => Int -> Int -> v Double -> Double -- | Parameters a and b to the continuousBy function. data ContParam ContParam :: !!Double -> !!Double -> ContParam -- | O(n log n). Estimate the kth q-quantile of -- a sample x, using the continuous sample method with the given -- parameters. This is the method used by most statistical software, such -- as R, Mathematica, SPSS, and S. continuousBy :: (Vector v Double) => ContParam -> Int -> Int -> v Double -> Double -- | O(n log n). Estimate the range between -- q-quantiles 1 and q-1 of a sample x, using the -- continuous sample method with the given parameters. -- -- For instance, the interquartile range (IQR) can be estimated as -- follows: -- --
-- midspread medianUnbiased 4 (U.fromList [1,1,2,2,3]) -- ==> 1.333333 --midspread :: (Vector v Double) => ContParam -> Int -> v Double -> Double -- | California Department of Public Works definition, a=0, -- b=1. Gives a linear interpolation of the empirical CDF. This -- corresponds to method 4 in R and Mathematica. cadpw :: ContParam -- | Hazen's definition, a=0.5, b=0.5. This is claimed to be -- popular among hydrologists. This corresponds to method 5 in R and -- Mathematica. hazen :: ContParam -- | Definition used by the S statistics application, with a=1, -- b=1. The interpolation points divide the sample range into -- n-1 intervals. This corresponds to method 7 in R and -- Mathematica. s :: ContParam -- | Definition used by the SPSS statistics application, with a=0, -- b=0 (also known as Weibull's definition). This corresponds to -- method 6 in R and Mathematica. spss :: ContParam -- | Median unbiased definition, a=1/3, b=1/3. The resulting -- quantile estimates are approximately median unbiased regardless of the -- distribution of x. This corresponds to method 8 in R and -- Mathematica. medianUnbiased :: ContParam -- | Normal unbiased definition, a=3/8, b=3/8. An -- approximately unbiased estimate if the empirical distribution -- approximates the normal distribution. This corresponds to method 9 in -- R and Mathematica. normalUnbiased :: ContParam -- | Commonly used sample statistics, also known as descriptive statistics. module Statistics.Sample -- | Sample data. type Sample = Vector Double -- | Sample with weights. First element of sample is data, second is weight type WeightedSample = Vector (Double, Double) range :: (Vector v Double) => v Double -> Double -- | Arithmetic mean. This uses Welford's algorithm to provide numerical -- stability, using a single pass over the sample data. mean :: (Vector v Double) => v Double -> Double -- | Arithmetic mean for weighted sample. 
It uses algorithm analogous to -- one in mean meanWeighted :: (Vector v (Double, Double)) => v (Double, Double) -> Double -- | Harmonic mean. This algorithm performs a single pass over the sample. harmonicMean :: (Vector v Double) => v Double -> Double -- | Geometric mean of a sample containing no negative values. geometricMean :: (Vector v Double) => v Double -> Double -- | Compute the kth central moment of a sample. The central moment -- is also known as the moment about the mean. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoment :: (Vector v Double) => Int -> v Double -> Double -- | Compute the kth and jth central moments of a sample. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. centralMoments :: (Vector v Double) => Int -> Int -> v Double -> (Double, Double) -- | Compute the skewness of a sample. This is a measure of the asymmetry -- of its distribution. -- -- A sample with negative skew is said to be left-skewed. Most of -- its mass is on the right of the distribution, with the tail on the -- left. -- --
-- skewness $ U.to [1,100,101,102,103] -- ==> -1.497681449918257 ---- -- A sample with positive skew is said to be right-skewed. -- --
-- skewness $ U.to [1,2,3,4,100] -- ==> 1.4975367033335198 ---- -- A sample's skewness is not defined if its variance is zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. skewness :: (Vector v Double) => v Double -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that more -- of the sample's variance is due to infrequent severe deviations, -- rather than more frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- This function performs two passes over the sample, so is not subject -- to stream fusion. -- -- For samples containing many values very close to the mean, this -- function is subject to inaccuracy due to catastrophic cancellation. kurtosis :: (Vector v Double) => v Double -> Double -- | Maximum likelihood estimate of a sample's variance. Also known as the -- population variance, where the denominator is n. variance :: (Vector v Double) => v Double -> Double -- | Unbiased estimate of a sample's variance. Also known as the sample -- variance, where the denominator is n-1. varianceUnbiased :: (Vector v Double) => v Double -> Double -- | Calculate mean and maximum likelihood estimate of variance. This -- function should be used if both mean and variance are required since -- it will calculate mean only once. meanVariance :: (Vector v Double) => v Double -> (Double, Double) -- | Calculate mean and unbiased estimate of variance. This function should -- be used if both mean and variance are required since it will calculate -- mean only once. meanVarianceUnb :: (Vector v Double) => v Double -> (Double, Double) -- | Standard deviation. This is simply the square root of the unbiased -- estimate of the variance. 
stdDev :: (Vector v Double) => v Double -> Double -- | Weighted variance. This is a biased estimate. varianceWeighted :: (Vector v (Double, Double)) => v (Double, Double) -> Double -- | Maximum likelihood estimate of a sample's variance. fastVariance :: (Vector v Double) => v Double -> Double -- | Unbiased estimate of a sample's variance. fastVarianceUnbiased :: (Vector v Double) => v Double -> Double -- | Standard deviation. This is simply the square root of the maximum -- likelihood estimate of the variance. fastStdDev :: (Vector v Double) => v Double -> Double -- | The exponential distribution. This is the continuous probability -- distribution of the times between events in a Poisson process, in -- which events occur continuously and independently at a constant -- average rate. module Statistics.Distribution.Exponential data ExponentialDistribution -- | Create exponential distribution. exponential :: Double -> ExponentialDistribution -- | Create exponential distribution from sample. No tests are made to -- check whether it really is exponential. exponentialFromSample :: Sample -> ExponentialDistribution edLambda :: ExponentialDistribution -> Double instance Typeable ExponentialDistribution instance Eq ExponentialDistribution instance Read ExponentialDistribution instance Show ExponentialDistribution instance Mean ExponentialDistribution instance Variance ExponentialDistribution instance ContDistr ExponentialDistribution instance Distribution ExponentialDistribution -- | The normal distribution. This is a continuous probability distribution -- that describes data that cluster around a mean. module Statistics.Distribution.Normal -- | The normal distribution. data NormalDistribution -- | Create normal distribution from parameters. normalDistr :: Double -> Double -> NormalDistribution -- | Create distribution using parameters estimated from sample. Variance -- is estimated using maximum likelihood method (biased estimation). 
normalFromSample :: Sample -> NormalDistribution -- | Standard normal distribution with mean equal to 0 and variance equal -- to 1 standard :: NormalDistribution instance Typeable NormalDistribution instance Eq NormalDistribution instance Read NormalDistribution instance Show NormalDistribution instance Mean NormalDistribution instance Variance NormalDistribution instance ContDistr NormalDistribution instance Distribution NormalDistribution -- | Mathematical functions for statistics. module Statistics.Math -- | Compute the binomial coefficient n `choose` -- k. For values of k > 30, this uses an approximation -- for performance reasons. The approximation is accurate to 12 decimal -- places in the worst case -- -- Example: -- --
-- 7 `choose` 3 == 35 --choose :: Int -> Int -> Double -- | Compute the natural logarithm of the beta function. logBeta :: Double -> Double -> Double -- | Evaluate a Chebyshev polynomial of the first kind. Uses Clenshaw's -- algorithm. chebyshev :: (Vector v Double) => Double -> v Double -> Double -- | Evaluate a Chebyshev polynomial of the first kind. Uses Broucke's -- ECHEB algorithm, and his convention for coefficient handling, and so -- gives different results than chebyshev for the same inputs. chebyshevBroucke :: (Vector v Double) => Double -> v Double -> Double -- | Compute the factorial function n!. Returns ∞ if the input is -- above 170 (above which the result cannot be represented by a 64-bit -- Double). factorial :: Int -> Double -- | Compute the natural logarithm of the factorial function. Gives 16 -- decimal digits of precision. logFactorial :: Int -> Double -- | Compute the normalized lower incomplete gamma function -- γ(s,x). Normalization means that γ(s,∞)=1. Uses -- Algorithm AS 239 by Shea. incompleteGamma :: Double -> Double -> Double -- | Compute the logarithm of the gamma function Γ(x). Uses -- Algorithm AS 245 by Macleod. -- -- Gives an accuracy of 10–12 significant decimal digits, except for -- small regions around x = 1 and x = 2, where the function -- goes to zero. For greater accuracy, use logGammaL. -- -- Returns ∞ if the input is outside of the range (0 < x ≤ -- 1e305). logGamma :: Double -> Double -- | Compute the logarithm of the gamma function, Γ(x). Uses a -- Lanczos approximation. -- -- This function is slower than logGamma, but gives 14 or more -- significant decimal digits of accuracy, except around x = 1 and -- x = 2, where the function goes to zero. -- -- Returns ∞ if the input is outside of the range (0 < x ≤ -- 1e305). logGammaL :: Double -> Double -- | Compute the natural logarithm of 1 + x. This is accurate even -- for values of x near zero, where use of log(1+x) -- would lose precision. 
log1p :: Double -> Double -- | The binomial distribution. This is the discrete probability -- distribution of the number of successes in a sequence of n -- independent yes/no experiments, each of which yields success with -- probability p. module Statistics.Distribution.Binomial -- | The binomial distribution. data BinomialDistribution -- | Construct binomial distribution. binomial :: Int -> Double -> BinomialDistribution -- | Number of trials. bdTrials :: BinomialDistribution -> Int -- | Probability. bdProbability :: BinomialDistribution -> Double instance Typeable BinomialDistribution instance Eq BinomialDistribution instance Read BinomialDistribution instance Show BinomialDistribution instance Mean BinomialDistribution instance Variance BinomialDistribution instance DiscreteDistr BinomialDistribution instance Distribution BinomialDistribution -- | The chi-squared distribution. This is the continuous probability -- distribution of the sum of squares of k independent standard normal -- random variables. It is commonly used in statistical tests. module Statistics.Distribution.ChiSquared -- | Chi-squared distribution. data ChiSquared -- | Construct chi-squared distribution. The argument is the number of degrees of freedom. chiSquared :: Int -> ChiSquared -- | Get number of degrees of freedom. chiSquaredNDF :: ChiSquared -> Int instance Typeable ChiSquared instance Show ChiSquared instance Variance ChiSquared instance Mean ChiSquared instance ContDistr ChiSquared instance Distribution ChiSquared -- | The gamma distribution. This is a continuous probability distribution -- with two parameters, k and ϑ. If k is integral, the -- distribution represents the sum of k independent exponentially -- distributed random variables, each of which has a mean of ϑ. module Statistics.Distribution.Gamma -- | The gamma distribution. data GammaDistribution gammaDistr :: Double -> Double -> GammaDistribution -- | Shape parameter, k. gdShape :: GammaDistribution -> Double -- | Scale parameter, ϑ. 
gdScale :: GammaDistribution -> Double instance Typeable GammaDistribution instance Eq GammaDistribution instance Read GammaDistribution instance Show GammaDistribution instance Mean GammaDistribution instance Variance GammaDistribution instance ContDistr GammaDistribution instance Distribution GammaDistribution -- | The Hypergeometric distribution. This is the discrete probability -- distribution that measures the probability of k successes in -- l trials, without replacement, from a finite population. -- -- The parameters of the distribution describe k elements chosen -- from a population of l, with m elements of one type, and -- l-m of the other (all are positive integers). module Statistics.Distribution.Hypergeometric data HypergeometricDistribution hypergeometric :: Int -> Int -> Int -> HypergeometricDistribution hdM :: HypergeometricDistribution -> Int hdL :: HypergeometricDistribution -> Int hdK :: HypergeometricDistribution -> Int instance Typeable HypergeometricDistribution instance Eq HypergeometricDistribution instance Read HypergeometricDistribution instance Show HypergeometricDistribution instance Mean HypergeometricDistribution instance Variance HypergeometricDistribution instance DiscreteDistr HypergeometricDistribution instance Distribution HypergeometricDistribution -- | The Poisson distribution. This is the discrete probability -- distribution of a number of events occurring in a fixed interval if -- these events occur with a known average rate, and occur independently -- from each other within that interval. 
module Statistics.Distribution.Poisson data PoissonDistribution -- | Create Poisson distribution. poisson :: Double -> PoissonDistribution poissonLambda :: PoissonDistribution -> Double instance Typeable PoissonDistribution instance Eq PoissonDistribution instance Read PoissonDistribution instance Show PoissonDistribution instance Mean PoissonDistribution instance Variance PoissonDistribution instance DiscreteDistr PoissonDistribution instance Distribution PoissonDistribution -- | Very fast statistics over simple powers of a sample. These can all be -- computed efficiently in just a single pass over a sample, with that -- pass subject to stream fusion. -- -- The tradeoff is that some of these functions are less numerically -- robust than their counterparts in the Statistics.Sample -- module. Where this is the case, the alternatives are noted. module Statistics.Sample.Powers data Powers -- | O(n). Collect the n simple powers of a sample. -- -- Functions computed over a sample's simple powers require at least a -- certain number (or order) of powers to be collected. -- --
-- skewness . powers 3 $ U.to [1,100,101,102,103] -- ==> -1.497681449918257 ---- -- A sample with positive skew is said to be right-skewed. -- --
-- skewness . powers 3 $ U.to [1,2,3,4,100] -- ==> 1.4975367033335198 -- -- -- A sample's skewness is not defined if its variance is zero. -- -- Requires Powers with order at least 3. skewness :: Powers -> Double -- | Compute the excess kurtosis of a sample. This is a measure of the -- "peakedness" of its distribution. A high kurtosis indicates that the -- sample's variance is due more to infrequent severe deviations than to -- frequent modest deviations. -- -- A sample's excess kurtosis is not defined if its variance is -- zero. -- -- Requires Powers with order at least 4. kurtosis :: Powers -> Double instance Eq Powers instance Show Powers -- | Functions for performing non-parametric tests (i.e. tests without an -- assumption of underlying distribution). module Statistics.Test.NonParametric -- | The Mann-Whitney U Test. -- -- This is sometimes known as the Mann-Whitney-Wilcoxon U test, and -- confusingly many sources state that the Mann-Whitney U test is the -- same as Wilcoxon's rank sum test (which is provided as -- wilcoxonRankSums). The Mann-Whitney U is a simple transform of -- Wilcoxon's rank sum test. -- -- Again confusingly, different sources state reversed definitions for -- U_1 and U_2, so it is worth being explicit about what this function -- returns. Given two samples, the first, xs_1, of size n_1 and the -- second, xs_2, of size n_2, this function returns (U_1, U_2) where U_1 -- = W_1 - (n_1*(n_1+1))/2 and U_2 = W_2 - (n_2*(n_2+1))/2, where (W_1, -- W_2) is the return value of wilcoxonRankSums xs_1 xs_2. -- -- Some sources instead state that U_1 and U_2 should be the other way -- round, often expressing this using U_1' = n_1*n_2 - U_1 (since U_1 + -- U_2 = n_1*n_2). -- -- All of which you probably don't care about if you just feed this into -- mannWhitneyUSignificant. mannWhitneyU :: Sample -> Sample -> (Double, Double) -- | Calculates the critical value of Mann-Whitney U for the given sample -- sizes and significance level. 
-- -- This function returns the exact calculated value of U for all sample -- sizes; it does not use the normal approximation at all. Above sample -- size 20 it is generally recommended to use the normal approximation -- instead, but this function will calculate the higher critical values -- if you need them. -- -- The algorithm to generate these values is a faster, memoised version -- of the simple unoptimised generating function given in section 2 of -- "The Mann Whitney Wilcoxon Distribution Using Linked Lists", Cheung -- and Klotz, Statistica Sinica 7 (1997), -- http://www3.stat.sinica.edu.tw/statistica/oldpdf/A7n316.pdf. mannWhitneyUCriticalValue :: (Int, Int) -> Double -> Maybe Int -- | Calculates whether the Mann-Whitney U test is significant. -- -- If both sample sizes are less than or equal to 20, the exact U -- critical value (as calculated by mannWhitneyUCriticalValue) is -- used. If either sample is larger than 20, the normal approximation is -- used instead. -- -- If you use a one-tailed test, the test indicates whether the first -- sample is significantly larger than the second. If you want the -- opposite, simply reverse the order in both the sample size and the -- (U_1, U_2) pairs. mannWhitneyUSignificant :: Bool -> (Int, Int) -> Double -> (Double, Double) -> Maybe Bool -- | The Wilcoxon matched-pairs signed-rank test. -- -- The value returned is the pair (T+, T-). T+ is the sum of positive -- ranks (the ranks of the differences where the first parameter is -- higher) whereas T- is the sum of negative ranks (the ranks of the -- differences where the second parameter is higher). These values mean -- little by themselves, and should be combined with the -- wilcoxonMatchedPairSignificant function in this module to get a -- meaningful result. -- -- The samples are zipped together: if one is longer than the other, both -- are truncated to the length of the shorter sample. -- -- Note that: wilcoxonMatchedPairSignedRank == (\(x, y) -> (y, x)) . 
-- flip wilcoxonMatchedPairSignedRank wilcoxonMatchedPairSignedRank :: Sample -> Sample -> (Double, Double) -- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs -- test is significant at the given level. -- -- This function can perform a one-tailed or two-tailed test. If the -- first parameter to this function is False, the test is performed -- two-tailed to check if the two samples differ significantly. If the -- first parameter is True, the check is performed one-tailed to decide -- whether the first sample (i.e. the first sample you passed to -- wilcoxonMatchedPairSignedRank) is greater than the second -- sample (i.e. the second sample you passed to -- wilcoxonMatchedPairSignedRank). If you wish to perform a -- one-tailed test in the opposite direction, you can either pass the -- parameters in a different order to -- wilcoxonMatchedPairSignedRank, or simply swap the values in the -- resulting pair before passing them to this function. wilcoxonMatchedPairSignificant :: Bool -> Int -> Double -> (Double, Double) -> Maybe Bool -- | Works out the significance level (p-value) of a T value, given a -- sample size and a T value from the Wilcoxon signed-rank matched-pairs -- test. -- -- See the notes on wilcoxonMatchedPairCriticalValue for how this is -- calculated. wilcoxonMatchedPairSignificance :: Int -> Double -> Double -- | Obtains the critical value of T to compare against, given a sample -- size and a p-value (significance level). Your T value must be less -- than or equal to the return of this function in order for the test to -- work out significant. If there is a Nothing return, the sample size is -- too small to make a decision. -- -- wilcoxonMatchedPairSignificant tests the return value of -- wilcoxonMatchedPairSignedRank for you, so you should use -- wilcoxonMatchedPairSignificant for determining test results. However, -- this function is useful, for example, for generating lookup tables for -- Wilcoxon signed rank critical values. 
-- -- The return values of this function are generated using the method -- detailed in the paper "Critical Values for the Wilcoxon Signed Rank -- Statistic", Peter Mitic, The Mathematica Journal, volume 6, issue 3, -- 1996, which can be found here: -- http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf. -- According to that paper, the results may differ from other published -- lookup tables, but (Mitic claims) the values obtained by this function -- will be the correct ones. wilcoxonMatchedPairCriticalValue :: Int -> Double -> Maybe Int -- | The Wilcoxon Rank Sums Test. -- -- This test calculates the sum of ranks for the given two samples. The -- samples are ordered, and assigned ranks (ties are given their average -- rank), then these ranks are summed for each sample. -- -- The return value is (W_1, W_2) where W_1 is the sum of ranks of the -- first sample and W_2 is the sum of ranks of the second sample. This -- test is trivially transformed into the Mann-Whitney U test. You will -- probably want to use mannWhitneyU and the related functions for -- testing significance, but this function is exposed for completeness. wilcoxonRankSums :: Sample -> Sample -> (Double, Double) -- | Kernel density estimation code, providing non-parametric ways to -- estimate the probability density function of a sample. module Statistics.KernelDensity -- | Simple Epanechnikov kernel density estimator. Returns the uniformly -- spaced points from the sample range at which the density function was -- estimated, and the estimates at those points. epanechnikovPDF :: (Vector v Double) => Int -> v Double -> (Points, Vector Double) -- | Simple Gaussian kernel density estimator. Returns the uniformly spaced -- points from the sample range at which the density function was -- estimated, and the estimates at those points. gaussianPDF :: (Vector v Double) => Int -> v Double -> (Points, Vector Double) -- | Points from the range of a Sample. 
newtype Points Points :: Vector Double -> Points fromPoints :: Points -> Vector Double -- | Choose a uniform range of points at which to estimate a sample's -- probability density function. -- -- If you are using a Gaussian kernel, multiply the sample's bandwidth by -- 3 before passing it to this function. -- -- If this function is passed an empty vector, it returns values of -- positive and negative infinity. choosePoints :: (Vector v Double) => Int -> Double -> v Double -> Points -- | The width of the convolution kernel used. type Bandwidth = Double -- | Compute the optimal bandwidth from the observed data for the given -- kernel. bandwidth :: (Vector v Double) => (Double -> Bandwidth) -> v Double -> Bandwidth -- | Bandwidth estimator for an Epanechnikov kernel. epanechnikovBW :: Double -> Bandwidth -- | Bandwidth estimator for a Gaussian kernel. gaussianBW :: Double -> Bandwidth -- | The convolution kernel. Its parameters are as follows: -- --