```-- |
-- Module    : Statistics.Test.NonParametric
-- Copyright : (c) 2010 Neil Brown
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Functions for performing non-parametric tests (i.e. tests without an assumption
-- of underlying distribution).
module Statistics.Test.NonParametric
(-- * Mann-Whitney U test (non-parametric equivalent to the independent t-test)
mannWhitneyU, mannWhitneyUCriticalValue, mannWhitneyUSignificant,
-- * Wilcoxon signed-rank matched-pair test (non-parametric equivalent to the paired t-test)
wilcoxonMatchedPairSignedRank, wilcoxonMatchedPairSignificant, wilcoxonMatchedPairSignificance, wilcoxonMatchedPairCriticalValue,
-- * Wilcoxon rank sum test
wilcoxonRankSums) where

import Control.Applicative ((<\$>))
import Control.Arrow ((***))
import Data.Function (on)
import Data.List (findIndex, groupBy, partition, sortBy)
import Data.Ord (comparing)
import qualified Data.Vector.Unboxed as U (length, toList, zipWith)

import Statistics.Distribution (quantile)
import Statistics.Distribution.Normal (standard)
import Statistics.Math (choose)
import Statistics.Types (Sample)

-- | The Wilcoxon Rank Sums Test.
--
-- This test calculates the sum of ranks for the given two samples.  The samples
-- are ordered, and assigned ranks (ties are given their average rank), then these
-- ranks are summed for each sample.
--
-- The return value is (W_1, W_2) where W_1 is the sum of ranks of the first sample
-- and W_2 is the sum of ranks of the second sample.  This test is trivially transformed
-- into the Mann-Whitney U test.  You will probably want to use 'mannWhitneyU'
-- and the related functions for testing significance, but this function is exposed
-- for completeness.
wilcoxonRankSums :: Sample -> Sample -> (Double, Double)
wilcoxonRankSums xs1 xs2
= ((sum . map fst) *** (sum . map fst)) . -- sum the ranks per group
partition snd . -- split them back into left and right
concatMap mergeRanks . -- merge the ranks of duplicates
groupBy ((==) `on` (snd . snd)) . -- group duplicate values
zip [1..] . -- give them ranks (duplicates receive different ranks here)
sortBy (comparing snd) \$ -- sort by their values
zip (repeat True) (U.toList xs1) ++ zip (repeat False) (U.toList xs2)
-- Tag each sample with an identifier before we merge them
where
mergeRanks :: [(AbsoluteRank, (Bool, Double))] -> [(AbsoluteRank, Bool)]
mergeRanks xs = zip (repeat rank) (map (fst . snd) xs)
where
-- Ranks are merged by assigning them all the average of their ranks:
rank = sum (map fst xs) / fromIntegral (length xs)

-- | The Mann-Whitney U Test.
--
-- This is sometimes known as the Mann-Whitney-Wilcoxon U test, and
-- confusingly many sources state that the Mann-Whitney U test is the same as
-- the Wilcoxon's rank sum test (which is provided as 'wilcoxonRankSums').
-- The Mann-Whitney U is a simple transform of Wilcoxon's rank sum test.
--
-- Again confusingly, different sources state reversed definitions for U_1 and U_2,
-- so it is worth being explicit about what this function returns.  Given two samples,
-- the first, xs_1, of size n_1 and the second, xs_2, of size n_2, this function
-- returns (U_1, U_2) where U_1 = W_1 - (n_1*(n_1+1))\/2 and U_2 = W_2 - (n_2*(n_2+1))\/2,
-- where (W_1, W_2) is the return value of @wilcoxonRankSums xs1 xs2@.
--
-- Some sources instead state that U_1 and U_2 should be the other way round, often
-- expressing this using U_1' = n_1*n_2 - U_1 (since U_1 + U_2 = n_1*n*2).
--
-- All of which you probably don't care about if you just feed this into 'mannWhitneyUSignificant'.
mannWhitneyU :: Sample -> Sample -> (Double, Double)
mannWhitneyU xs1 xs2
= (fst summedRanks - (n1*(n1 + 1))/2
,snd summedRanks - (n2*(n2 + 1))/2)
where
n1 = fromIntegral \$ U.length xs1
n2 = fromIntegral \$ U.length xs2

summedRanks = wilcoxonRankSums xs1 xs2

-- | Calculates the critical value of Mann-Whitney U for the given sample
-- sizes and significance level.
--
-- This function returns the exact calculated value of U for all sample sizes;
-- it does not use the normal approximation at all.  Above sample size 20 it is
-- generally recommended to use the normal approximation instead, but this function
-- will calculate the higher critical values if you need them.
--
-- The algorithm to generate these values is a faster, memoised version of the
-- simple unoptimised generating function given in section 2 of \"The Mann Whitney
-- Wilcoxon Distribution Using Linked Lists\", Cheung and Klotz, Statistica Sinica
-- 7 (1997), <http://www3.stat.sinica.edu.tw/statistica/oldpdf/A7n316.pdf>.
mannWhitneyUCriticalValue :: (Int, Int) -- ^ The sample size
-> Double -- ^ The p-value (e.g. 0.05) for which you want the critical value.
-> Maybe Int -- ^ The critical value (of U).
mannWhitneyUCriticalValue (m, n) p
| p' <= 1 = Nothing
| m < 1 || n < 1 = Nothing
| otherwise = findIndex (>= p') \$ let
firstHalf = map fromIntegral \$ take (((m*n)+1)`div`2) \$ tail \$ alookup !! (m+n-2) !! (min m n - 1)
{- Original: [fromIntegral \$ a k (m+n) (min m n) | k <- [1..m*n]] -}
secondHalf
| even (m*n) = reverse firstHalf
| otherwise = tail \$ reverse firstHalf
in firstHalf ++ map (mnCn -) secondHalf
where
mnCn = (m+n) `choose` n
p' = mnCn * p

{- Original function, without memoisation, from Cheung and Klotz:
a :: Int -> Int -> Int -> Int
a u bigN m
| u < 0 = 0
| u >= (m * smalln) = floor \$ fromIntegral bigN `choose` fromIntegral m
| m == 1 || smalln == 1 = u + 1
| otherwise = a u (bigN - 1) m
+ a (u - smalln) (bigN - 1) (m-1)
where smalln = bigN - m
-}

-- Memoised version of the original a function, above.
--
-- outer list is indexed by big N - 2
-- inner list by m (we know m < bigN)
-- innermost list by u
--
-- So: (alookup ! (bigN - 2) ! m ! u) == a u bigN m
alookup :: [[[Int]]]
alookup = gen 2 [1 : repeat 2]
where
gen bigN predBigNList
= let bigNlist = [ let limit = round \$ fromIntegral bigN `choose` fromIntegral m
in [amemoed u m | u <- [0..m*(bigN-m)]] ++ repeat limit
| m <- [1..(bigN-1)]] -- has bigN-1 elements
in bigNlist : gen (bigN+1) bigNlist
where
amemoed :: Int -> Int -> Int
amemoed u m
| m == 1 || smalln == 1 = u + 1
| otherwise = let (predmList : mList : _) = drop (m-2) predBigNList -- m-2 because starts at 1
-- We know that predBigNList has bigN - 2 elements
-- (and we know that smalln > 1 therefore bigN > m + 1)
-- So bigN - 2 >= m, i.e. predBigNList must have at least m elements
-- elements, so dropping (m-2) must leave at least 2
in (mList !! u) + (if u < smalln then 0 else predmList !! (u - smalln))
where smalln = bigN - m

-- | Calculates whether the Mann Whitney U test is significant.
--
-- If both sample sizes are less than or equal to 20, the exact U critical value
-- (as calculated by 'mannWhitneyUCriticalValue') is used.  If either sample is
-- larger than 20, the normal approximation is used instead.
--
-- If you use a one-tailed test, the test indicates whether the first sample is
-- significantly larger than the second.  If you want the opposite, simply reverse
-- the order in both the sample size and the (U_1, U_2) pairs.
mannWhitneyUSignificant :: Bool -- ^ Perform one-tailed test (see description above).
-> (Int, Int)  -- ^ The sample size from which the (U_1,U_2) values were derived.
-> Double -- ^ The p-value at which to test (e.g. 0.05)
-> (Double, Double) -- ^ The (U_1, U_2) values from 'mannWhitneyU'.
-> Maybe Bool -- ^ Just True if the test is significant, Just
-- False if it is not, and Nothing if the sample
-- was too small to make a decision.
mannWhitneyUSignificant oneTail (in1, in2) p (u1, u2)
| in1 > 20 || in2 > 20 --Use normal approximation
--     = (n1*(n1+1))/2 - u1 - (n1*(n1+n2))/2
--     = (n1*(n1+1))/2 - (-2*u1 + n1*(n1+n2))/2
--     = (n1*(n1+1) - 2*u1 + n1*(n1+n2))/2
--     = (n1*(2*n1 + n2 + 1) - 2*u1)/2
= let num = (n1*(2*n1 + n2 + 1)) / 2 - u1
denom = sqrt \$ n1*n2*(n1 + n2 + 1) / 12
z = num / denom
zcrit = quantile standard (1 - if oneTail then p else p/2)
in Just \$ (if oneTail then z else abs z) > zcrit
| otherwise = do crit <- fromIntegral <\$> mannWhitneyUCriticalValue (in1, in2) p
return \$ if oneTail
then u2 <= crit
else min u1 u2 <= crit
where
n1 = fromIntegral in1
n2 = fromIntegral in2

-- | The Wilcoxon matched-pairs signed-rank test.
--
-- The value returned is the pair (T+, T-).  T+ is the sum of positive ranks (the
-- ranks of the differences where the first parameter is higher) whereas T- is
-- the sum of negative ranks (the ranks of the differences where the second parameter is higher).
-- These values mean little by themselves, and should be combined with the 'wilcoxonSignificant'
-- function in this module to get a meaningful result.
--
-- The samples are zipped together: if one is longer than the other, both are truncated
-- to the the length of the shorter sample.
--
-- Note that: wilcoxonMatchedPairSignedRank == (\(x, y) -> (y, x)) . flip wilcoxonMatchedPairSignedRank
wilcoxonMatchedPairSignedRank :: Sample -> Sample -> (Double, Double)
wilcoxonMatchedPairSignedRank a b
-- Best to read this function bottom to top:
= (sum *** sum) . -- Sum the positive and negative ranks separately.
partition (> 0) . -- Split the ranks into positive and negative.  None of the
-- ranks can be zero.
concatMap mergeRanks . -- Then merge the ranks for any duplicates by taking
-- the average of the ranks, and also make the rank
-- into a signed rank
groupBy ((==) `on` abs . snd) . -- Now group any duplicates together
-- Note: duplicate means same absolute difference
zip [1..] . -- Add a rank (note: at this stage, duplicates will get different ranks)
dropWhile (== 0) . -- Remove any differences that are zero (i.e. ties in the
-- original data).  We know they must be at the head of
-- the list because we just sorted it, so dropWhile not filter
sortBy (comparing abs) . -- Sort the differences by absolute difference
U.toList \$ -- Convert to a list (could be done later in the pipeline?)
U.zipWith (-) a b -- Work out differences
where
mergeRanks :: [(AbsoluteRank, Double)] -> [SignedRank]
mergeRanks xs = map ((* rank) . signum . snd) xs
-- Note that signum above will always be 1 or -1; any zero differences will
-- have been removed before this function is called.
where
-- Ranks are merged by assigning them all the average of their ranks:
rank = sum (map fst xs) / fromIntegral (length xs)

type AbsoluteRank = Double
type SignedRank = Double

-- | The coefficients for x^0, x^1, x^2, etc, in the expression
-- \prod_{r=1}^s (1 + x^r).  See the Mitic paper for details.
--
-- We can define:
-- f(1) = 1 + x
-- f(r) = (1 + x^r)*f(r-1)
--      = f(r-1) + x^r * f(r-1)
-- The effect of multiplying the equation by x^r is to shift
-- all the coefficients by r down the list.
--
-- This list will be processed lazily from the head.
coefficients :: Int -> [Int]
coefficients 1 = [1, 1] -- 1 + x
coefficients r = let coeffs = coefficients (r-1)
(firstR, rest) = splitAt r coeffs
in firstR ++ add rest coeffs
where

-- This list will be processed lazily from the head.
summedCoefficients :: Int -> [Double]
summedCoefficients = map fromIntegral . scanl1 (+) . coefficients

-- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs test
-- is significant at the given level.
--
-- This function can perform a one-tailed or two-tailed test.  If the first
-- parameter to this function is False, the test is performed two-tailed to
-- check if the two samples differ significantly.  If the first parameter is
-- True, the check is performed one-tailed to decide whether the first sample
-- (i.e. the first sample you passed to 'wilcoxonMatchedPairSignedRank') is
-- greater than the second sample (i.e. the second sample you passed to
-- 'wilcoxonMatchedPairSignedRank').  If you wish to perform a one-tailed test
-- in the opposite direction, you can either pass the parameters in a different
-- order to 'wilcoxonMatchedPairSignedRank', or simply swap the values in the resulting
-- pair before passing them to this function.
wilcoxonMatchedPairSignificant :: Bool -- ^ Perform one-tailed test (see description above).
-> Int  -- ^ The sample size from which the (T+,T-) values were derived.
-> Double -- ^ The p-value at which to test (e.g. 0.05)
-> (Double, Double) -- ^ The (T+, T-) values from 'wilcoxonMatchedPairSignedRank'.
-> Maybe Bool -- ^ Just True if the test is significant, Just
-- False if it is not, and Nothing if the sample
-- was too small to make a decision.
wilcoxonMatchedPairSignificant oneTail sampleSize p (tPlus, tMinus)
-- According to my nearest book (Understanding Research Methods and Statistics
-- by Gary W. Heiman, p590), to check that the first sample is bigger you must
-- use the absolute value of T- for a one-tailed check:
| oneTail = ((abs tMinus <=) . fromIntegral) <\$> wilcoxonMatchedPairCriticalValue sampleSize p
-- Otherwise you must use the value of T+ and T- with the smallest absolute value:
| otherwise = ((t <=) . fromIntegral) <\$> wilcoxonMatchedPairCriticalValue sampleSize (p/2)
where
t = min (abs tPlus) (abs tMinus)

-- | Obtains the critical value of T to compare against, given a sample size
-- and a p-value (significance level).  Your T value must be less than or
-- equal to the return of this function in order for the test to work out
-- significant.  If there is a Nothing return, the sample size is too small to
-- make a decision.
--
-- 'wilcoxonSignificant' tests the return value of 'wilcoxonMatchedPairSignedRank'
-- for you, so you should use 'wilcoxonSignificant' for determining test results.
--  However, this function is useful, for example, for generating lookup tables
-- for Wilcoxon signed rank critical values.
--
-- The return values of this function are generated using the method detailed in
-- the paper \"Critical Values for the Wilcoxon Signed Rank Statistic\", Peter
-- Mitic, The Mathematica Journal, volume 6, issue 3, 1996, which can be found
-- here: <http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf>.
-- According to that paper, the results may differ from other published lookup tables, but
-- (Mitic claims) the values obtained by this function will be the correct ones.
wilcoxonMatchedPairCriticalValue :: Int -- ^ The sample size
-> Double -- ^ The p-value (e.g. 0.05) for which you want the critical value.
-> Maybe Int -- ^ The critical value (of T), or Nothing if
-- the sample is too small to make a decision.
wilcoxonMatchedPairCriticalValue sampleSize p
= case critical of
Just n | n < 0 -> Nothing
| otherwise -> Just n
Nothing -> Just maxBound -- shouldn't happen: beyond end of list
where
m = (2 ** fromIntegral sampleSize) * p
critical = subtract 1 <\$> findIndex (> m) (summedCoefficients sampleSize)

-- | Works out the significance level (p-value) of a T value, given a sample
-- size and a T value from the Wilcoxon signed-rank matched-pairs test.
--
-- See the notes on 'wilcoxonCriticalValue' for how this is calculated.
wilcoxonMatchedPairSignificance :: Int -- ^ The sample size
-> Double -- ^ The value of T for which you want the significance.
-> Double -- ^^ The significance (p-value).
wilcoxonMatchedPairSignificance sampleSize rank
= (summedCoefficients sampleSize !! floor rank) / 2 ** fromIntegral sampleSize

```