{-# LANGUAGE ViewPatterns #-}
-- |
-- Module    : Statistics.Test.WilcoxonT
-- Copyright : (c) 2010 Neil Brown
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The Wilcoxon matched-pairs signed-rank test is non-parametric test
-- which could be used to test whether two related samples have
-- different means.
module Statistics.Test.WilcoxonT (
    -- * Wilcoxon signed-rank matched-pair test
    -- ** Test
    wilcoxonMatchedPairTest
    -- ** Building blocks
  , wilcoxonMatchedPairSignedRank
  , wilcoxonMatchedPairSignificant
  , wilcoxonMatchedPairSignificance
  , wilcoxonMatchedPairCriticalValue
  , module Statistics.Test.Types
    -- * References
    -- $references
  ) where



--
--
--
-- Note that: wilcoxonMatchedPairSignedRank == (\(x, y) -> (y, x)) . flip wilcoxonMatchedPairSignedRank
-- The samples are zipped together: if one is longer than the other, both are truncated
-- The value returned is the pair (T+, T-).  T+ is the sum of positive ranks (the
-- These values mean little by themselves, and should be combined with the 'wilcoxonSignificant'
-- function in this module to get a meaningful result.
-- ranks of the differences where the first parameter is higher) whereas T- is
-- the sum of negative ranks (the ranks of the differences where the second parameter is higher).
-- to the length of the shorter sample.

import Control.Applicative ((<$>))
import Data.Function (on)
import Data.List (findIndex)
import Data.Ord (comparing)
import qualified Data.Vector.Unboxed as U
import Prelude hiding (sum)
import Statistics.Function (sortBy)
import Statistics.Sample.Internal (sum)
import Statistics.Test.Internal (rank, splitByTags)
import Statistics.Test.Types
import Statistics.Types -- (CL,pValue,getPValue)
import Statistics.Distribution
import Statistics.Distribution.Normal


-- | Calculate (n,T⁺,T⁻) values for both samples. Where /n/ is reduced
--   sample where equal pairs are removed.
wilcoxonMatchedPairSignedRank :: (Ord a, Num a, U.Unbox a) => U.Vector (a,a) -> (Int, Double, Double)
wilcoxonMatchedPairSignedRank :: Vector (a, a) -> (Int, Double, Double)
wilcoxonMatchedPairSignedRank Vector (a, a)
ab
  = (Int
nRed, Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
sum Vector Double
ranks1, Double -> Double
forall a. Num a => a -> a
negate (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
sum Vector Double
ranks2))
  where
    -- Positive and negative ranks
    (Vector Double
ranks1, Vector Double
ranks2) = Vector (Bool, Double) -> (Vector Double, Vector Double)
forall (v :: * -> *) a.
(Vector v a, Vector v (Bool, a)) =>
v (Bool, a) -> (v a, v a)
splitByTags
                     (Vector (Bool, Double) -> (Vector Double, Vector Double))
-> Vector (Bool, Double) -> (Vector Double, Vector Double)
forall a b. (a -> b) -> a -> b
$ Vector Bool -> Vector Double -> Vector (Bool, Double)
forall a b.
(Unbox a, Unbox b) =>
Vector a -> Vector b -> Vector (a, b)
U.zip Vector Bool
tags ((a -> a -> Bool) -> Vector a -> Vector Double
forall (v :: * -> *) a.
(Vector v a, Vector v Double) =>
(a -> a -> Bool) -> v a -> v Double
rank (a -> a -> Bool
forall a. Eq a => a -> a -> Bool
(==) (a -> a -> Bool) -> (a -> a) -> a -> a -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` a -> a
forall a. Num a => a -> a
abs) Vector a
diffs)
    -- Sorted list of differences
    diffsSorted :: Vector a
diffsSorted = Comparison a -> Vector a -> Vector a
forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy ((a -> a) -> Comparison a
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing a -> a
forall a. Num a => a -> a
abs)    -- Sort the differences by absolute difference
                (Vector a -> Vector a) -> Vector a -> Vector a
forall a b. (a -> b) -> a -> b
$ (a -> Bool) -> Vector a -> Vector a
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
U.filter  (a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
0)          -- Remove equal elements
                (Vector a -> Vector a) -> Vector a -> Vector a
forall a b. (a -> b) -> a -> b
$ ((a, a) -> a) -> Vector (a, a) -> Vector a
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map ((a -> a -> a) -> (a, a) -> a
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (-)) Vector (a, a)
ab    -- Work out differences
    nRed :: Int
nRed = Vector a -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector a
diffsSorted
    -- Sign tags and differences
    (Vector Bool
tags,Vector a
diffs) = Vector (Bool, a) -> (Vector Bool, Vector a)
forall a b.
(Unbox a, Unbox b) =>
Vector (a, b) -> (Vector a, Vector b)
U.unzip
                 (Vector (Bool, a) -> (Vector Bool, Vector a))
-> Vector (Bool, a) -> (Vector Bool, Vector a)
forall a b. (a -> b) -> a -> b
$ (a -> (Bool, a)) -> Vector a -> Vector (Bool, a)
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (\a
x -> (a
xa -> a -> Bool
forall a. Ord a => a -> a -> Bool
>a
0 , a
x))   -- Attach tags to distribution elements
                 (Vector a -> Vector (Bool, a)) -> Vector a -> Vector (Bool, a)
forall a b. (a -> b) -> a -> b
$ Vector a
diffsSorted



-- | The coefficients for x^0, x^1, x^2, etc, in the expression
-- \prod_{r=1}^s (1 + x^r).  See the Mitic paper for details.
--
-- We can define:
-- f(1) = 1 + x
-- f(r) = (1 + x^r)*f(r-1)
--      = f(r-1) + x^r * f(r-1)
-- The effect of multiplying the equation by x^r is to shift
-- all the coefficients by r down the list.
--
-- This list will be processed lazily from the head.
coefficients :: Int -> [Integer]
coefficients :: Int -> [Integer]
coefficients Int
1 = [Integer
1, Integer
1] -- 1 + x
coefficients Int
r = let coeffs :: [Integer]
coeffs = Int -> [Integer]
coefficients (Int
rInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
                     ([Integer]
firstR, [Integer]
rest) = Int -> [Integer] -> ([Integer], [Integer])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
r [Integer]
coeffs
  in [Integer]
firstR [Integer] -> [Integer] -> [Integer]
forall a. [a] -> [a] -> [a]
++ [Integer] -> [Integer] -> [Integer]
forall a. Num a => [a] -> [a] -> [a]
add [Integer]
rest [Integer]
coeffs
  where
    add :: [a] -> [a] -> [a]
add (a
x:[a]
xs) (a
y:[a]
ys) = a
x a -> a -> a
forall a. Num a => a -> a -> a
+ a
y a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a] -> [a] -> [a]
add [a]
xs [a]
ys
    add [a]
xs [] = [a]
xs
    add [] [a]
ys = [a]
ys

-- This list will be processed lazily from the head.
summedCoefficients :: Int -> [Double]
summedCoefficients :: Int -> [Double]
summedCoefficients Int
n
  | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
1     = [Char] -> [Double]
forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.Test.WilcoxonT.summedCoefficients: nonpositive sample size"
  | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1023  = [Char] -> [Double]
forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.Test.WilcoxonT.summedCoefficients: sample is too large (see bug #18)"
  | Bool
otherwise = (Integer -> Double) -> [Integer] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Integer -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Integer] -> [Double]) -> [Integer] -> [Double]
forall a b. (a -> b) -> a -> b
$ (Integer -> Integer -> Integer) -> [Integer] -> [Integer]
forall a. (a -> a -> a) -> [a] -> [a]
scanl1 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
(+) ([Integer] -> [Integer]) -> [Integer] -> [Integer]
forall a b. (a -> b) -> a -> b
$ Int -> [Integer]
coefficients Int
n



-- | Tests whether a given result from a Wilcoxon signed-rank matched-pairs test
-- is significant at the given level.
--
-- This function can perform a one-tailed or two-tailed test.  If the first
-- parameter to this function is 'TwoTailed', the test is performed two-tailed to
-- check if the two samples differ significantly.  If the first parameter is
-- 'OneTailed', the check is performed one-tailed to decide whether the first sample
-- (i.e. the first sample you passed to 'wilcoxonMatchedPairSignedRank') is
-- greater than the second sample (i.e. the second sample you passed to
-- 'wilcoxonMatchedPairSignedRank').  If you wish to perform a one-tailed test
-- in the opposite direction, you can either pass the parameters in a different
-- order to 'wilcoxonMatchedPairSignedRank', or simply swap the values in the resulting
-- pair before passing them to this function.
wilcoxonMatchedPairSignificant
  :: PositionTest          -- ^ How to compare two samples
  -> PValue Double         -- ^ The p-value at which to test (e.g. @mkPValue 0.05@)
  -> (Int, Double, Double) -- ^ The (n,T⁺, T⁻) values from 'wilcoxonMatchedPairSignedRank'.
  -> Maybe TestResult      -- ^ Return 'Nothing' if the sample was too
                           --   small to make a decision.
wilcoxonMatchedPairSignificant :: PositionTest
-> PValue Double -> (Int, Double, Double) -> Maybe TestResult
wilcoxonMatchedPairSignificant PositionTest
test PValue Double
pVal (Int
sampleSize, Double
tPlus, Double
tMinus) =
  case PositionTest
test of
    -- According to my nearest book (Understanding Research Methods and Statistics
    -- by Gary W. Heiman, p590), to check that the first sample is bigger you must
    -- use the absolute value of T- for a one-tailed check:
    PositionTest
AGreater      -> do Int
crit <- Int -> PValue Double -> Maybe Int
wilcoxonMatchedPairCriticalValue Int
sampleSize PValue Double
pVal
                        TestResult -> Maybe TestResult
forall (m :: * -> *) a. Monad m => a -> m a
return (TestResult -> Maybe TestResult) -> TestResult -> Maybe TestResult
forall a b. (a -> b) -> a -> b
$ Bool -> TestResult
significant (Bool -> TestResult) -> Bool -> TestResult
forall a b. (a -> b) -> a -> b
$ Double -> Double
forall a. Num a => a -> a
abs Double
tMinus Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
crit
    PositionTest
BGreater      -> do Int
crit <- Int -> PValue Double -> Maybe Int
wilcoxonMatchedPairCriticalValue Int
sampleSize PValue Double
pVal
                        TestResult -> Maybe TestResult
forall (m :: * -> *) a. Monad m => a -> m a
return (TestResult -> Maybe TestResult) -> TestResult -> Maybe TestResult
forall a b. (a -> b) -> a -> b
$ Bool -> TestResult
significant (Bool -> TestResult) -> Bool -> TestResult
forall a b. (a -> b) -> a -> b
$ Double -> Double
forall a. Num a => a -> a
abs Double
tPlus Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
crit
    -- Otherwise you must use the value of T+ and T- with the smallest absolute value:
    --
    -- Note that in absence of ties sum of |T+| and |T-| is constant
    -- so by selecting minimal we are performing two-tailed test and
    -- look and both tails of distribution of T.
    PositionTest
SamplesDiffer -> do Int
crit <- Int -> PValue Double -> Maybe Int
wilcoxonMatchedPairCriticalValue Int
sampleSize (Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue (Double -> PValue Double) -> Double -> PValue Double
forall a b. (a -> b) -> a -> b
$ Double
pDouble -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
2)
                        TestResult -> Maybe TestResult
forall (m :: * -> *) a. Monad m => a -> m a
return (TestResult -> Maybe TestResult) -> TestResult -> Maybe TestResult
forall a b. (a -> b) -> a -> b
$ Bool -> TestResult
significant (Bool -> TestResult) -> Bool -> TestResult
forall a b. (a -> b) -> a -> b
$ Double
t Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
crit
  where
    t :: Double
t = Double -> Double -> Double
forall a. Ord a => a -> a -> a
min (Double -> Double
forall a. Num a => a -> a
abs Double
tPlus) (Double -> Double
forall a. Num a => a -> a
abs Double
tMinus)
    p :: Double
p = PValue Double -> Double
forall a. PValue a -> a
pValue PValue Double
pVal


-- | Obtains the critical value of T to compare against, given a sample size
-- and a p-value (significance level).  Your T value must be less than or
-- equal to the return of this function in order for the test to work out
-- significant.  If there is a Nothing return, the sample size is too small to
-- make a decision.
--
-- 'wilcoxonSignificant' tests the return value of 'wilcoxonMatchedPairSignedRank'
-- for you, so you should use 'wilcoxonSignificant' for determining test results.
--  However, this function is useful, for example, for generating lookup tables
-- for Wilcoxon signed rank critical values.
--
-- The return values of this function are generated using the method
-- detailed in the Mitic's paper. According to that paper, the results
-- may differ from other published lookup tables, but (Mitic claims)
-- the values obtained by this function will be the correct ones.
wilcoxonMatchedPairCriticalValue ::
     Int                -- ^ The sample size
  -> PValue Double      -- ^ The p-value (e.g. @mkPValue 0.05@) for which you want the critical value.
  -> Maybe Int          -- ^ The critical value (of T), or Nothing if
                        --   the sample is too small to make a decision.
wilcoxonMatchedPairCriticalValue :: Int -> PValue Double -> Maybe Int
wilcoxonMatchedPairCriticalValue Int
n PValue Double
pVal
  | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
100   =
      case Int -> Int -> Int
forall a. Num a => a -> a -> a
subtract Int
1 (Int -> Int) -> Maybe Int -> Maybe Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Double -> Bool) -> [Double] -> Maybe Int
forall a. (a -> Bool) -> [a] -> Maybe Int
findIndex (Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
m) (Int -> [Double]
summedCoefficients Int
n) of
        Just Int
k | Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0     -> Maybe Int
forall a. Maybe a
Nothing
               | Bool
otherwise -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
k
        Maybe Int
Nothing  -> [Char] -> Maybe Int
forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.Test.WilcoxonT.wilcoxonMatchedPairCriticalValue: impossible happened"
  | Bool
otherwise =
     case NormalDistribution -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
quantile (Int -> NormalDistribution
normalApprox Int
n) Double
p of
       Double
z | Double
z Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0     -> Maybe Int
forall a. Maybe a
Nothing
         | Bool
otherwise -> Int -> Maybe Int
forall a. a -> Maybe a
Just (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round Double
z)
  where
    p :: Double
p = PValue Double -> Double
forall a. PValue a -> a
pValue PValue Double
pVal
    m :: Double
m = (Double
2 Double -> Double -> Double
forall a. Floating a => a -> a -> a
** Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
p


-- | Works out the significance level (p-value) of a T value, given a sample
-- size and a T value from the Wilcoxon signed-rank matched-pairs test.
--
-- See the notes on 'wilcoxonCriticalValue' for how this is calculated.
wilcoxonMatchedPairSignificance
  :: Int           -- ^ The sample size
  -> Double        -- ^ The value of T for which you want the significance.
  -> PValue Double -- ^ The significance (p-value).
wilcoxonMatchedPairSignificance :: Int -> Double -> PValue Double
wilcoxonMatchedPairSignificance Int
n Double
t
  = Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue Double
p
  where
    p :: Double
p | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
100   = (Int -> [Double]
summedCoefficients Int
n [Double] -> Int -> Double
forall a. [a] -> Int -> a
!! Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
floor Double
t) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2 Double -> Double -> Double
forall a. Floating a => a -> a -> a
** Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
      | Bool
otherwise = NormalDistribution -> Double -> Double
forall d. Distribution d => d -> Double -> Double
cumulative (Int -> NormalDistribution
normalApprox Int
n) Double
t


-- | Normal approximation for Wilcoxon T statistics
normalApprox :: Int -> NormalDistribution
normalApprox :: Int -> NormalDistribution
normalApprox Int
ni
  = Double -> Double -> NormalDistribution
normalDistr Double
m Double
s
  where
    m :: Double
m = Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
4
    s :: Double
s = Double -> Double
forall a. Floating a => a -> a
sqrt (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ (Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1) Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
2Double -> Double -> Double
forall a. Num a => a -> a -> a
*Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1)) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
24
    n :: Double
n = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
ni


-- | The Wilcoxon matched-pairs signed-rank test. The samples are
-- zipped together: if one is longer than the other, both are
-- truncated to the length of the shorter sample.
--
-- For one-tailed test it tests whether first sample is significantly
-- greater than the second. For two-tailed it checks whether they
-- significantly differ
--
-- Check 'wilcoxonMatchedPairSignedRank' and
-- 'wilcoxonMatchedPairSignificant' for additional information.
wilcoxonMatchedPairTest
  :: (Ord a, Num a, U.Unbox a)
  => PositionTest     -- ^ Perform one-tailed test.
  -> U.Vector (a,a)   -- ^ Sample of pairs
  -> Test ()          -- ^ Return 'Nothing' if the sample was too
                      --   small to make a decision.
wilcoxonMatchedPairTest :: PositionTest -> Vector (a, a) -> Test ()
wilcoxonMatchedPairTest PositionTest
test Vector (a, a)
pairs =
  Test :: forall distr. PValue Double -> Double -> distr -> Test distr
Test { testSignificance :: PValue Double
testSignificance = PValue Double
pVal
       , testStatistics :: Double
testStatistics   = Double
t
       , testDistribution :: ()
testDistribution = ()
       }
  where
    (Int
n,Double
tPlus,Double
tMinus) = Vector (a, a) -> (Int, Double, Double)
forall a.
(Ord a, Num a, Unbox a) =>
Vector (a, a) -> (Int, Double, Double)
wilcoxonMatchedPairSignedRank Vector (a, a)
pairs
    (Double
t,PValue Double
pVal) = case PositionTest
test of
                 PositionTest
AGreater      -> (Double -> Double
forall a. Num a => a -> a
abs Double
tMinus, Int -> Double -> PValue Double
wilcoxonMatchedPairSignificance Int
n (Double -> Double
forall a. Num a => a -> a
abs Double
tMinus))
                 PositionTest
BGreater      -> (Double -> Double
forall a. Num a => a -> a
abs Double
tPlus,  Int -> Double -> PValue Double
wilcoxonMatchedPairSignificance Int
n (Double -> Double
forall a. Num a => a -> a
abs Double
tPlus ))
                 -- Since we take minimum of T+,T- we can't get more
                 -- that p=0.5 and can multiply it by 2 without risk
                 -- of error.
                 PositionTest
SamplesDiffer -> let t' :: Double
t' = Double -> Double -> Double
forall a. Ord a => a -> a -> a
min (Double -> Double
forall a. Num a => a -> a
abs Double
tMinus) (Double -> Double
forall a. Num a => a -> a
abs Double
tPlus)
                                      p :: PValue Double
p  = Int -> Double -> PValue Double
wilcoxonMatchedPairSignificance Int
n Double
t'
                                  in (Double
t', Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue (Double -> PValue Double) -> Double -> PValue Double
forall a b. (a -> b) -> a -> b
$ Double -> Double -> Double
forall a. Ord a => a -> a -> a
min Double
1 (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
2 Double -> Double -> Double
forall a. Num a => a -> a -> a
* PValue Double -> Double
forall a. PValue a -> a
pValue PValue Double
p)


-- $references
--
-- * \"Critical Values for the Wilcoxon Signed Rank Statistic\", Peter
--   Mitic, The Mathematica Journal, volume 6, issue 3, 1996
--   (<http://www.mathematica-journal.com/issue/v6i3/article/mitic/contents/63mitic.pdf>)