-- |
-- Module    : Statistics.Test.KruskalWallis
-- Copyright : (c) 2014 Danny Navarro
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
module Statistics.Test.KruskalWallis
  ( -- * Kruskal-Wallis test
    kruskalWallisTest
    -- ** Building blocks
  , kruskalWallisRank
  , kruskalWallis
  , module Statistics.Test.Types
  ) where

import Data.Ord (comparing)
import qualified Data.Vector.Unboxed as U
import Statistics.Function (sort, sortBy, square)
import Statistics.Distribution (complCumulative)
import Statistics.Distribution.ChiSquared (chiSquared)
import Statistics.Types
import Statistics.Test.Types
import Statistics.Test.Internal (rank)
import Statistics.Sample
import qualified Statistics.Sample.Internal as Sample(sum)


-- | Kruskal-Wallis ranking.
--
-- All values are replaced by the absolute rank in the combined samples.
--
-- The samples and values need not to be ordered but the values in the result
-- are ordered. Assigned ranks (ties are given their average rank).
kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> [U.Vector Double]
kruskalWallisRank :: forall a. (Unbox a, Ord a) => [Vector a] -> [Vector Double]
kruskalWallisRank [Vector a]
samples = forall {a}.
(Unbox a, Eq a) =>
Vector (a, Double) -> [Vector Double]
groupByTags
                          forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> a
fst)
                          forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b.
(Unbox a, Unbox b) =>
Vector a -> Vector b -> Vector (a, b)
U.zip Vector Int
tags
                          forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a.
(Vector v a, Vector v Double) =>
(a -> a -> Bool) -> v a -> v Double
rank forall a. Eq a => a -> a -> Bool
(==) Vector a
joinSample
  where
    (Vector Int
tags,Vector a
joinSample) = forall a b.
(Unbox a, Unbox b) =>
Vector (a, b) -> (Vector a, Vector b)
U.unzip
                      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> b
snd)
                      forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall {b} {a}.
(Unbox b, Unbox a) =>
a -> Vector b -> Vector (a, b)
tagSample) forall a b. (a -> b) -> a -> b
$ forall a b. [a] -> [b] -> [(a, b)]
zip [(Int
1::Int)..] [Vector a]
samples
    tagSample :: a -> Vector b -> Vector (a, b)
tagSample a
t = forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (\b
x -> (a
t,b
x))

    groupByTags :: Vector (a, Double) -> [Vector Double]
groupByTags Vector (a, Double)
xs
        | forall a. Unbox a => Vector a -> Bool
U.null Vector (a, Double)
xs = []
        | Bool
otherwise = Vector Double -> Vector Double
sort (forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map forall a b. (a, b) -> b
snd Vector (a, Double)
ys) forall a. a -> [a] -> [a]
: Vector (a, Double) -> [Vector Double]
groupByTags Vector (a, Double)
zs
      where
        (Vector (a, Double)
ys,Vector (a, Double)
zs) = forall a.
Unbox a =>
(a -> Bool) -> Vector a -> (Vector a, Vector a)
U.span (forall a. Eq a => a -> a -> Bool
(==) (forall a b. (a, b) -> a
fst forall a b. (a -> b) -> a -> b
$ forall a. Unbox a => Vector a -> a
U.head Vector (a, Double)
xs) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) Vector (a, Double)
xs


-- | The Kruskal-Wallis Test.
--
-- In textbooks the output value is usually represented by 'K' or 'H'. This
-- function already does the ranking.
kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Double
kruskalWallis :: forall a. (Unbox a, Ord a) => [Vector a] -> Double
kruskalWallis [Vector a]
samples = (Double
nTot forall a. Num a => a -> a -> a
- Double
1) forall a. Num a => a -> a -> a
* Double
numerator forall a. Fractional a => a -> a -> a
/ Double
denominator
  where
    -- Total number of elements in all samples
    nTot :: Double
nTot    = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall a. Num a => [Vector Double] -> (Vector Double -> a) -> a
sumWith [Vector Double]
rsamples forall a. Unbox a => Vector a -> Int
U.length
    -- Average rank of all samples
    avgRank :: Double
avgRank = (Double
nTot forall a. Num a => a -> a -> a
+ Double
1) forall a. Fractional a => a -> a -> a
/ Double
2
    --
    numerator :: Double
numerator = forall a. Num a => [Vector Double] -> (Vector Double -> a) -> a
sumWith [Vector Double]
rsamples forall a b. (a -> b) -> a -> b
$ \Vector Double
sample ->
        let n :: Double
n = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall a. Unbox a => Vector a -> Int
U.length Vector Double
sample
        in  Double
n forall a. Num a => a -> a -> a
* Double -> Double
square (forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector Double
sample forall a. Num a => a -> a -> a
- Double
avgRank)
    denominator :: Double
denominator = forall a. Num a => [Vector Double] -> (Vector Double -> a) -> a
sumWith [Vector Double]
rsamples forall a b. (a -> b) -> a -> b
$ \Vector Double
sample ->
        forall (v :: * -> *). Vector v Double => v Double -> Double
Sample.sum forall a b. (a -> b) -> a -> b
$ forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (\Double
r -> Double -> Double
square (Double
r forall a. Num a => a -> a -> a
- Double
avgRank)) Vector Double
sample

    rsamples :: [Vector Double]
rsamples = forall a. (Unbox a, Ord a) => [Vector a] -> [Vector Double]
kruskalWallisRank [Vector a]
samples


-- | Perform Kruskal-Wallis Test for the given samples and required
-- significance. For additional information check 'kruskalWallis'. This is just
-- a helper function.
--
-- It uses /Chi-Squared/ distribution for approximation as long as the sizes are
-- larger than 5. Otherwise the test returns 'Nothing'.
kruskalWallisTest :: (Ord a, U.Unbox a) => [U.Vector a] -> Maybe (Test ())
kruskalWallisTest :: forall a. (Ord a, Unbox a) => [Vector a] -> Maybe (Test ())
kruskalWallisTest []      = forall a. Maybe a
Nothing
kruskalWallisTest [Vector a]
samples
  -- We use chi-squared approximation here
  | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (forall a. Ord a => a -> a -> Bool
>Int
4) [Int]
ns = forall a. a -> Maybe a
Just Test { testSignificance :: PValue Double
testSignificance = forall a. (Ord a, Num a) => a -> PValue a
mkPValue forall a b. (a -> b) -> a -> b
$ forall d. Distribution d => d -> Double -> Double
complCumulative ChiSquared
d Double
k
                            , testStatistics :: Double
testStatistics   = Double
k
                            , testDistribution :: ()
testDistribution = ()
                            }
  | Bool
otherwise   = forall a. Maybe a
Nothing
  where
    k :: Double
k  = forall a. (Unbox a, Ord a) => [Vector a] -> Double
kruskalWallis [Vector a]
samples
    ns :: [Int]
ns = forall a b. (a -> b) -> [a] -> [b]
map forall a. Unbox a => Vector a -> Int
U.length [Vector a]
samples
    d :: ChiSquared
d  = Int -> ChiSquared
chiSquared (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
ns forall a. Num a => a -> a -> a
- Int
1)

-- * Helper functions

sumWith :: Num a => [Sample] -> (Sample -> a) -> a
sumWith :: forall a. Num a => [Vector Double] -> (Vector Double -> a) -> a
sumWith [Vector Double]
samples Vector Double -> a
f = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
Prelude.sum forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Vector Double -> a
f [Vector Double]
samples
{-# INLINE sumWith #-}