{-# LANGUAGE FlexibleContexts, Rank2Types, ScopedTypeVariables #-}
-- | Student's T-test is for assessing whether two samples have
--   different mean. This module contain several variations of
--   T-test. It's a parametric tests and assumes that samples are
--   normally distributed.
module Statistics.Test.StudentT
    (
      studentTTest
    , welchTTest
    , pairedTTest
    , module Statistics.Test.Types
    ) where

import Statistics.Distribution hiding (mean)
import Statistics.Distribution.StudentT
import Statistics.Sample (mean, varianceUnbiased)
import Statistics.Test.Types
import Statistics.Types    (mkPValue,PValue)
import Statistics.Function (square)
import qualified Data.Vector.Generic  as G
import qualified Data.Vector.Unboxed  as U
import qualified Data.Vector.Storable as S
import qualified Data.Vector          as V



-- | Two-sample Student's t-test. It assumes that both samples are
--   normally distributed and have same variance. Returns @Nothing@ if
--   sample sizes are not sufficient.
studentTTest :: (G.Vector v Double)
             => PositionTest  -- ^ one- or two-tailed test
             -> v Double      -- ^ Sample A
             -> v Double      -- ^ Sample B
             -> Maybe (Test StudentT)
studentTTest :: PositionTest -> v Double -> v Double -> Maybe (Test StudentT)
studentTTest PositionTest
test v Double
sample1 v Double
sample2
  | v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 Bool -> Bool -> Bool
|| v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample2 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = Maybe (Test StudentT)
forall a. Maybe a
Nothing
  | Bool
otherwise                                    = Test StudentT -> Maybe (Test StudentT)
forall a. a -> Maybe a
Just Test :: forall distr. PValue Double -> Double -> distr -> Test distr
Test
      { testSignificance :: PValue Double
testSignificance = PositionTest -> Double -> Double -> PValue Double
significance PositionTest
test Double
t Double
ndf
      , testStatistics :: Double
testStatistics   = Double
t
      , testDistribution :: StudentT
testDistribution = Double -> StudentT
studentT Double
ndf
      }
  where
    (Double
t, Double
ndf) = Bool -> v Double -> v Double -> (Double, Double)
forall (v :: * -> *).
Vector v Double =>
Bool -> v Double -> v Double -> (Double, Double)
tStatistics Bool
True v Double
sample1 v Double
sample2
{-# INLINABLE  studentTTest #-}
{-# SPECIALIZE studentTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE studentTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE studentTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}

-- | Two-sample Welch's t-test. It assumes that both samples are
--   normally distributed but doesn't assume that they have same
--   variance. Returns @Nothing@ if sample sizes are not sufficient.
welchTTest :: (G.Vector v Double)
           => PositionTest  -- ^ one- or two-tailed test
           -> v Double      -- ^ Sample A
           -> v Double      -- ^ Sample B
           -> Maybe (Test StudentT)
welchTTest :: PositionTest -> v Double -> v Double -> Maybe (Test StudentT)
welchTTest PositionTest
test v Double
sample1 v Double
sample2
  | v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 Bool -> Bool -> Bool
|| v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample2 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = Maybe (Test StudentT)
forall a. Maybe a
Nothing
  | Bool
otherwise                                    = Test StudentT -> Maybe (Test StudentT)
forall a. a -> Maybe a
Just Test :: forall distr. PValue Double -> Double -> distr -> Test distr
Test
      { testSignificance :: PValue Double
testSignificance = PositionTest -> Double -> Double -> PValue Double
significance PositionTest
test Double
t Double
ndf
      , testStatistics :: Double
testStatistics   = Double
t
      , testDistribution :: StudentT
testDistribution = Double -> StudentT
studentT Double
ndf
      }
  where
    (Double
t, Double
ndf) = Bool -> v Double -> v Double -> (Double, Double)
forall (v :: * -> *).
Vector v Double =>
Bool -> v Double -> v Double -> (Double, Double)
tStatistics Bool
False v Double
sample1 v Double
sample2
{-# INLINABLE  welchTTest #-}
{-# SPECIALIZE welchTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE welchTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE welchTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}

-- | Paired two-sample t-test. Two samples are paired in a
-- within-subject design. Returns @Nothing@ if sample size is not
-- sufficient.
pairedTTest :: forall v. (G.Vector v (Double, Double), G.Vector v Double)
            => PositionTest          -- ^ one- or two-tailed test
            -> v (Double, Double)    -- ^ paired samples
            -> Maybe (Test StudentT)
pairedTTest :: PositionTest -> v (Double, Double) -> Maybe (Test StudentT)
pairedTTest PositionTest
test v (Double, Double)
sample
  | v (Double, Double) -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v (Double, Double)
sample Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = Maybe (Test StudentT)
forall a. Maybe a
Nothing
  | Bool
otherwise           = Test StudentT -> Maybe (Test StudentT)
forall a. a -> Maybe a
Just Test :: forall distr. PValue Double -> Double -> distr -> Test distr
Test
      { testSignificance :: PValue Double
testSignificance = PositionTest -> Double -> Double -> PValue Double
significance PositionTest
test Double
t Double
ndf
      , testStatistics :: Double
testStatistics   = Double
t
      , testDistribution :: StudentT
testDistribution = Double -> StudentT
studentT Double
ndf
      }
  where
    (Double
t, Double
ndf) = v (Double, Double) -> (Double, Double)
forall (v :: * -> *).
(Vector v (Double, Double), Vector v Double) =>
v (Double, Double) -> (Double, Double)
tStatisticsPaired v (Double, Double)
sample
{-# INLINABLE  pairedTTest #-}
{-# SPECIALIZE pairedTTest :: PositionTest -> U.Vector (Double,Double) -> Maybe (Test StudentT) #-}
{-# SPECIALIZE pairedTTest :: PositionTest -> V.Vector (Double,Double) -> Maybe (Test StudentT) #-}


-------------------------------------------------------------------------------

significance :: PositionTest    -- ^ one- or two-tailed
             -> Double          -- ^ t statistics
             -> Double          -- ^ degree of freedom
             -> PValue Double   -- ^ p-value
significance :: PositionTest -> Double -> Double -> PValue Double
significance PositionTest
test Double
t Double
df =
  case PositionTest
test of
    -- Here we exploit symmetry of T-distribution and calculate small tail
    PositionTest
SamplesDiffer -> Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue (Double -> PValue Double) -> Double -> PValue Double
forall a b. (a -> b) -> a -> b
$ Double
2 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
tailArea (Double -> Double
forall a. Num a => a -> a
negate (Double -> Double
forall a. Num a => a -> a
abs Double
t))
    PositionTest
AGreater      -> Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue (Double -> PValue Double) -> Double -> PValue Double
forall a b. (a -> b) -> a -> b
$ Double -> Double
tailArea (Double -> Double
forall a. Num a => a -> a
negate Double
t)
    PositionTest
BGreater      -> Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue (Double -> PValue Double) -> Double -> PValue Double
forall a b. (a -> b) -> a -> b
$ Double -> Double
tailArea  Double
t
  where
    tailArea :: Double -> Double
tailArea = StudentT -> Double -> Double
forall d. Distribution d => d -> Double -> Double
cumulative (Double -> StudentT
studentT Double
df)


-- Calculate T statistics for two samples
tStatistics :: (G.Vector v Double)
            => Bool               -- variance equality
            -> v Double
            -> v Double
            -> (Double, Double)
{-# INLINE tStatistics #-}
tStatistics :: Bool -> v Double -> v Double -> (Double, Double)
tStatistics Bool
varequal v Double
sample1 v Double
sample2 = (Double
t, Double
ndf)
  where
    -- t-statistics
    t :: Double
t = (Double
m1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
m2) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double -> Double
forall a. Floating a => a -> a
sqrt (
      if Bool
varequal
        then ((Double
n1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
s1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ (Double
n2 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
s2) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
n1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
n2 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
2) Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
n1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
n2)
        else Double
s1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
n1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
s2 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
n2)

    -- degree of freedom
    ndf :: Double
ndf | Bool
varequal  = Double
n1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
n2 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
2
        | Bool
otherwise = Double -> Double
square (Double
s1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
n1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
s2 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
n2)
                    Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double -> Double
square Double
s1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double -> Double
square Double
n1 Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
n1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1)) Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double -> Double
square Double
s2 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double -> Double
square Double
n2 Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
n2 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1)))
    -- statistics of two samples
    n1 :: Double
n1 = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample1
    n2 :: Double
n2 = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample2
    m1 :: Double
m1 = v Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean v Double
sample1
    m2 :: Double
m2 = v Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean v Double
sample2
    s1 :: Double
s1 = v Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
varianceUnbiased v Double
sample1
    s2 :: Double
s2 = v Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
varianceUnbiased v Double
sample2


-- Calculate T-statistics for paired sample
tStatisticsPaired :: (G.Vector v (Double, Double), G.Vector v Double)
                  => v (Double, Double)
                  -> (Double, Double)
{-# INLINE tStatisticsPaired #-}
tStatisticsPaired :: v (Double, Double) -> (Double, Double)
tStatisticsPaired v (Double, Double)
sample = (Double
t, Double
ndf)
  where
    -- t-statistics
    t :: Double
t = let d :: v Double
d    = ((Double, Double) -> Double) -> v (Double, Double) -> v Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map ((Double -> Double -> Double) -> (Double, Double) -> Double
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (-)) v (Double, Double)
sample
            sumd :: Double
sumd = v Double -> Double
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
G.sum v Double
d
        in Double
sumd Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double -> Double
forall a. Floating a => a -> a
sqrt ((Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
* v Double -> Double
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
G.sum ((Double -> Double) -> v Double -> v Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map Double -> Double
square v Double
d) Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double -> Double
square Double
sumd) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
ndf)
    -- degree of freedom
    ndf :: Double
ndf = Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1
    n :: Double
n   = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ v (Double, Double) -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v (Double, Double)
sample