```{-# LANGUAGE FlexibleContexts, Rank2Types, ScopedTypeVariables #-}
-- | Student's T-test is for assessing whether two samples have
--   different mean. This module contain several variations of
--   T-test. It's a parametric tests and assumes that samples are
--   normally distributed.
module Statistics.Test.StudentT
(
studentTTest
, welchTTest
, pairedTTest
, module Statistics.Test.Types
) where

import Statistics.Distribution hiding (mean)
import Statistics.Distribution.StudentT
import Statistics.Sample (mean, varianceUnbiased)
import Statistics.Test.Types
import Statistics.Types    (mkPValue,PValue)
import Statistics.Function (square)
import qualified Data.Vector.Generic  as G
import qualified Data.Vector.Unboxed  as U
import qualified Data.Vector.Storable as S
import qualified Data.Vector          as V

-- | Two-sample Student's t-test. It assumes that both samples are
--   normally distributed and have same variance. Returns @Nothing@ if
--   sample sizes are not sufficient.
studentTTest :: (G.Vector v Double)
=> PositionTest  -- ^ one- or two-tailed test
-> v Double      -- ^ Sample A
-> v Double      -- ^ Sample B
-> Maybe (Test StudentT)
studentTTest test sample1 sample2
| G.length sample1 < 2 || G.length sample2 < 2 = Nothing
| otherwise                                    = Just Test
{ testSignificance = significance test t ndf
, testStatistics   = t
, testDistribution = studentT ndf
}
where
(t, ndf) = tStatistics True sample1 sample2
{-# INLINABLE  studentTTest #-}
{-# SPECIALIZE studentTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE studentTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE studentTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}

-- | Two-sample Welch's t-test. It assumes that both samples are
--   normally distributed but doesn't assume that they have same
--   variance. Returns @Nothing@ if sample sizes are not sufficient.
welchTTest :: (G.Vector v Double)
=> PositionTest  -- ^ one- or two-tailed test
-> v Double      -- ^ Sample A
-> v Double      -- ^ Sample B
-> Maybe (Test StudentT)
welchTTest test sample1 sample2
| G.length sample1 < 2 || G.length sample2 < 2 = Nothing
| otherwise                                    = Just Test
{ testSignificance = significance test t ndf
, testStatistics   = t
, testDistribution = studentT ndf
}
where
(t, ndf) = tStatistics False sample1 sample2
{-# INLINABLE  welchTTest #-}
{-# SPECIALIZE welchTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE welchTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE welchTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}

-- | Paired two-sample t-test. Two samples are paired in a
-- within-subject design. Returns @Nothing@ if sample size is not
-- sufficient.
pairedTTest :: forall v. (G.Vector v (Double, Double), G.Vector v Double)
=> PositionTest          -- ^ one- or two-tailed test
-> v (Double, Double)    -- ^ paired samples
-> Maybe (Test StudentT)
pairedTTest test sample
| G.length sample < 2 = Nothing
| otherwise           = Just Test
{ testSignificance = significance test t ndf
, testStatistics   = t
, testDistribution = studentT ndf
}
where
(t, ndf) = tStatisticsPaired sample
{-# INLINABLE  pairedTTest #-}
{-# SPECIALIZE pairedTTest :: PositionTest -> U.Vector (Double,Double) -> Maybe (Test StudentT) #-}
{-# SPECIALIZE pairedTTest :: PositionTest -> V.Vector (Double,Double) -> Maybe (Test StudentT) #-}

-------------------------------------------------------------------------------

significance :: PositionTest    -- ^ one- or two-tailed
-> Double          -- ^ t statistics
-> Double          -- ^ degree of freedom
-> PValue Double   -- ^ p-value
significance test t df =
case test of
-- Here we exploit symmetry of T-distribution and calculate small tail
SamplesDiffer -> mkPValue \$ 2 * tailArea (negate (abs t))
AGreater      -> mkPValue \$ tailArea (negate t)
BGreater      -> mkPValue \$ tailArea  t
where
tailArea = cumulative (studentT df)

-- Calculate T statistics for two samples
tStatistics :: (G.Vector v Double)
=> Bool               -- variance equality
-> v Double
-> v Double
-> (Double, Double)
{-# INLINE tStatistics #-}
tStatistics varequal sample1 sample2 = (t, ndf)
where
-- t-statistics
t = (m1 - m2) / sqrt (
if varequal
then ((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2) * (1 / n1 + 1 / n2)
else s1 / n1 + s2 / n2)

-- degree of freedom
ndf | varequal  = n1 + n2 - 2
| otherwise = square (s1 / n1 + s2 / n2)
/ (square s1 / (square n1 * (n1 - 1)) + square s2 / (square n2 * (n2 - 1)))
-- statistics of two samples
n1 = fromIntegral \$ G.length sample1
n2 = fromIntegral \$ G.length sample2
m1 = mean sample1
m2 = mean sample2
s1 = varianceUnbiased sample1
s2 = varianceUnbiased sample2

-- Calculate T-statistics for paired sample
tStatisticsPaired :: (G.Vector v (Double, Double), G.Vector v Double)
=> v (Double, Double)
-> (Double, Double)
{-# INLINE tStatisticsPaired #-}
tStatisticsPaired sample = (t, ndf)
where
-- t-statistics
t = let d    = G.map (uncurry (-)) sample
sumd = G.sum d
in sumd / sqrt ((n * G.sum (G.map square d) - square sumd) / ndf)
-- degree of freedom
ndf = n - 1
n   = fromIntegral \$ G.length sample
```