module Statistics.Test.KolmogorovSmirnov (
kolmogorovSmirnovTest
, kolmogorovSmirnovTestCdf
, kolmogorovSmirnovTest2
, kolmogorovSmirnovCdfD
, kolmogorovSmirnovD
, kolmogorovSmirnov2D
, kolmogorovSmirnovProbability
, TestType(..)
, TestResult(..)
) where
import Control.Monad
import Control.Monad.ST (ST)
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as M
import Statistics.Distribution (Distribution(..))
import Statistics.Types (Sample)
import Statistics.Function (sort)
import Statistics.Test.Types
import Text.Printf
kolmogorovSmirnovTest :: Distribution d
=> d
-> Double
-> Sample
-> TestResult
kolmogorovSmirnovTest d = kolmogorovSmirnovTestCdf (cumulative d)
kolmogorovSmirnovTestCdf :: (Double -> Double)
-> Double
-> Sample
-> TestResult
kolmogorovSmirnovTestCdf cdf p sample
| p > 0 && p < 1 = significant $ 1 prob < p
| otherwise = error "Statistics.Test.KolmogorovSmirnov.kolmogorovSmirnovTestCdf:bad p-value"
where
d = kolmogorovSmirnovCdfD cdf sample
prob = kolmogorovSmirnovProbability (U.length sample) d
kolmogorovSmirnovTest2 :: Double
-> Sample
-> Sample
-> TestResult
kolmogorovSmirnovTest2 p xs1 xs2
| p > 0 && p < 1 = significant $ 1 prob( d*(en + 0.12 + 0.11/en) ) < p
| otherwise = error "Statistics.Test.KolmogorovSmirnov.kolmogorovSmirnovTest2:bad p-value"
where
d = kolmogorovSmirnov2D xs1 xs2
n1 = fromIntegral (U.length xs1)
n2 = fromIntegral (U.length xs2)
en = sqrt $ n1 * n2 / (n1 + n2)
prob z
| z < 0 = error "kolmogorovSmirnov2D: internal error"
| z == 0 = 1
| z < 1.18 = let y = exp( 1.23370055013616983 / (z*z) )
in 2.25675833419102515 * sqrt( log(y) ) * (y + y**9 + y**25 + y**49)
| otherwise = let x = exp(2 * z * z)
in 1 2*(x x**4 + x**9)
kolmogorovSmirnovCdfD :: (Double -> Double)
-> Sample
-> Double
kolmogorovSmirnovCdfD cdf sample
| U.null xs = 0
| otherwise = U.maximum
$ U.zipWith3 (\p a b -> abs (pa) `max` abs (pb))
ps steps (U.tail steps)
where
xs = sort sample
n = U.length xs
ps = U.map cdf xs
steps = U.map ((/ fromIntegral n) . fromIntegral)
$ U.generate (n+1) id
kolmogorovSmirnovD :: (Distribution d)
=> d
-> Sample
-> Double
kolmogorovSmirnovD d = kolmogorovSmirnovCdfD (cumulative d)
kolmogorovSmirnov2D :: Sample
-> Sample
-> Double
kolmogorovSmirnov2D sample1 sample2
| U.null sample1 || U.null sample2 = 0
| otherwise = worker 0 0 0
where
xs1 = sort sample1
xs2 = sort sample2
n1 = U.length xs1
n2 = U.length xs2
en1 = fromIntegral n1
en2 = fromIntegral n2
skip x i xs = go (i+1)
where go n | n >= U.length xs = n
| xs U.! n == x = go (n+1)
| otherwise = n
worker d i1 i2
| i1 >= n1 || i2 >= n2 = d
| otherwise = worker d' i1' i2'
where
d1 = xs1 U.! i1
d2 = xs2 U.! i2
i1' | d1 <= d2 = skip d1 i1 xs1
| otherwise = i1
i2' | d2 <= d1 = skip d2 i2 xs2
| otherwise = i2
d' = max d (abs $ fromIntegral i1' / en1 fromIntegral i2' / en2)
kolmogorovSmirnovProbability :: Int
-> Double
-> Double
kolmogorovSmirnovProbability n d
| s > 7.24 || (s > 3.76 && n > 99) = 1 2 * exp( (2.000071 + 0.331 / sqrt n' + 1.409 / n') * s)
| otherwise = fini $ matrixPower matrix n
where
s = n' * d * d
n' = fromIntegral n
size = 2*k 1
k = floor (n' * d) + 1
h = fromIntegral k n' * d
matrix =
let m = U.create $ do
mat <- M.new (size*size)
for 0 size $ \row ->
for 0 size $ \col -> do
let val | row + 1 >= col = 1
| otherwise = 0 :: Double
M.write mat (row * size + col) val
for 0 size $ \i -> do
let delta = h ^^ (i + 1)
modify mat (i * size) (subtract delta)
modify mat (size * size 1 i) (subtract delta)
when (2*h > 1) $ do
modify mat ((size 1) * size) (+ ((2*h 1) ^ size))
let divide g num
| num == size = return ()
| otherwise = do for num size $ \i ->
modify mat (i * (size + 1) num) (/ g)
divide (g * fromIntegral (num+2)) (num+1)
divide 2 1
return mat
in Matrix size m 0
fini m@(Matrix _ _ e) = loop 1 (matrixCenter m) e
where
loop i ss eQ
| i > n = ss * 10 ^^ eQ
| ss' < 1e-140 = loop (i+1) (ss' * 1e140) (eQ 140)
| otherwise = loop (i+1) ss' eQ
where ss' = ss * fromIntegral i / fromIntegral n
data Matrix = Matrix
!Int
!(U.Vector Double)
!Int
instance Show Matrix where
show (Matrix n vs _) = unlines $ map (unwords . map (printf "%.4f")) $ split $ U.toList vs
where
split [] = []
split xs = row : split rest where (row, rest) = splitAt n xs
avoidOverflow :: Matrix -> Matrix
avoidOverflow m@(Matrix n xs e)
| matrixCenter m > 1e140 = Matrix n (U.map (* 1e-140) xs) (e + 140)
| otherwise = m
matrixMultiply :: Matrix -> Matrix -> Matrix
matrixMultiply (Matrix n xs e1) (Matrix _ ys e2) =
Matrix n (U.generate (n*n) go) (e1 + e2)
where
go i = U.sum $ U.zipWith (*) row col
where
nCol = i `rem` n
row = U.slice (i nCol) n xs
col = U.backpermute ys $ U.enumFromStepN nCol n n
matrixPower :: Matrix -> Int -> Matrix
matrixPower mat 1 = mat
matrixPower mat n = avoidOverflow res
where
mat2 = matrixPower mat (n `quot` 2)
pow = matrixMultiply mat2 mat2
res | odd n = matrixMultiply pow mat
| otherwise = pow
matrixCenter :: Matrix -> Double
matrixCenter (Matrix n xs _) = (U.!) xs (k*n + k) where k = n `quot` 2
for :: Monad m => Int -> Int -> (Int -> m ()) -> m ()
for n0 n f = loop n0
where
loop i | i == n = return ()
| otherwise = f i >> loop (i+1)
modify :: U.Unbox a => M.MVector s a -> Int -> (a -> a) -> ST s ()
modify arr i f = do x <- M.read arr i
M.write arr i (f x)