src/Data/Random/Distribution/Kolmogorov.hs

{-# LANGUAGE
        MultiParamTypeClasses,
        FlexibleInstances,
        FlexibleContexts 
  #-}
-- |CDF of Kolmogorov's D-statistic, parameterized by sample size
module Data.Random.Distribution.Kolmogorov (D(..), kCdf, kCdfQuick)where

import Numeric.LinearAlgebra
import Math.Gamma
import Math.Root.Bracket
import Math.Root.Finder
import Math.Root.Finder.Brent

import Data.Random

data D a = D Int
    deriving (Eq, Show)

instance Distribution D Double where
    rvar (D n) = do
        u <- stdUniform
        let f x = kCdfQuick n x - u
            eps = encodeFloat 1 (1 - floatDigits eps)
            (x0,x1) = last (bracket f 0.01 0.1)
        
        case findRoot f x0 x1 eps :: Either (Brent Double Double) (Brent Double Double) of
            Left stopped    -> fail ("Failed to find root, final state was: " ++ show stopped)
            Right root      -> do
                let z = estimateRoot root
                    dz = 0.5 * estimateError root
                
                -- Very slight "blur" to account for the error in the root.
                -- Center the blur above or below the reported root depending
                -- on whether the root is above or below the zero.
                case f z `compare` 0 of
                    GT -> normal (z-dz) dz
                    EQ -> normal z dz
                    LT -> normal (z+dz) dz
                
                

instance CDF D Double where
    cdf (D n) = kCdfQuick n

shiftPointU :: Num a => a
shiftPointU = 2 ^ 450
shiftPointL :: Fractional a => a
shiftPointL = 2 ^^ (-450)

shiftDist :: Integral a => a
shiftDist = 450

shiftBase :: Num a => a
shiftBase = 2

-- Shifting and unshifting operations: These manage the external exponent
-- for a value.
shift x = shiftBy id (*) x
shiftBy f (*) (x,e)
    | fx < shiftPointL  = (shiftPointU * x, e - shiftDist)
    | fx > shiftPointU  = (shiftPointL * x, e + shiftDist)
    | otherwise         = (x,e)
    where fx = f x

unshift (x,0) = x
unshift (x,e)
    | e >= shiftDist        = unshift (shiftPointU * x, e - shiftDist)
    | e <= negate shiftDist = unshift (shiftPointL * x, e + shiftDist)
    | otherwise             = x * shiftBase ** realToFrac e

multAndUnshift (a,aE) (b,bE) = unshift (a*b, aE+bE)

-- Decompose d into k and h as described in the paper
kDecomp n d = (k, h)
    where
        dn = d * fromIntegral n
        k = ceiling dn
        h = (fromIntegral k - dn)

-- Compute the scale factor n!/n^n in "shifted" form
kScale n = foldl f (1,0) [1..n]
        where
            f (s,e) i = shift (s * fromIntegral i / fromIntegral n, e)

-- H matrix used in kCdf and k value
kCdfMat n d = (matrix m m hMatCell, k)
    where
        spine n = (1 - h^n) / factorial n
        
        hMatCell r 0 | r == m-1     = (1 - 2 * h ^ m + max 0 ((2 * h - 1)^m)) / factorial m
        hMatCell r 0                = spine (fromIntegral r + 1)
        hMatCell r c | r == m-1     = spine (m - fromIntegral c)
        hMatCell r c
            | i - j + 1 >= 0    = 1 / factorial (fromIntegral (i-j+1))
            | otherwise         = 0
            where i = r + 1; j = c + 1
        
        (k, h) = kDecomp n d
        m = 2 * k - 1

-- CDF of Kolmogorov's D-statistic for a given sample size n
kCdf n d 
    | d <= 0    = 0
    | otherwise = multAndUnshift (kScale n) (indexM hN (k-1) (k-1), expShift)
    where
        (hN, expShift) = mPower hMat n
        (hMat, k) = kCdfMat n d

-- |'kCdf' with Marsaglia's long-computation shortcut approximation.
-- Accurate to about 7 decimal places in the right tail of the distribution.
kCdfQuick n d
    | s > 7.24 || (s > 3.76 && n > 99)
        = 1 - 2 * exp (negate (2.000071 + 0.331/sqrt n' + 1.409/n') * s)
    | otherwise = kCdf n d
    where s = d*d*n'; n' = fromIntegral n

-- |Matrix power to Int exponent with explicitly managed exponent.  Returns
-- a multiple of m^n, along with the natural logarithm of the factor.
--
-- That is, if @(mn, logS) = mPower m n@ then m^n = (e^logS) * mn
mPower m 1 = (m, 0)
mPower m n
    | even n    = square (mPower m (n `div` 2))
    | otherwise = m `mult` mPower m (n-1)
    where
        k = min (matRows m) (matCols m) `div` 2
        
        square (m, e) = m `mult` (m,e + e)
        mult m1 (m2, e2) = shiftBy (\m -> indexM m k k) scale (multiply m1 m2, e2)


-- Analytic limiting form (as n -> ∞)
-- kCdfLim d = sqrt (2 * pi) * recip_x * sum [exp (negate ((2 * fromIntegral i - 1)^2) * pi_2 * 0.125 * recip_x*recip_x) | i <- [1..]]
--     where
--         recip_x = recip x
--         pi_2 = pi*pi