{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, FlexibleContexts #-} -- |CDF of Kolmogorov's D-statistic, parameterized by sample size module Data.Random.Distribution.Kolmogorov (D(..), kCdf, kCdfQuick)where import Numeric.LinearAlgebra import Math.Gamma import Math.Root.Bracket import Math.Root.Finder import Math.Root.Finder.Brent import Data.Random data D a = D Int deriving (Eq, Show) instance Distribution D Double where rvar (D n) = do u <- stdUniform let f x = kCdfQuick n x - u eps = encodeFloat 1 (1 - floatDigits eps) (x0,x1) = last (bracket f 0.01 0.1) case findRoot f x0 x1 eps :: Either (Brent Double Double) (Brent Double Double) of Left stopped -> fail ("Failed to find root, final state was: " ++ show stopped) Right root -> do let z = estimateRoot root dz = 0.5 * estimateError root -- Very slight "blur" to account for the error in the root. -- Center the blur above or below the reported root depending -- on whether the root is above or below the zero. case f z `compare` 0 of GT -> normal (z-dz) dz EQ -> normal z dz LT -> normal (z+dz) dz instance CDF D Double where cdf (D n) = kCdfQuick n shiftPointU :: Num a => a shiftPointU = 2 ^ 450 shiftPointL :: Fractional a => a shiftPointL = 2 ^^ (-450) shiftDist :: Integral a => a shiftDist = 450 shiftBase :: Num a => a shiftBase = 2 -- Shifting and unshifting operations: These manage the external exponent -- for a value. shift x = shiftBy id (*) x shiftBy f (*) (x,e) | fx < shiftPointL = (shiftPointU * x, e - shiftDist) | fx > shiftPointU = (shiftPointL * x, e + shiftDist) | otherwise = (x,e) where fx = f x unshift (x,0) = x unshift (x,e) | e >= shiftDist = unshift (shiftPointU * x, e - shiftDist) | e <= negate shiftDist = unshift (shiftPointL * x, e + shiftDist) | otherwise = x * shiftBase ** realToFrac e multAndUnshift (a,aE) (b,bE) = unshift (a*b, aE+bE) -- Decompose d into k and h as described in the paper kDecomp n d = (k, h) where dn = d * fromIntegral n k = ceiling dn h = (fromIntegral k - dn) -- Compute the scale factor n!/n^n in "shifted" form kScale n = foldl f (1,0) [1..n] where f (s,e) i = shift (s * fromIntegral i / fromIntegral n, e) -- H matrix used in kCdf and k value kCdfMat n d = (matrix m m hMatCell, k) where spine n = (1 - h^n) / factorial n hMatCell r 0 | r == m-1 = (1 - 2 * h ^ m + max 0 ((2 * h - 1)^m)) / factorial m hMatCell r 0 = spine (fromIntegral r + 1) hMatCell r c | r == m-1 = spine (m - fromIntegral c) hMatCell r c | i - j + 1 >= 0 = 1 / factorial (fromIntegral (i-j+1)) | otherwise = 0 where i = r + 1; j = c + 1 (k, h) = kDecomp n d m = 2 * k - 1 -- CDF of Kolmogorov's D-statistic for a given sample size n kCdf n d | d <= 0 = 0 | otherwise = multAndUnshift (kScale n) (indexM hN (k-1) (k-1), expShift) where (hN, expShift) = mPower hMat n (hMat, k) = kCdfMat n d -- |'kCdf' with Marsaglia's long-computation shortcut approximation. -- Accurate to about 7 decimal places in the right tail of the distribution. kCdfQuick n d | s > 7.24 || (s > 3.76 && n > 99) = 1 - 2 * exp (negate (2.000071 + 0.331/sqrt n' + 1.409/n') * s) | otherwise = kCdf n d where s = d*d*n'; n' = fromIntegral n -- |Matrix power to Int exponent with explicitly managed exponent. Returns -- a multiple of m^n, along with the natural logarithm of the factor. -- -- That is, if @(mn, logS) = mPower m n@ then m^n = (e^logS) * mn mPower m 1 = (m, 0) mPower m n | even n = square (mPower m (n `div` 2)) | otherwise = m `mult` mPower m (n-1) where k = min (matRows m) (matCols m) `div` 2 square (m, e) = m `mult` (m,e + e) mult m1 (m2, e2) = shiftBy (\m -> indexM m k k) scale (multiply m1 m2, e2) -- Analytic limiting form (as n -> ∞) -- kCdfLim d = sqrt (2 * pi) * recip_x * sum [exp (negate ((2 * fromIntegral i - 1)^2) * pi_2 * 0.125 * recip_x*recip_x) | i <- [1..]] -- where -- recip_x = recip x -- pi_2 = pi*pi