{-# LANGUAGE FlexibleContexts #-}
module Statistics.Quantile
(
weightedAvg
, ContParam(..)
, continuousBy
, midspread
, cadpw
, hazen
, s
, spss
, medianUnbiased
, normalUnbiased
) where
import Data.Vector.Generic ((!))
import Numeric.MathFunctions.Constants (m_epsilon)
import Statistics.Function (partialSort)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
weightedAvg :: G.Vector v Double =>
Int
-> Int
-> v Double
-> Double
weightedAvg k q x
| G.any isNaN x = modErr "weightedAvg" "Sample contains NaNs"
| n == 1 = G.head x
| q < 2 = modErr "weightedAvg" "At least 2 quantiles is needed"
| k < 0 || k >= q = modErr "weightedAvg" "Wrong quantile number"
| otherwise = xj + g * (xj1 - xj)
where
j = floor idx
idx = fromIntegral (n - 1) * fromIntegral k / fromIntegral q
g = idx - fromIntegral j
xj = sx ! j
xj1 = sx ! (j+1)
sx = partialSort (j+2) x
n = G.length x
{-# SPECIALIZE weightedAvg :: Int -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE weightedAvg :: Int -> Int -> V.Vector Double -> Double #-}
data ContParam = ContParam {-# UNPACK #-} !Double {-# UNPACK #-} !Double
continuousBy :: G.Vector v Double =>
ContParam
-> Int
-> Int
-> v Double
-> Double
continuousBy (ContParam a b) k q x
| q < 2 = modErr "continuousBy" "At least 2 quantiles is needed"
| k < 0 || k > q = modErr "continuousBy" "Wrong quantile number"
| G.any isNaN x = modErr "continuousBy" "Sample contains NaNs"
| otherwise = (1-h) * item (j-1) + h * item j
where
j = floor (t + eps)
t = a + p * (fromIntegral n + 1 - a - b)
p = fromIntegral k / fromIntegral q
h | abs r < eps = 0
| otherwise = r
where r = t - fromIntegral j
eps = m_epsilon * 4
n = G.length x
item = (sx !) . bracket
sx = partialSort (bracket j + 1) x
bracket m = min (max m 0) (n - 1)
{-# SPECIALIZE
continuousBy :: ContParam -> Int -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE
continuousBy :: ContParam -> Int -> Int -> V.Vector Double -> Double #-}
midspread :: G.Vector v Double =>
ContParam
-> Int
-> v Double
-> Double
midspread (ContParam a b) k x
| G.any isNaN x = modErr "midspread" "Sample contains NaNs"
| k <= 0 = modErr "midspread" "Nonpositive number of quantiles"
| otherwise = quantile (1-frac) - quantile frac
where
quantile i = (1-h i) * item (j i-1) + h i * item (j i)
j i = floor (t i + eps) :: Int
t i = a + i * (fromIntegral n + 1 - a - b)
h i | abs r < eps = 0
| otherwise = r
where r = t i - fromIntegral (j i)
eps = m_epsilon * 4
n = G.length x
item = (sx !) . bracket
sx = partialSort (bracket (j (1-frac)) + 1) x
bracket m = min (max m 0) (n - 1)
frac = 1 / fromIntegral k
{-# SPECIALIZE midspread :: ContParam -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE midspread :: ContParam -> Int -> V.Vector Double -> Double #-}
cadpw :: ContParam
cadpw = ContParam 0 1
hazen :: ContParam
hazen = ContParam 0.5 0.5
spss :: ContParam
spss = ContParam 0 0
s :: ContParam
s = ContParam 1 1
medianUnbiased :: ContParam
medianUnbiased = ContParam third third
where third = 1/3
normalUnbiased :: ContParam
normalUnbiased = ContParam ta ta
where ta = 3/8
modErr :: String -> String -> a
modErr f err = error $ "Statistics.Quantile." ++ f ++ ": " ++ err