module Statistics.Order
(
L(..)
, (@@), (@!)
, (@#)
, breakdown
, trimean
, midhinge
, iqr
, iqm
, lscale
, trimmed
, winsorized, winsorised
, jackknifed
, mean
, total
, lmin, lmax
, midrange
, nthSmallest
, nthLargest
, quantile
, median
, tercile, t1, t2
, quartile, q1, q2, q3
, quintile, qu1, qu2, qu3, qu4
, percentile
, permille
, hdquantile
, quantileBy
, Estimator
, Estimate(..)
, r1,r2,r3,r4,r5,r6,r7,r8,r9,r10
) where
import Data.Ratio
import Data.List (sort)
import Data.IntMap (IntMap)
import qualified Data.IntMap as IM
import Data.Vector (Vector, (!))
import qualified Data.Vector as V
import Data.VectorSpace
import Statistics.Distribution.Beta
import qualified Statistics.Distribution as D
newtype L r = L { runL :: Int -> IntMap r }
(@@) :: (Num r, Ord r) => L r -> [r] -> r
l @@ xs = l @! V.fromList (sort xs)
(@!) :: Num r => L r -> Vector r -> r
L f @! v = IM.foldrWithKey (\k x y -> (v ! k) * x + y) 0 $ f (V.length v)
(@#) :: Num r => L r -> Int -> [r]
L f @# n = map (\k -> IM.findWithDefault 0 k fn) [0..n1] where fn = f n
breakdown :: (Num r, Eq r) => L r -> Int
breakdown (L f)
| IM.null m = 50
| otherwise = fst (IM.findMin m) `min` (100 fst (IM.findMax m))
where m = IM.filter (/= 0) $ f 101
instance Num r => AdditiveGroup (L r) where
zeroV = L $ \_ -> IM.empty
L fa ^+^ L fb = L $ \n -> IM.unionWith (+) (fa n) (fb n)
negateV (L fa) = L $ fmap negate . fa
instance Num r => VectorSpace (L r) where
type Scalar (L r) = r
x *^ L y = L $ fmap (x *) . y
clamp :: Int -> Int -> Int
clamp n k
| k <= 0 = 0
| k >= n = n 1
| otherwise = k
mean :: Fractional r => L r
mean = L $ \n -> IM.fromList [ (i, 1 / fromIntegral n) | i <- [0..n1]]
total :: Num r => L r
total = L $ \n -> IM.fromList [ (i, 1) | i <- [0..n1]]
trimmed :: Fractional r => Rational -> L r -> L r
trimmed p (L g) = L $ \n -> case properFraction (fromIntegral n * p) of
(w, 0) -> IM.fromDistinctAscList [ (k + w, v) | (k,v) <- IM.toAscList $ g (n w*2)]
(w, f) | w' <- w + 1 -> IM.fromListWith (+) $ [ (k + w, fromRational (1 f) * v) | (k,v) <- IM.toList $ g (n w*2)] ++
[ (k + w', fromRational f * v) | (k,v) <- IM.toList $ g (n w'*2)]
winsorized, winsorised :: Fractional r => Rational -> L r -> L r
winsorised = winsorized
winsorized p (L g) = L $ \n -> case properFraction (fromIntegral n * p) of
(w, 0) -> IM.fromAscListWith (+) [ (w `max` min (n 1 w) k, v) | (k,v) <- IM.toAscList (g n) ]
(w, f) | w' <- w + 1 -> IM.fromListWith (+) $ do
(k,v) <- IM.toList (g n)
[ (w `max` min (n 1 w ) k, v * fromRational (1 f)),
(w' `max` min (n 1 w') k, v * fromRational f)]
jackknifed :: Fractional r => L r -> L r
jackknifed (L g) = L $ \n -> IM.fromAscListWith (+) $ do
let n' = fromIntegral n
(k,v) <- IM.toAscList (g (n 1))
let k' = fromIntegral k + 1
[(k, (n' k') * v / n'), (k + 1, k' * v / n')]
median :: Fractional r => L r
median = L go where
go n
| odd n = IM.singleton (div (n 1) 2) 1
| k <- div n 2 = IM.fromList [(k1, 0.5), (k, 0.5)]
data Estimate r = Estimate !Rational (IntMap r)
type Estimator r = Rational -> Int -> Estimate r
quantileBy :: Num r => Estimator r -> Rational -> L r
quantileBy f p = L $ \n -> case f p n of
Estimate h qp -> case properFraction h of
(w, 0) -> IM.singleton (clamp n (w 1)) 1
_ -> qp
quantile :: Fractional r => Rational -> L r
quantile = quantileBy r2
tercile :: Fractional r => Rational -> L r
tercile n = quantile (n/3)
t1, t2 :: Fractional r => L r
t1 = quantile (1/3)
t2 = quantile (2/3)
quartile :: Fractional r => Rational -> L r
quartile n = quantile (n/4)
q1, q2, q3 :: Fractional r => L r
q1 = quantile 0.25
q2 = median
q3 = quantile 0.75
quintile :: Fractional r => Rational -> L r
quintile n = quantile (n/5)
qu1, qu2, qu3, qu4 :: Fractional r => L r
qu1 = quintile 1
qu2 = quintile 2
qu3 = quintile 3
qu4 = quintile 4
percentile :: Fractional r => Rational -> L r
percentile n = quantile (n/100)
permille :: Fractional r => Rational -> L r
permille n = quantile (n/1000)
nthSmallest :: Num r => Int -> L r
nthSmallest k = L $ \n -> IM.singleton (clamp n k) 1
nthLargest :: Num r => Int -> L r
nthLargest k = L $ \n -> IM.singleton (clamp n (n 1 k)) 1
midhinge :: Fractional r => L r
midhinge = (q1 ^+^ q3) ^/ 2
trimean :: Fractional r => L r
trimean = (q1 ^+^ 2 *^ q2 ^+^ q3) ^/ 4
lmax :: Num r => L r
lmax = L $ \n -> IM.singleton (n1) 1
lmin :: Num r => L r
lmin = L $ \_ -> IM.singleton 0 1
midrange :: Fractional r => L r
midrange = L $ \n -> IM.fromList [(0,1),(n1,1)]
iqr :: Fractional r => L r
iqr = q3 ^-^ q1
iqm :: Fractional r => L r
iqm = trimmed 0.25 mean
lscale :: Fractional r => L r
lscale = L $ \n -> let
r = fromIntegral n
scale = 1 / (r * (r1))
in IM.fromList [ (i 1, scale * (2 * fromIntegral i 1 r)) | i <- [1..n] ]
hdquantile :: Fractional r => Rational -> L r
hdquantile q = L $ \n ->
let n' = fromIntegral n
np1 = n' + 1
q' = fromRational q
d = betaDistr (q'*np1) (np1*(1q')) in
if q == 0 then IM.singleton 0 1
else if q == 1 then IM.singleton (n 1) 1
else IM.fromAscList
[ (i, realToFrac $ D.cumulative d ((fromIntegral i + 1) / n')
D.cumulative d (fromIntegral i / n'))
| i <- [0 .. n 1]
]
r1 :: Num r => Estimator r
r1 p n = Estimate (np + 1%2) $ IM.singleton (clamp n (ceiling np 1)) 1
where np = fromIntegral n * p
r2 :: Fractional r => Estimator r
r2 p n = Estimate (np + 1%2) $
if p == 0 then IM.singleton 0 1
else if p == 1 then IM.singleton (n 1) 1
else IM.fromList [(ceiling np 1, 0.5), (floor np, 0.5)]
where np = fromIntegral n * p
r3 :: Num r => Estimator r
r3 p n = Estimate np $ IM.singleton (clamp n (round np 1)) 1
where np = fromIntegral n * p
continuousEstimator ::
Fractional r =>
(Rational -> (Rational, Rational)) ->
(Rational -> Rational -> Rational) ->
Estimator r
continuousEstimator bds f p n = Estimate h $
if p < lo then IM.singleton 0 1
else if p >= hi then IM.singleton (n 1) 1
else case properFraction h of
(w,frac) | frac' <- fromRational frac -> IM.fromList [(w 1, frac'), (w, 1 frac')]
where
r = fromIntegral n
h = f p r
(lo, hi) = bds r
r4 :: Fractional r => Estimator r
r4 = continuousEstimator (\n -> (1 / n, 1)) (*)
r5 :: Fractional r => Estimator r
r5 = continuousEstimator (\n -> let tn = 2 * n in (1 / tn, (tn 1) / tn)) $ \p n -> p*n + 0.5
r6 :: Fractional r => Estimator r
r6 = continuousEstimator (\n -> (1 / (n + 1), n / (n + 1))) $ \p n -> p*(n+1)
r7 :: Fractional r => Estimator r
r7 = continuousEstimator (\_ -> (0, 1)) $ \p n -> p*(n1) + 1
r8 :: Fractional r => Estimator r
r8 = continuousEstimator (\n -> (2/3 / (n + 1/3), (n 1/3)/(n + 1/3))) $ \p n -> p*(n + 1/3) + 1/3
r9 :: Fractional r => Estimator r
r9 = continuousEstimator (\n -> (0.625 / (n + 0.25), (n 0.375)/(n + 0.25))) $ \p n -> p*(n + 0.25) + 0.375
r10 :: Fractional r => Estimator r
r10 = continuousEstimator (\n -> (1.5 / (n + 2), (n + 0.5)/(n + 2))) $ \p n -> p*(n + 2) 0.5