```{-# LANGUAGE DeriveDataTypeable, PatternGuards #-}
module Data.Pass.L.Estimator
( Estimator(..)
, Estimate(..)
, estimateBy
) where

import Data.Ratio
import Data.Binary
import Data.Data
import qualified Data.IntMap as IM
import Data.IntMap (IntMap)
import Data.Pass.Util (clamp)
import Data.Hashable
import Data.Pass.Util.Beta (Beta(Beta))
import qualified Data.Pass.Util.Beta as Beta

-- | Techniques used to smooth the nearest values when calculating quantile functions. R2 is used by default, and the numbering convention follows the
-- use in the R programming language, as far as it goes.
data Estimator
= R1  -- ^ Inverse of the empirical distribution function
| R2  -- ^ .. with averaging at discontinuities (default)
| R3  -- ^ The observation numbered closest to Np. NB: does not yield a proper median
| R4  -- ^ Linear interpolation of the empirical distribution function. NB: does not yield a proper median.
| R5  -- ^ .. with knots midway through the steps as used in hydrology. This is the simplest continuous estimator that yields a correct median
| R6  -- ^ Linear interpolation of the expectations of the order statistics for the uniform distribution on [0,1]
| R7  -- ^ Linear interpolation of the modes for the order statistics for the uniform distribution on [0,1]
| R8  -- ^ Linear interpolation of the approximate medans for order statistics.
| R9  -- ^ The resulting quantile estimates are approximately unbiased for the expected order statistics if x is normally distributed.
| R10 -- ^ When rounding h, this yields the order statistic with the least expected square deviation relative to p.
| HD  -- ^ The Harrell-Davis quantile estimator based on bootstrapped order statistics

instance Binary Estimator where
put e = put (fromIntegral (fromEnum e) :: Word8)
get = do
i <- get :: Get Word8
return \$ toEnum (fromIntegral i)

instance Hashable Estimator where
hashWithSalt n e = n `hashWithSalt` fromEnum e

data Estimate r = Estimate {-# UNPACK #-} !Rational (IntMap r)
deriving Show

continuousEstimator ::
Fractional r =>
(Rational -> (Rational, Rational)) ->
(Rational -> Rational -> Rational) ->
Rational -> Int -> Estimate r
continuousEstimator bds f p n = Estimate h \$
if p < lo then IM.singleton 0 1
else if p >= hi then IM.singleton (n - 1) 1
else case properFraction h of
(w,frac) | frac' <- fromRational frac -> IM.fromList [(w - 1, frac'), (w, 1 - frac')]
where
r = fromIntegral n
h = f p r
(lo, hi) = bds r

estimateBy :: Fractional r => Estimator -> Rational -> Int -> Estimate r
estimateBy HD = \q n -> Estimate (1%2) \$ let
n' = fromIntegral n
np1 = n' + 1
q' = fromRational q
d = Beta (q'*np1) (np1*(1-q'))
in if q == 0 then IM.singleton 0 1
else if q == 1 then IM.singleton (n - 1) 1
else IM.fromListWith (+)
[ (i, realToFrac \$ Beta.cumulative d ((fromIntegral i + 1) / n') - Beta.cumulative d (fromIntegral i / n'))
| i <- [0 .. n-1]
]
estimateBy R1  = \p n -> let np = fromIntegral n * p in Estimate (np + 1%2) \$ IM.singleton (clamp n (ceiling np - 1)) 1
estimateBy R2  = \p n -> let np = fromIntegral n * p in Estimate (np + 1%2) \$
if p == 0      then IM.singleton 0       1
else if p == 1 then IM.singleton (n - 1) 1
else IM.fromListWith (+) [(clamp n (ceiling np - 1), 0.5), (clamp n (floor np), 0.5)]
estimateBy R3  = \p n -> let np = fromIntegral n * p in Estimate np \$ IM.singleton (clamp n (round np - 1)) 1
estimateBy R4  = continuousEstimator (\n -> (recip n, 1)) (*)
estimateBy R5  = continuousEstimator (\n -> let tn = 2 * n in (recip tn, (tn - 1) / tn)) \$ \p n -> p*n + 0.5
estimateBy R6  = continuousEstimator (\n -> (recip (n + 1), n / (n + 1))) \$ \p n -> p*(n+1)
estimateBy R7  = continuousEstimator (\_ -> (0, 1)) \$ \p n -> p*(n-1) + 1
estimateBy R8  = continuousEstimator (\n -> (2/3 / (n + 1/3), (n - 1/3)/(n + 1/3))) \$ \p n -> p*(n + 1/3) + 1/3
estimateBy R9  = continuousEstimator (\n -> (0.625 / (n + 0.25), (n - 0.375)/(n + 0.25))) \$ \p n -> p*(n + 0.25) + 0.375
estimateBy R10 = continuousEstimator (\n -> (1.5 / (n + 2), (n + 0.5)/(n + 2))) \$ \p n -> p*(n + 2) - 0.5
```