{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveFoldable     #-}
{-# LANGUAGE DeriveFunctor      #-}
{-# LANGUAGE DeriveGeneric      #-}
{-# LANGUAGE FlexibleContexts   #-}
{-# LANGUAGE ViewPatterns       #-}
-- |
-- Module    : Statistics.Quantile
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Functions for approximating quantiles, i.e. points taken at regular
-- intervals from the cumulative distribution function of a random
-- variable.
--
-- The number of quantiles is described below by the variable /q/, so
-- with /q/=4, a 4-quantile (also known as a /quartile/) has 4
-- intervals, and contains 5 points.  The parameter /k/ describes the
-- desired point, where 0 ≤ /k/ ≤ /q/.

module Statistics.Quantile
    (
    -- * Quantile estimation functions
    -- $cont_quantiles
      ContParam(..)
    , Default(..)
    , quantile
    , quantiles
    , quantilesVec
    -- ** Parameters for the continuous sample method
    , cadpw
    , hazen
    , spss
    , s
    , medianUnbiased
    , normalUnbiased
    -- * Other algorithms
    , weightedAvg
    -- * Median & other specializations
    , median
    , mad
    , midspread
    -- * Deprecated
    , continuousBy
    -- * References
    -- $references
    ) where

import           Data.Binary            (Binary)
import           Data.Aeson             (ToJSON,FromJSON)
import           Data.Data              (Data,Typeable)
import           Data.Default.Class
import qualified Data.Foldable        as F
import           Data.Vector.Generic ((!))
import qualified Data.Vector          as V
import qualified Data.Vector.Generic  as G
import qualified Data.Vector.Unboxed  as U
import qualified Data.Vector.Storable as S
import GHC.Generics (Generic)

import Statistics.Function (partialSort)


----------------------------------------------------------------
-- Quantile estimation
----------------------------------------------------------------

-- | O(/n/·log /n/). Estimate the /k/th /q/-quantile of a sample,
-- using the weighted average method. Up to rounding errors it's same
-- as @quantile s@.
--
-- The following properties should hold otherwise an error will be thrown.
--
--   * the length of the input is greater than @0@
--
--   * the input does not contain @NaN@
--
--   * k ≥ 0 and k ≤ q
weightedAvg :: G.Vector v Double =>
               Int        -- ^ /k/, the desired quantile.
            -> Int        -- ^ /q/, the number of quantiles.
            -> v Double   -- ^ /x/, the sample data.
            -> Double
weightedAvg :: forall (v :: * -> *).
Vector v Double =>
Int -> Int -> v Double -> Double
weightedAvg Int
k Int
q v Double
x
  | forall (v :: * -> *) a. Vector v a => (a -> Bool) -> v a -> Bool
G.any forall a. RealFloat a => a -> Bool
isNaN v Double
x   = forall a. String -> String -> a
modErr String
"weightedAvg" String
"Sample contains NaNs"
  | Int
n forall a. Eq a => a -> a -> Bool
== Int
0          = forall a. String -> String -> a
modErr String
"weightedAvg" String
"Sample is empty"
  | Int
n forall a. Eq a => a -> a -> Bool
== Int
1          = forall (v :: * -> *) a. Vector v a => v a -> a
G.head v Double
x
  | Int
q forall a. Ord a => a -> a -> Bool
< Int
2           = forall a. String -> String -> a
modErr String
"weightedAvg" String
"At least 2 quantiles is needed"
  | Int
k forall a. Eq a => a -> a -> Bool
== Int
q          = forall (v :: * -> *) a. (Vector v a, Ord a) => v a -> a
G.maximum v Double
x
  | Int
k forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
|| Int
k forall a. Ord a => a -> a -> Bool
< Int
q = Double
xj forall a. Num a => a -> a -> a
+ Double
g forall a. Num a => a -> a -> a
* (Double
xj1 forall a. Num a => a -> a -> a
- Double
xj)
  | Bool
otherwise       = forall a. String -> String -> a
modErr String
"weightedAvg" String
"Wrong quantile number"
  where
    j :: Int
j   = forall a b. (RealFrac a, Integral b) => a -> b
floor Double
idx
    idx :: Double
idx = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n forall a. Num a => a -> a -> a
- Int
1) forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
q
    g :: Double
g   = Double
idx forall a. Num a => a -> a -> a
- forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
j
    xj :: Double
xj  = v Double
sx forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
! Int
j
    xj1 :: Double
xj1 = v Double
sx forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
! (Int
jforall a. Num a => a -> a -> a
+Int
1)
    sx :: v Double
sx  = forall (v :: * -> *) e. (Vector v e, Ord e) => Int -> v e -> v e
partialSort (Int
jforall a. Num a => a -> a -> a
+Int
2) v Double
x
    n :: Int
n   = forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
x
{-# SPECIALIZE weightedAvg :: Int -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE weightedAvg :: Int -> Int -> V.Vector Double -> Double #-}
{-# SPECIALIZE weightedAvg :: Int -> Int -> S.Vector Double -> Double #-}


----------------------------------------------------------------
-- Quantiles continuous algorithm
----------------------------------------------------------------

-- $cont_quantiles
--
-- Below is family of functions which use same algorithm for estimation
-- of sample quantiles. It approximates empirical CDF as continuous
-- piecewise function which interpolates linearly between points
-- \((X_k,p_k)\) where \(X_k\) is k-th order statistics (k-th smallest
-- element) and \(p_k\) is probability corresponding to
-- it. 'ContParam' determines how \(p_k\) is chosen. For more detailed
-- explanation see [Hyndman1996].
--
-- This is the method used by most statistical software, such as R,
-- Mathematica, SPSS, and S.


-- | Parameters /α/ and /β/ to the 'continuousBy' function. Exact
--   meaning of parameters is described in [Hyndman1996] in section
--   \"Piecewise linear functions\"
data ContParam = ContParam {-# UNPACK #-} !Double {-# UNPACK #-} !Double
  deriving (Int -> ContParam -> ShowS
[ContParam] -> ShowS
ContParam -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ContParam] -> ShowS
$cshowList :: [ContParam] -> ShowS
show :: ContParam -> String
$cshow :: ContParam -> String
showsPrec :: Int -> ContParam -> ShowS
$cshowsPrec :: Int -> ContParam -> ShowS
Show,ContParam -> ContParam -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ContParam -> ContParam -> Bool
$c/= :: ContParam -> ContParam -> Bool
== :: ContParam -> ContParam -> Bool
$c== :: ContParam -> ContParam -> Bool
Eq,Eq ContParam
ContParam -> ContParam -> Bool
ContParam -> ContParam -> Ordering
ContParam -> ContParam -> ContParam
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ContParam -> ContParam -> ContParam
$cmin :: ContParam -> ContParam -> ContParam
max :: ContParam -> ContParam -> ContParam
$cmax :: ContParam -> ContParam -> ContParam
>= :: ContParam -> ContParam -> Bool
$c>= :: ContParam -> ContParam -> Bool
> :: ContParam -> ContParam -> Bool
$c> :: ContParam -> ContParam -> Bool
<= :: ContParam -> ContParam -> Bool
$c<= :: ContParam -> ContParam -> Bool
< :: ContParam -> ContParam -> Bool
$c< :: ContParam -> ContParam -> Bool
compare :: ContParam -> ContParam -> Ordering
$ccompare :: ContParam -> ContParam -> Ordering
Ord,Typeable ContParam
ContParam -> DataType
ContParam -> Constr
(forall b. Data b => b -> b) -> ContParam -> ContParam
forall a.
Typeable a
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u. Int -> (forall d. Data d => d -> u) -> ContParam -> u
forall u. (forall d. Data d => d -> u) -> ContParam -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ContParam -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ContParam -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> ContParam -> m ContParam
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> ContParam -> m ContParam
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ContParam
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ContParam -> c ContParam
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ContParam)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c ContParam)
gmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> ContParam -> m ContParam
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> ContParam -> m ContParam
gmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> ContParam -> m ContParam
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> ContParam -> m ContParam
gmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> ContParam -> m ContParam
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> ContParam -> m ContParam
gmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> ContParam -> u
$cgmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> ContParam -> u
gmapQ :: forall u. (forall d. Data d => d -> u) -> ContParam -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> ContParam -> [u]
gmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ContParam -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ContParam -> r
gmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ContParam -> r
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ContParam -> r
gmapT :: (forall b. Data b => b -> b) -> ContParam -> ContParam
$cgmapT :: (forall b. Data b => b -> b) -> ContParam -> ContParam
dataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c ContParam)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c ContParam)
dataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ContParam)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ContParam)
dataTypeOf :: ContParam -> DataType
$cdataTypeOf :: ContParam -> DataType
toConstr :: ContParam -> Constr
$ctoConstr :: ContParam -> Constr
gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ContParam
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ContParam
gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ContParam -> c ContParam
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ContParam -> c ContParam
Data,Typeable,forall x. Rep ContParam x -> ContParam
forall x. ContParam -> Rep ContParam x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep ContParam x -> ContParam
$cfrom :: forall x. ContParam -> Rep ContParam x
Generic)

-- | We use 's' as default value which is same as R's default.
instance Default ContParam where
  def :: ContParam
def = ContParam
s

instance Binary   ContParam
instance ToJSON   ContParam
instance FromJSON ContParam

-- | O(/n/·log /n/). Estimate the /k/th /q/-quantile of a sample /x/,
--   using the continuous sample method with the given parameters.
--
--   The following properties should hold, otherwise an error will be thrown.
--
--     * input sample must be nonempty
--
--     * the input does not contain @NaN@
--
--     * 0 ≤ k ≤ q
quantile :: G.Vector v Double
         => ContParam  -- ^ Parameters /α/ and /β/.
         -> Int        -- ^ /k/, the desired quantile.
         -> Int        -- ^ /q/, the number of quantiles.
         -> v Double   -- ^ /x/, the sample data.
         -> Double
quantile :: forall (v :: * -> *).
Vector v Double =>
ContParam -> Int -> Int -> v Double -> Double
quantile ContParam
param Int
q Int
nQ v Double
xs
  | Int
nQ forall a. Ord a => a -> a -> Bool
< Int
2         = forall a. String -> String -> a
modErr String
"continuousBy" String
"At least 2 quantiles is needed"
  | Int -> Int -> Bool
badQ Int
nQ Int
q      = forall a. String -> String -> a
modErr String
"continuousBy" String
"Wrong quantile number"
  | forall (v :: * -> *) a. Vector v a => (a -> Bool) -> v a -> Bool
G.any forall a. RealFloat a => a -> Bool
isNaN v Double
xs = forall a. String -> String -> a
modErr String
"continuousBy" String
"Sample contains NaNs"
  | Bool
otherwise      = forall (v :: * -> *).
Vector v Double =>
v Double -> Double -> Double
estimateQuantile v Double
sortedXs Double
pk
  where
    pk :: Double
pk       = ContParam -> Int -> Int -> Int -> Double
toPk ContParam
param Int
n Int
q Int
nQ
    sortedXs :: v Double
sortedXs = forall (v :: * -> *).
Vector v Double =>
v Double -> Int -> v Double
psort v Double
xs forall a b. (a -> b) -> a -> b
$ forall a b. (RealFrac a, Integral b) => a -> b
floor Double
pk forall a. Num a => a -> a -> a
+ Int
1
    n :: Int
n        = forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
xs
{-# INLINABLE quantile #-}
{-# SPECIALIZE
    quantile :: ContParam -> Int -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE
    quantile :: ContParam -> Int -> Int -> V.Vector Double -> Double #-}
{-# SPECIALIZE
    quantile :: ContParam -> Int -> Int -> S.Vector Double -> Double #-}

-- | O(/k·n/·log /n/). Estimate set of the /k/th /q/-quantile of a
--   sample /x/, using the continuous sample method with the given
--   parameters. This is faster than calling quantile repeatedly since
--   sample should be sorted only once
--
--   The following properties should hold, otherwise an error will be thrown.
--
--     * input sample must be nonempty
--
--     * the input does not contain @NaN@
--
--     * for every k in set of quantiles 0 ≤ k ≤ q
quantiles :: (G.Vector v Double, F.Foldable f, Functor f)
  => ContParam
  -> f Int
  -> Int
  -> v Double
  -> f Double
quantiles :: forall (v :: * -> *) (f :: * -> *).
(Vector v Double, Foldable f, Functor f) =>
ContParam -> f Int -> Int -> v Double -> f Double
quantiles ContParam
param f Int
qs Int
nQ v Double
xs
  | Int
nQ forall a. Ord a => a -> a -> Bool
< Int
2             = forall a. String -> String -> a
modErr String
"quantiles" String
"At least 2 quantiles is needed"
  | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
F.any (Int -> Int -> Bool
badQ Int
nQ) f Int
qs = forall a. String -> String -> a
modErr String
"quantiles" String
"Wrong quantile number"
  | forall (v :: * -> *) a. Vector v a => (a -> Bool) -> v a -> Bool
G.any forall a. RealFloat a => a -> Bool
isNaN v Double
xs     = forall a. String -> String -> a
modErr String
"quantiles" String
"Sample contains NaNs"
  -- Doesn't matter what we put into empty container
  | forall (t :: * -> *) a. Foldable t => t a -> Bool
null f Int
qs            = Double
0 forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ f Int
qs
  | Bool
otherwise          = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall (v :: * -> *).
Vector v Double =>
v Double -> Double -> Double
estimateQuantile v Double
sortedXs) f Double
ks'
  where
    ks' :: f Double
ks'      = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\Int
q -> ContParam -> Int -> Int -> Int -> Double
toPk ContParam
param Int
n Int
q Int
nQ) f Int
qs
    sortedXs :: v Double
sortedXs = forall (v :: * -> *).
Vector v Double =>
v Double -> Int -> v Double
psort v Double
xs forall a b. (a -> b) -> a -> b
$ forall a b. (RealFrac a, Integral b) => a -> b
floor (forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
F.maximum f Double
ks') forall a. Num a => a -> a -> a
+ Int
1
    n :: Int
n        = forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
xs
{-# INLINABLE quantiles #-}
{-# SPECIALIZE quantiles
      :: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> V.Vector Double -> f Double #-}
{-# SPECIALIZE quantiles
      :: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> U.Vector Double -> f Double #-}
{-# SPECIALIZE quantiles
      :: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> S.Vector Double -> f Double #-}

-- | O(/k·n/·log /n/). Same as quantiles but uses 'G.Vector' container
--   instead of 'Foldable' one.
quantilesVec :: (G.Vector v Double, G.Vector v Int)
  => ContParam
  -> v Int
  -> Int
  -> v Double
  -> v Double
quantilesVec :: forall (v :: * -> *).
(Vector v Double, Vector v Int) =>
ContParam -> v Int -> Int -> v Double -> v Double
quantilesVec ContParam
param v Int
qs Int
nQ v Double
xs
  | Int
nQ forall a. Ord a => a -> a -> Bool
< Int
2             = forall a. String -> String -> a
modErr String
"quantilesVec" String
"At least 2 quantiles is needed"
  | forall (v :: * -> *) a. Vector v a => (a -> Bool) -> v a -> Bool
G.any (Int -> Int -> Bool
badQ Int
nQ) v Int
qs = forall a. String -> String -> a
modErr String
"quantilesVec" String
"Wrong quantile number"
  | forall (v :: * -> *) a. Vector v a => (a -> Bool) -> v a -> Bool
G.any forall a. RealFloat a => a -> Bool
isNaN v Double
xs     = forall a. String -> String -> a
modErr String
"quantilesVec" String
"Sample contains NaNs"
  | forall (v :: * -> *) a. Vector v a => v a -> Bool
G.null v Int
qs          = forall (v :: * -> *) a. Vector v a => v a
G.empty
  | Bool
otherwise          = forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (forall (v :: * -> *).
Vector v Double =>
v Double -> Double -> Double
estimateQuantile v Double
sortedXs) v Double
ks'
  where
    ks' :: v Double
ks'      = forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (\Int
q -> ContParam -> Int -> Int -> Int -> Double
toPk ContParam
param Int
n Int
q Int
nQ) v Int
qs
    sortedXs :: v Double
sortedXs = forall (v :: * -> *).
Vector v Double =>
v Double -> Int -> v Double
psort v Double
xs forall a b. (a -> b) -> a -> b
$ forall a b. (RealFrac a, Integral b) => a -> b
floor (forall (v :: * -> *) a. (Vector v a, Ord a) => v a -> a
G.maximum v Double
ks') forall a. Num a => a -> a -> a
+ Int
1
    n :: Int
n        = forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
xs
{-# INLINABLE quantilesVec #-}
{-# SPECIALIZE quantilesVec
      :: ContParam -> V.Vector Int -> Int -> V.Vector Double -> V.Vector Double #-}
{-# SPECIALIZE quantilesVec
      :: ContParam -> U.Vector Int -> Int -> U.Vector Double -> U.Vector Double #-}
{-# SPECIALIZE quantilesVec
      :: ContParam -> S.Vector Int -> Int -> S.Vector Double -> S.Vector Double #-}


-- Returns True if quantile number is out of range
badQ :: Int -> Int -> Bool
badQ :: Int -> Int -> Bool
badQ Int
nQ Int
q = Int
q forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| Int
q forall a. Ord a => a -> a -> Bool
> Int
nQ

-- Obtain k from equation for p_k [Hyndman1996] p.363.  Note that
-- equation defines p_k for integer k but we calculate it as real
-- value and will use fractional part for linear interpolation. This
-- is correct since equation is linear.
toPk
  :: ContParam
  -> Int        -- ^ /n/ number of elements
  -> Int        -- ^ /k/, the desired quantile.
  -> Int        -- ^ /q/, the number of quantiles.
  -> Double
toPk :: ContParam -> Int -> Int -> Int -> Double
toPk (ContParam Double
a Double
b) (forall a b. (Integral a, Num b) => a -> b
fromIntegral -> Double
n) Int
q Int
nQ
  = Double
a forall a. Num a => a -> a -> a
+ Double
p forall a. Num a => a -> a -> a
* (Double
n forall a. Num a => a -> a -> a
+ Double
1 forall a. Num a => a -> a -> a
- Double
a forall a. Num a => a -> a -> a
- Double
b)
  where
    p :: Double
p = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
q forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nQ

-- Estimate quantile for given k (including fractional part)
estimateQuantile :: G.Vector v Double => v Double -> Double -> Double
{-# INLINE estimateQuantile #-}
estimateQuantile :: forall (v :: * -> *).
Vector v Double =>
v Double -> Double -> Double
estimateQuantile v Double
sortedXs Double
k'
  = (Double
1forall a. Num a => a -> a -> a
-Double
g) forall a. Num a => a -> a -> a
* Int -> Double
item (Int
kforall a. Num a => a -> a -> a
-Int
1) forall a. Num a => a -> a -> a
+ Double
g forall a. Num a => a -> a -> a
* Int -> Double
item Int
k
  where
    (Int
k,Double
g) = forall a b. (RealFrac a, Integral b) => a -> (b, a)
properFraction Double
k'
    item :: Int -> Double
item  = (v Double
sortedXs forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
!) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int
clamp
    --
    clamp :: Int -> Int
clamp = forall a. Ord a => a -> a -> a
max Int
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => a -> a -> a
min (Int
n forall a. Num a => a -> a -> a
- Int
1)
    n :: Int
n     = forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sortedXs

psort :: G.Vector v Double => v Double -> Int -> v Double
psort :: forall (v :: * -> *).
Vector v Double =>
v Double -> Int -> v Double
psort v Double
xs Int
k = forall (v :: * -> *) e. (Vector v e, Ord e) => Int -> v e -> v e
partialSort (forall a. Ord a => a -> a -> a
max Int
0 forall a b. (a -> b) -> a -> b
$ forall a. Ord a => a -> a -> a
min (forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
xs forall a. Num a => a -> a -> a
- Int
1) Int
k) v Double
xs
{-# INLINE psort #-}


-- | California Department of Public Works definition, /α/=0, /β/=1.
-- Gives a linear interpolation of the empirical CDF.  This
-- corresponds to method 4 in R and Mathematica.
cadpw :: ContParam
cadpw :: ContParam
cadpw = Double -> Double -> ContParam
ContParam Double
0 Double
1

-- | Hazen's definition, /α/=0.5, /β/=0.5.  This is claimed to be
-- popular among hydrologists.  This corresponds to method 5 in R and
-- Mathematica.
hazen :: ContParam
hazen :: ContParam
hazen = Double -> Double -> ContParam
ContParam Double
0.5 Double
0.5

-- | Definition used by the SPSS statistics application, with /α/=0,
-- /β/=0 (also known as Weibull's definition).  This corresponds to
-- method 6 in R and Mathematica.
spss :: ContParam
spss :: ContParam
spss = Double -> Double -> ContParam
ContParam Double
0 Double
0

-- | Definition used by the S statistics application, with /α/=1,
-- /β/=1.  The interpolation points divide the sample range into @n-1@
-- intervals.  This corresponds to method 7 in R and Mathematica and
-- is default in R.
s :: ContParam
s :: ContParam
s = Double -> Double -> ContParam
ContParam Double
1 Double
1

-- | Median unbiased definition, /α/=1\/3, /β/=1\/3. The resulting
-- quantile estimates are approximately median unbiased regardless of
-- the distribution of /x/.  This corresponds to method 8 in R and
-- Mathematica.
medianUnbiased :: ContParam
medianUnbiased :: ContParam
medianUnbiased = Double -> Double -> ContParam
ContParam Double
third Double
third
    where third :: Double
third = Double
1forall a. Fractional a => a -> a -> a
/Double
3

-- | Normal unbiased definition, /α/=3\/8, /β/=3\/8.  An approximately
-- unbiased estimate if the empirical distribution approximates the
-- normal distribution.  This corresponds to method 9 in R and
-- Mathematica.
normalUnbiased :: ContParam
normalUnbiased :: ContParam
normalUnbiased = Double -> Double -> ContParam
ContParam Double
ta Double
ta
    where ta :: Double
ta = Double
3forall a. Fractional a => a -> a -> a
/Double
8

modErr :: String -> String -> a
modErr :: forall a. String -> String -> a
modErr String
f String
err = forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"Statistics.Quantile." forall a. [a] -> [a] -> [a]
++ String
f forall a. [a] -> [a] -> [a]
++ String
": " forall a. [a] -> [a] -> [a]
++ String
err


----------------------------------------------------------------
-- Specializations
----------------------------------------------------------------

-- | O(/n/·log /n/) Estimate median of sample
median :: G.Vector v Double
       => ContParam  -- ^ Parameters /α/ and /β/.
       -> v Double   -- ^ /x/, the sample data.
       -> Double
{-# INLINE median #-}
median :: forall (v :: * -> *).
Vector v Double =>
ContParam -> v Double -> Double
median ContParam
p = forall (v :: * -> *).
Vector v Double =>
ContParam -> Int -> Int -> v Double -> Double
quantile ContParam
p Int
1 Int
2

-- | O(/n/·log /n/). Estimate the range between /q/-quantiles 1 and
-- /q/-1 of a sample /x/, using the continuous sample method with the
-- given parameters.
--
-- For instance, the interquartile range (IQR) can be estimated as
-- follows:
--
-- > midspread medianUnbiased 4 (U.fromList [1,1,2,2,3])
-- > ==> 1.333333
midspread :: G.Vector v Double =>
             ContParam  -- ^ Parameters /α/ and /β/.
          -> Int        -- ^ /q/, the number of quantiles.
          -> v Double   -- ^ /x/, the sample data.
          -> Double
midspread :: forall (v :: * -> *).
Vector v Double =>
ContParam -> Int -> v Double -> Double
midspread ContParam
param Int
k v Double
x
  | forall (v :: * -> *) a. Vector v a => (a -> Bool) -> v a -> Bool
G.any forall a. RealFloat a => a -> Bool
isNaN v Double
x = forall a. String -> String -> a
modErr String
"midspread" String
"Sample contains NaNs"
  | Int
k forall a. Ord a => a -> a -> Bool
<= Int
0        = forall a. String -> String -> a
modErr String
"midspread" String
"Nonpositive number of quantiles"
  | Bool
otherwise     = let Pair Double
x1 Double
x2 = forall (v :: * -> *) (f :: * -> *).
(Vector v Double, Foldable f, Functor f) =>
ContParam -> f Int -> Int -> v Double -> f Double
quantiles ContParam
param (forall a. a -> a -> Pair a
Pair Int
1 (Int
kforall a. Num a => a -> a -> a
-Int
1)) Int
k v Double
x
                    in  Double
x2 forall a. Num a => a -> a -> a
- Double
x1
{-# INLINABLE  midspread #-}
{-# SPECIALIZE midspread :: ContParam -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE midspread :: ContParam -> Int -> V.Vector Double -> Double #-}
{-# SPECIALIZE midspread :: ContParam -> Int -> S.Vector Double -> Double #-}

data Pair a = Pair !a !a
  deriving (forall a b. a -> Pair b -> Pair a
forall a b. (a -> b) -> Pair a -> Pair b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> Pair b -> Pair a
$c<$ :: forall a b. a -> Pair b -> Pair a
fmap :: forall a b. (a -> b) -> Pair a -> Pair b
$cfmap :: forall a b. (a -> b) -> Pair a -> Pair b
Functor, forall a. Eq a => a -> Pair a -> Bool
forall a. Num a => Pair a -> a
forall a. Ord a => Pair a -> a
forall m. Monoid m => Pair m -> m
forall a. Pair a -> Bool
forall a. Pair a -> Int
forall a. Pair a -> [a]
forall a. (a -> a -> a) -> Pair a -> a
forall m a. Monoid m => (a -> m) -> Pair a -> m
forall b a. (b -> a -> b) -> b -> Pair a -> b
forall a b. (a -> b -> b) -> b -> Pair a -> b
forall (t :: * -> *).
(forall m. Monoid m => t m -> m)
-> (forall m a. Monoid m => (a -> m) -> t a -> m)
-> (forall m a. Monoid m => (a -> m) -> t a -> m)
-> (forall a b. (a -> b -> b) -> b -> t a -> b)
-> (forall a b. (a -> b -> b) -> b -> t a -> b)
-> (forall b a. (b -> a -> b) -> b -> t a -> b)
-> (forall b a. (b -> a -> b) -> b -> t a -> b)
-> (forall a. (a -> a -> a) -> t a -> a)
-> (forall a. (a -> a -> a) -> t a -> a)
-> (forall a. t a -> [a])
-> (forall a. t a -> Bool)
-> (forall a. t a -> Int)
-> (forall a. Eq a => a -> t a -> Bool)
-> (forall a. Ord a => t a -> a)
-> (forall a. Ord a => t a -> a)
-> (forall a. Num a => t a -> a)
-> (forall a. Num a => t a -> a)
-> Foldable t
product :: forall a. Num a => Pair a -> a
$cproduct :: forall a. Num a => Pair a -> a
sum :: forall a. Num a => Pair a -> a
$csum :: forall a. Num a => Pair a -> a
minimum :: forall a. Ord a => Pair a -> a
$cminimum :: forall a. Ord a => Pair a -> a
maximum :: forall a. Ord a => Pair a -> a
$cmaximum :: forall a. Ord a => Pair a -> a
elem :: forall a. Eq a => a -> Pair a -> Bool
$celem :: forall a. Eq a => a -> Pair a -> Bool
length :: forall a. Pair a -> Int
$clength :: forall a. Pair a -> Int
null :: forall a. Pair a -> Bool
$cnull :: forall a. Pair a -> Bool
toList :: forall a. Pair a -> [a]
$ctoList :: forall a. Pair a -> [a]
foldl1 :: forall a. (a -> a -> a) -> Pair a -> a
$cfoldl1 :: forall a. (a -> a -> a) -> Pair a -> a
foldr1 :: forall a. (a -> a -> a) -> Pair a -> a
$cfoldr1 :: forall a. (a -> a -> a) -> Pair a -> a
foldl' :: forall b a. (b -> a -> b) -> b -> Pair a -> b
$cfoldl' :: forall b a. (b -> a -> b) -> b -> Pair a -> b
foldl :: forall b a. (b -> a -> b) -> b -> Pair a -> b
$cfoldl :: forall b a. (b -> a -> b) -> b -> Pair a -> b
foldr' :: forall a b. (a -> b -> b) -> b -> Pair a -> b
$cfoldr' :: forall a b. (a -> b -> b) -> b -> Pair a -> b
foldr :: forall a b. (a -> b -> b) -> b -> Pair a -> b
$cfoldr :: forall a b. (a -> b -> b) -> b -> Pair a -> b
foldMap' :: forall m a. Monoid m => (a -> m) -> Pair a -> m
$cfoldMap' :: forall m a. Monoid m => (a -> m) -> Pair a -> m
foldMap :: forall m a. Monoid m => (a -> m) -> Pair a -> m
$cfoldMap :: forall m a. Monoid m => (a -> m) -> Pair a -> m
fold :: forall m. Monoid m => Pair m -> m
$cfold :: forall m. Monoid m => Pair m -> m
F.Foldable)


-- | O(/n/·log /n/). Estimate the median absolute deviation (MAD) of a
--   sample /x/ using 'continuousBy'. It's robust estimate of
--   variability in sample and defined as:
--
--   \[
--   MAD = \operatorname{median}(| X_i - \operatorname{median}(X) |)
--   \]
mad :: G.Vector v Double
    => ContParam  -- ^ Parameters /α/ and /β/.
    -> v Double   -- ^ /x/, the sample data.
    -> Double
mad :: forall (v :: * -> *).
Vector v Double =>
ContParam -> v Double -> Double
mad ContParam
p v Double
xs
  = forall (v :: * -> *).
Vector v Double =>
ContParam -> v Double -> Double
median ContParam
p forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (forall a. Num a => a -> a
abs forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Num a => a -> a -> a
subtract Double
med) v Double
xs
  where
    med :: Double
med = forall (v :: * -> *).
Vector v Double =>
ContParam -> v Double -> Double
median ContParam
p v Double
xs
{-# INLINABLE  mad #-}
{-# SPECIALIZE mad :: ContParam -> U.Vector Double -> Double #-}
{-# SPECIALIZE mad :: ContParam -> V.Vector Double -> Double #-}
{-# SPECIALIZE mad :: ContParam -> S.Vector Double -> Double #-}


----------------------------------------------------------------
-- Deprecated
----------------------------------------------------------------

continuousBy :: G.Vector v Double =>
                ContParam  -- ^ Parameters /α/ and /β/.
             -> Int        -- ^ /k/, the desired quantile.
             -> Int        -- ^ /q/, the number of quantiles.
             -> v Double   -- ^ /x/, the sample data.
             -> Double
continuousBy :: forall (v :: * -> *).
Vector v Double =>
ContParam -> Int -> Int -> v Double -> Double
continuousBy = forall (v :: * -> *).
Vector v Double =>
ContParam -> Int -> Int -> v Double -> Double
quantile
{-# DEPRECATED continuousBy "Use quantile instead" #-}

-- $references
--
-- * Weisstein, E.W. Quantile. /MathWorld/.
--   <http://mathworld.wolfram.com/Quantile.html>
--
-- * [Hyndman1996] Hyndman, R.J.; Fan, Y. (1996) Sample quantiles in statistical
--   packages. /American Statistician/
--   50(4):361&#8211;365. <http://www.jstor.org/stable/2684934>