-- |
-- Module    : Statistics.Resampling.Bootstrap
-- Copyright : (c) 2009, 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The bootstrap method for statistical inference.

module Statistics.Resampling.Bootstrap
    ( bootstrapBCA
    , basicBootstrap
    -- * References
    -- $references
    ) where

import           Data.Vector.Generic ((!))
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Generic as G

import Statistics.Distribution (cumulative, quantile)
import Statistics.Distribution.Normal
import Statistics.Resampling (Bootstrap(..), jackknife)
import Statistics.Sample (mean)
import Statistics.Types (Sample, CL, Estimate, ConfInt, estimateFromInterval,
                         estimateFromErr, CL, significanceLevel)
import Statistics.Function (gsort)

import qualified Statistics.Resampling as R

import Control.Parallel.Strategies (parMap, rdeepseq)

data T = {-# UNPACK #-} !Double :< {-# UNPACK #-} !Double
infixl 2 :<

-- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both
--   bias and skewness in the resampled distribution.
--
--   BCA algorithm is described in ch. 5 of Davison, Hinkley "Confidence
--   intervals" in section 5.3 "Percentile method"
bootstrapBCA
  :: CL Double       -- ^ Confidence level
  -> Sample          -- ^ Full data sample
  -> [(R.Estimator, Bootstrap U.Vector Double)]
  -- ^ Estimates obtained from resampled data and estimator used for
  --   this.
  -> [Estimate ConfInt Double]
bootstrapBCA :: CL Double
-> Sample
-> [(Estimator, Bootstrap Vector Double)]
-> [Estimate ConfInt Double]
bootstrapBCA CL Double
confidenceLevel Sample
sample [(Estimator, Bootstrap Vector Double)]
resampledData
  = forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap forall a. NFData a => Strategy a
rdeepseq forall {a}.
(Num a, Unbox a, Ord a) =>
(Estimator, Bootstrap Vector a) -> Estimate ConfInt a
e [(Estimator, Bootstrap Vector Double)]
resampledData
  where
    e :: (Estimator, Bootstrap Vector a) -> Estimate ConfInt a
e (Estimator
est, Bootstrap a
pt Vector a
resample)
      | forall a. Unbox a => Vector a -> Int
U.length Sample
sample forall a. Eq a => a -> a -> Bool
== Int
1 Bool -> Bool -> Bool
|| forall a. RealFloat a => a -> Bool
isInfinite Double
bias =
          forall a. a -> (a, a) -> CL Double -> Estimate ConfInt a
estimateFromErr      a
pt (a
0,a
0) CL Double
confidenceLevel
      | Bool
otherwise =
          forall a. Num a => a -> (a, a) -> CL Double -> Estimate ConfInt a
estimateFromInterval a
pt (Vector a
resample forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
! Int
lo, Vector a
resample forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
! Int
hi) CL Double
confidenceLevel
      where
        -- Quantile estimates for given CL
        lo :: Int
lo    = forall a. Ord a => a -> a -> a
min (forall a. Ord a => a -> a -> a
max (Double -> Int
cumn Double
a1) Int
0) (Int
ni forall a. Num a => a -> a -> a
- Int
1)
          where a1 :: Double
a1 = Double
bias forall a. Num a => a -> a -> a
+ Double
b1 forall a. Fractional a => a -> a -> a
/ (Double
1 forall a. Num a => a -> a -> a
- Double
accel forall a. Num a => a -> a -> a
* Double
b1)
                b1 :: Double
b1 = Double
bias forall a. Num a => a -> a -> a
+ Double
z1
        hi :: Int
hi    = forall a. Ord a => a -> a -> a
max (forall a. Ord a => a -> a -> a
min (Double -> Int
cumn Double
a2) (Int
ni forall a. Num a => a -> a -> a
- Int
1)) Int
0
          where a2 :: Double
a2 = Double
bias forall a. Num a => a -> a -> a
+ Double
b2 forall a. Fractional a => a -> a -> a
/ (Double
1 forall a. Num a => a -> a -> a
- Double
accel forall a. Num a => a -> a -> a
* Double
b2)
                b2 :: Double
b2 = Double
bias forall a. Num a => a -> a -> a
- Double
z1
        -- Number of resamples
        ni :: Int
ni    = forall a. Unbox a => Vector a -> Int
U.length Vector a
resample
        n :: Double
n     = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
ni
        -- Corrections
        z1 :: Double
z1    = forall d. ContDistr d => d -> Double -> Double
quantile NormalDistribution
standard (forall a. CL a -> a
significanceLevel CL Double
confidenceLevel forall a. Fractional a => a -> a -> a
/ Double
2)
        cumn :: Double -> Int
cumn  = forall a b. (RealFrac a, Integral b) => a -> b
round forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Num a => a -> a -> a
*Double
n) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall d. Distribution d => d -> Double -> Double
cumulative NormalDistribution
standard
        bias :: Double
bias  = forall d. ContDistr d => d -> Double -> Double
quantile NormalDistribution
standard (Double
probN forall a. Fractional a => a -> a -> a
/ Double
n)
          where probN :: Double
probN = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Vector a -> Int
U.length forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
U.filter (forall a. Ord a => a -> a -> Bool
<a
pt) forall a b. (a -> b) -> a -> b
$ Vector a
resample
        accel :: Double
accel = Double
sumCubes forall a. Fractional a => a -> a -> a
/ (Double
6 forall a. Num a => a -> a -> a
* (Double
sumSquares forall a. Floating a => a -> a -> a
** Double
1.5))
          where (Double
sumSquares :< Double
sumCubes) = forall b a. Unbox b => (a -> b -> a) -> a -> Vector b -> a
U.foldl' T -> Double -> T
f (Double
0 Double -> Double -> T
:< Double
0) Sample
jack
                f :: T -> Double -> T
f (Double
s :< Double
c) Double
j = Double
s forall a. Num a => a -> a -> a
+ Double
d2 Double -> Double -> T
:< Double
c forall a. Num a => a -> a -> a
+ Double
d2 forall a. Num a => a -> a -> a
* Double
d
                    where d :: Double
d  = Double
jackMean forall a. Num a => a -> a -> a
- Double
j
                          d2 :: Double
d2 = Double
d forall a. Num a => a -> a -> a
* Double
d
                jackMean :: Double
jackMean     = forall (v :: * -> *). Vector v Double => v Double -> Double
mean Sample
jack
        jack :: Sample
jack  = Estimator -> Sample -> Sample
jackknife Estimator
est Sample
sample


-- | Basic bootstrap. This method simply uses empirical quantiles for
--   confidence interval.
basicBootstrap
  :: (G.Vector v a, Ord a, Num a)
  => CL Double       -- ^ Confidence vector
  -> Bootstrap v a   -- ^ Estimate from full sample and vector of
                     --   estimates obtained from resamples
  -> Estimate ConfInt a
{-# INLINE basicBootstrap #-}
basicBootstrap :: forall (v :: * -> *) a.
(Vector v a, Ord a, Num a) =>
CL Double -> Bootstrap v a -> Estimate ConfInt a
basicBootstrap CL Double
cl (Bootstrap a
e v a
ests)
  = forall a. Num a => a -> (a, a) -> CL Double -> Estimate ConfInt a
estimateFromInterval a
e (v a
sorted forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
! Int
lo, v a
sorted forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
! Int
hi) CL Double
cl
  where
    sorted :: v a
sorted = forall e (v :: * -> *). (Ord e, Vector v e) => v e -> v e
gsort v a
ests
    n :: Double
n  = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v a
ests
    c :: Double
c  = Double
n forall a. Num a => a -> a -> a
* (forall a. CL a -> a
significanceLevel CL Double
cl forall a. Fractional a => a -> a -> a
/ Double
2)
    -- FIXME: can we have better estimates of quantiles in case when p
    --        is not multiple of 1/N
    --
    -- FIXME: we could have undercoverage here
    lo :: Int
lo = forall a b. (RealFrac a, Integral b) => a -> b
round Double
c
    hi :: Int
hi = forall a b. (RealFrac a, Integral b) => a -> b
truncate (Double
n forall a. Num a => a -> a -> a
- Double
c)

-- $references
--
-- * Davison, A.C; Hinkley, D.V. (1997) Bootstrap methods and their
--   application. <http://statwww.epfl.ch/davison/BMA/>