{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module    : Statistics.Types
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Data types common used in statistics
module Statistics.Types
    ( -- * Confidence level
      CL
      -- ** Accessors
    , confidenceLevel
    , significanceLevel
      -- ** Constructors
    , mkCL
      -- ** Constants and conversion to nσ
    , cl95
      -- * Estimates and upper/lower limits
    , Estimate(..)
    -- , NormalErr(..)
    , ConfInt(..)
      -- ** Constructors
    -- , estimateNormErr
    , estimateFromInterval
    , estimateFromErr
      -- ** Accessors
    , confidenceInterval
    , Scale(..)
      -- * Other
    , Sample
    ) where

import Control.DeepSeq              (NFData(..))
import Data.Data                    (Data,Typeable)
import Data.Maybe                   (fromMaybe)
import GHC.Generics                 (Generic)

#if __GLASGOW_HASKELL__ == 704
import qualified Data.Vector.Generic
import qualified Data.Vector.Generic.Mutable
#endif

import Statistics.Internal
import Statistics.Types.Internal


----------------------------------------------------------------
-- Data type for confidence level
----------------------------------------------------------------

-- |
-- Confidence level. In context of confidence intervals it's
-- probability of said interval covering true value of measured
-- value. In context of statistical tests it's @1-α@ where α is
-- significance of test.
--
-- Since confidence level are usually close to 1 they are stored as
-- @1-CL@ internally. There are two smart constructors for @CL@:
-- 'mkCL' and 'mkCLFromSignificance' (and corresponding variant
-- returning @Maybe@). First creates @CL@ from confidence level and
-- second from @1 - CL@ or significance level.
--
-- >>> cl95
-- mkCLFromSignificance 0.05
--
-- Prior to 0.14 confidence levels were passed to function as plain
-- @Doubles@. Use 'mkCL' to convert them to @CL@.
newtype CL a = CL a
               deriving (Eq, Typeable, Data, Generic)

instance Show a => Show (CL a) where
  showsPrec n (CL p) = defaultShow1 "mkCLFromSignificance" p n
instance (Num a, Ord a, Read a) => Read (CL a) where
  readPrec = defaultReadPrecM1 "mkCLFromSignificance" mkCLFromSignificanceE

instance NFData   a => NFData   (CL a) where
  rnf (CL a) = rnf a

-- |
-- >>> cl95 > cl90
-- True
instance Ord a => Ord (CL a) where
  CL a <  CL b = a >  b
  CL a <= CL b = a >= b
  CL a >  CL b = a <  b
  CL a >= CL b = a <= b
  max (CL a) (CL b) = CL (min a b)
  min (CL a) (CL b) = CL (max a b)


-- | Create confidence level from probability β or probability
--   confidence interval contain true value of estimate. Will throw
--   exception if parameter is out of [0,1] range
--
-- >>> mkCL 0.95    -- same as cl95
-- mkCLFromSignificance 0.05
mkCL :: (Ord a, Num a) => a -> CL a
mkCL
  = fromMaybe (error "Statistics.Types.mkCL: probability is out if [0,1] range")
  . mkCLE

-- | Same as 'mkCL' but returns @Nothing@ instead of error if
--   parameter is out of [0,1] range
--
-- >>> mkCLE 0.95    -- same as cl95
-- Just (mkCLFromSignificance 0.05)
mkCLE :: (Ord a, Num a) => a -> Maybe (CL a)
mkCLE p
  | p >= 0 && p <= 1 = Just $ CL (1 - p)
  | otherwise        = Nothing

-- | Same as 'mkCLFromSignificance' but returns @Nothing@ instead of error if
--   parameter is out of [0,1] range
--
-- >>> mkCLFromSignificanceE 0.05    -- same as cl95
-- Just (mkCLFromSignificance 0.05)
mkCLFromSignificanceE :: (Ord a, Num a) => a -> Maybe (CL a)
mkCLFromSignificanceE p
  | p >= 0 && p <= 1 = Just $ CL p
  | otherwise        = Nothing

-- | Get confidence level. This function is subject to rounding
--   errors. If @1 - CL@ is needed use 'significanceLevel' instead
confidenceLevel :: (Num a) => CL a -> a
confidenceLevel (CL p) = 1 - p

-- | Get significance level.
significanceLevel :: CL a -> a
significanceLevel (CL p) = p


-- | 95% confidence level
cl95 :: Fractional a => CL a
cl95 = CL 0.05

----------------------------------------------------------------
-- Data type for p-value
----------------------------------------------------------------

-- | Newtype wrapper for p-value.
newtype PValue a = PValue a
               deriving (Eq,Ord, Typeable, Data, Generic)

instance Show a => Show (PValue a) where
  showsPrec n (PValue p) = defaultShow1 "mkPValue" p n
instance (Num a, Ord a, Read a) => Read (PValue a) where
  readPrec = defaultReadPrecM1 "mkPValue" mkPValueE

instance NFData a => NFData (PValue a) where
  rnf (PValue a) = rnf a


-- | Construct PValue. Returns @Nothing@ if argument is out of [0,1] range.
mkPValueE :: (Ord a, Num a) => a -> Maybe (PValue a)
mkPValueE p
  | p >= 0 && p <= 1 = Just $ PValue p
  | otherwise        = Nothing

----------------------------------------------------------------
-- Point estimates
----------------------------------------------------------------

-- |
-- A point estimate and its confidence interval. It's parametrized by
-- both error type @e@ and value type @a@. This module provides two
-- types of error: 'NormalErr' for normally distributed errors and
-- 'ConfInt' for error with normal distribution. See their
-- documentation for more details.
--
-- For example @144 ± 5@ (assuming normality) could be expressed as
--
-- > Estimate { estPoint = 144
-- >          , estError = NormalErr 5
-- >          }
--
-- Or if we want to express @144 + 6 - 4@ at CL95 we could write:
--
-- > Estimate { estPoint = 144
-- >          , estError = ConfInt
-- >                       { confIntLDX = 4
-- >                       , confIntUDX = 6
-- >                       , confIntCL  = cl95
-- >                       }
--
-- Prior to statistics 0.14 @Estimate@ data type used following definition:
--
-- > data Estimate = Estimate {
-- >      estPoint           :: {-# UNPACK #-} !Double
-- >    , estLowerBound      :: {-# UNPACK #-} !Double
-- >    , estUpperBound      :: {-# UNPACK #-} !Double
-- >    , estConfidenceLevel :: {-# UNPACK #-} !Double
-- >    }
--
-- Now type @Estimate ConfInt Double@ should be used instead. Function
-- 'estimateFromInterval' allow to easily construct estimate from same inputs.
data Estimate e a = Estimate
    { estPoint           :: !a
      -- ^ Point estimate.
    , estError           :: !(e a)
      -- ^ Confidence interval for estimate.
    } deriving (Eq, Read, Show, Generic
#if __GLASGOW_HASKELL__ >= 708
               , Typeable, Data
#endif
               )

instance (NFData   (e a), NFData   a) => NFData   (Estimate e a) where
    rnf (Estimate x dx) = rnf x `seq` rnf dx


-- | Confidence interval. It assumes that confidence interval forms
--   single interval and isn't set of disjoint intervals.
data ConfInt a = ConfInt
  { confIntLDX :: !a
    -- ^ Lower error estimate, or distance between point estimate and
    --   lower bound of confidence interval.
  , confIntUDX :: !a
    -- ^ Upper error estimate, or distance between point estimate and
    --   upper bound of confidence interval.
  , confIntCL  :: !(CL Double)
    -- ^ Confidence level corresponding to given confidence interval.
  }
  deriving (Read,Show,Eq,Typeable,Data,Generic)

instance NFData   a => NFData   (ConfInt a) where
    rnf (ConfInt x y _) = rnf x `seq` rnf y


----------------------------------------
-- Constructors

-- | Create estimate with asymmetric error.
estimateFromErr
  :: a                     -- ^ Central estimate
  -> (a,a)                 -- ^ Lower and upper errors. Both should be
                           --   positive but it's not checked.
  -> CL Double             -- ^ Confidence level for interval
  -> Estimate ConfInt a
estimateFromErr x (ldx,udx) cl = Estimate x (ConfInt ldx udx cl)

-- | Create estimate with asymmetric error.
estimateFromInterval
  :: Num a
  => a                     -- ^ Point estimate. Should lie within
                           --   interval but it's not checked.
  -> (a,a)                 -- ^ Lower and upper bounds of interval
  -> CL Double             -- ^ Confidence level for interval
  -> Estimate ConfInt a
estimateFromInterval x (lx,ux) cl
  = Estimate x (ConfInt (x-lx) (ux-x) cl)


----------------------------------------
-- Accessors

-- | Get confidence interval
confidenceInterval :: Num a => Estimate ConfInt a -> (a,a)
confidenceInterval (Estimate x (ConfInt ldx udx _))
  = (x - ldx, x + udx)


-- | Data types which could be multiplied by constant.
class Scale e where
  scale :: (Ord a, Num a) => a -> e a -> e a

instance Scale ConfInt where
  scale a (ConfInt l u cl) | a >= 0    = ConfInt  (a*l)  (a*u) cl
                           | otherwise = ConfInt (-a*u) (-a*l) cl

instance Scale e => Scale (Estimate e) where
  scale a (Estimate x dx) = Estimate (a*x) (scale a dx)