{-# LANGUAGE FlexibleContexts #-}
---------------------------------------------------------------------------------
-- |
-- Module       : Amby.Numeric
-- Copyright    : (C) 2016 Justin Sermeno
-- License      : BSD3
-- Maintainer   : Justin Sermeno
--
-- Functions for working with numerical ranges and statistics.
---------------------------------------------------------------------------------
module Amby.Numeric
  ( -- * Ranges
    contDistrDomain
  , contDistrRange
  , linspace
  , arange
  , random

  -- * Frequencies
  , scoreAtPercentile
  , interquartileRange
  , freedmanDiaconisBins

  ) where

import Data.Vector.Generic ((!))
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Intro as V

import Statistics.Distribution
import System.Random.MWC (withSystemRandom, asGenST)

-- $setup
-- >>> let demoData = V.fromList $ concat $ zipWith replicate [5, 4, 3, 7] [0..3]

---------------------
-- Ranges
---------------------

-- | @contDistrDomain d n@ generates a domain of 'n' evenly spaced points
-- for the continuous distribution 'd'.
contDistrDomain :: (ContDistr d) => d -> Int -> U.Vector Double
contDistrDomain d num = linspace (quantile d 0.0001) (quantile d 0.9999) num

-- | @contDistrRange d xs@ generates the pdf value of the continious distribution
-- 'd' for each value in 'xs'.
contDistrRange :: (ContDistr d) => d -> U.Vector Double -> U.Vector Double
contDistrRange d x = U.map (density d) x

-- | @linspace s e n@ generates 'n' evenly spaced values between ['s', 'e'].
linspace :: Double -> Double -> Int -> U.Vector Double
linspace start stop num
  | num < 0 = error ("Number of samples, " ++ show num ++ ", must be non-negative.")
  | num == 0 || num == 1 = addStart $ U.generate num ((* delta) . fromIntegral)
  | otherwise = addStart $ U.generate num ((* step) . fromIntegral)
  where
    delta = stop - start
    step = delta / fromIntegral (num - 1)
    addStart = U.map (realToFrac . (+ start))

-- | @arange s e i@ generates numbers between ['s', 'e'] spaced by amount 'i'.
-- 'arange' is the equivalent of haskell's range notation except that it generates
-- a 'Vector'. As a result, the last element may be greater than less than, or
-- greater than the stop point.
arange :: Double -> Double -> Double -> U.Vector Double
arange start stop step = U.fromList [start,(start + step)..stop]

-- | Generates an unboxed vectors of random numbers from a distribution
-- that is an instance of 'ContGen'. This function is meant for ease of use
-- and is expensive.
random :: (ContGen d) => d -> Int -> IO (U.Vector Double)
random d n = withSystemRandom . asGenST $ \gen ->
  U.replicateM n $ genContVar d gen

---------------------
-- Frequencies
---------------------

-- | @scoreAtPercentile xs p@ calculates the score at percentile 'p'.
--
-- Examples:
--
-- >>> let a = arange 0 99 1
--
-- >>> scoreAtPercentile a 50
-- 49.5
scoreAtPercentile :: (G.Vector v Double) => v Double -> Double -> Double
scoreAtPercentile xs p
  | n == 0 = modErr "scoreAtPercentile" "Percentile sample size is 0"
  | p < 0 || p > 100 = modErr "scoreAtPercentile" "Percentile must be in [0, 100]"
  | otherwise = (getScore . G.modify V.sort) xs
  where
    n = G.length xs
    idx = (p / 100) * fromIntegral (n - 1)
    i = floor idx
    j = ceiling idx

    isInt :: Double -> Bool
    isInt a = fromIntegral (round a :: Int) == a

    interop :: (G.Vector v Double) => v Double -> Double
    interop vec =
      (vec ! i) * (fromIntegral j - idx) +
      (vec ! j) * (idx - fromIntegral i)

    getScore vec = if isInt idx
      then vec ! i
      else interop vec
{-# SPECIALIZE scoreAtPercentile :: U.Vector Double -> Double -> Double #-}
{-# SPECIALIZE scoreAtPercentile :: V.Vector Double -> Double -> Double #-}

-- | Calculate the interquartile range.
--
-- Examples:
--
-- >>> interquartileRange demoData
-- 2.5
interquartileRange :: (G.Vector v Double) => v Double -> Double
interquartileRange xs = scoreAtPercentile xs 75 - scoreAtPercentile xs 25
{-# SPECIALIZE interquartileRange :: U.Vector Double -> Double #-}
{-# SPECIALIZE interquartileRange :: V.Vector Double -> Double #-}

-- | Estimate a good default bin size.
--
-- Examples:
--
-- >>> freedmanDiaconisBins demoData
-- 2
freedmanDiaconisBins :: (G.Vector v Double) => v Double -> Int
freedmanDiaconisBins xs =
    if h == 0
      then floor $ sqrt $ (fromIntegral n :: Double)
      else ceiling $ (G.maximum xs - G.minimum xs) / h
  where
    n = G.length xs
    h = 2 * interquartileRange xs / fromIntegral n ** (1 / 3)

modErr :: String -> String -> a
modErr f err = error
  $ showString "Amby.Numeric."
  $ showString f err