{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module Statistics
    ( statTheta
    , statTask
    , bayes
    ) where

import Irt
import Likelihood
import Types

import Control.Arrow
import Control.Monad
import Control.Monad.ST.Safe
import Data.List
import qualified Data.Vector.Generic as V
import qualified Data.Vector.Unboxed as UV
import Statistics.Sample
import System.Random.MWC

statTheta :: StatisticType -> ContestantsData -> IO Statistic
statTheta Count xs = return . SingleStatistic . map (fromIntegral . V.length . snd) $ xs
statTheta SolvedProp xs = return . SingleStatistic . map (prop . snd) $ xs
  where
    prop = mean . V.map (ok . snd)
    ok True = 1
    ok False = 0
statTheta LogLikelihood xs = return . SingleStatistic . map (thetaVSumMap logLikelihood) $ xs
statTheta DLogLikelihood xs = return . ListStatistic ["ELogLikelihood", "DLogLikelihood"] .
                                map (thetaVSumMap edlogl) $ xs
  where
    edlogl _ d t = [elog, dlog]
      where
        lt = logLikelihood False d t
        lf = logLikelihood True d t
        elog = lt * exp lt + lf * exp lf
        dlog = (lt-lf)^(2::Int) * (exp $ lt + lf) / (1+paramC d)^(3::Int)
statTheta FisherSEM xs = return . SingleStatistic . map fish $ xs
  where
    fish = (1 /) . sqrt . thetaVSumMap inf2
    inf2 _r d t = (-paramA d) * (paramA d) * logLikelihood't True  d t
                                           * logLikelihood't False d t
statTheta Bootstrap xss = withSystemRandom $ asGenST calc
  where
    calc g = do
        seed <- save g
        return $ ListStatistic ["BootstrapSEM", "BootstrapLb", "BootstrapUb"]
                    . map (bootstrap seed 100000) $ xss

    stat :: [Double] -> (Double, Double, Double)
    stat xs = (sem, lb, ub)
      where
        n = length xs
        sem = sqrt . varianceUnbiased . UV.fromList $ xs
        confidence = 0.95 :: Rational
        dist = (n - round (fromIntegral n * confidence)) `div` 2
        select = (!! dist)
        (lb, ub) = (select &&& (select . reverse)) . sort $ xs


    bootstrap seed n xs = [err,lb,ub]
      where
        (err,lb,ub) = runST $ do
            g <- restore seed
            v <- bootstrap' g n xs
            return $ stat v

    bootstrap' g n (t,xs) = replicateM n $ do
        xs' <- resample g xs
        return $! estTheta t xs'

    resample g xs = return . V.map (xs V.!) =<< V.replicateM l (uniformR (0,l-1) g)
        where l = V.length xs


statTask :: StatisticType -> TasksData -> IO Statistic
statTask Count xs = return . SingleStatistic . map (fromIntegral . V.length . snd) $ xs
statTask SolvedProp xs = return . SingleStatistic . map (prop . snd) $ xs
  where
    prop = mean . V.map (ok . snd)
    ok True = 1
    ok False = 0
statTask LogLikelihood xs = return . SingleStatistic . map (paramVSumMap logLikelihood) $ xs
statTask DLogLikelihood xs = return . ListStatistic ["ELogLikelihood", "DLogLikelihood"] .
                                map (paramVSumMap edlogl) $ xs
  where
    edlogl _ d t = [elog, dlog]
      where
        lt = logLikelihood False d t
        lf = logLikelihood True d t
        elog = lt * exp lt + lf * exp lf
        dlog = (lt-lf)^(2::Int) * (exp $ lt + lf) / (1+paramC d)^(3::Int)

bayes :: Double -> ContestantData -> ((Double, Double), [(Double, Double)])
bayes confidence (theta, tasks) = ((lbound, ubound), distribution)
  where
    pick (lh, _, dt) = (p, p * dt)
      where
        p = exp lh
    fg t = pick $ thetaVSumMap logL (t,tasks)
    maxp = fst . fg $ theta

    aintegral = integrate $ evaluate 0.3 (0.1*maxp) (fst thetaBound) (snd thetaBound)
    points = evaluate 0.3 (0.002*aintegral) (fst thetaBound) (snd thetaBound)
    integral = integrate points
    distribution :: [(Double, Double)]
    distribution = map (second (/integral)) points
    percentile = (1 - confidence) / 2
    lbound = bound percentile distribution
    ubound = bound percentile (reverse distribution)

    evaluate maxx maxv x0 x1 = evaluate' l1 r1 ++ [second fst r1]
      where
        l1 = (x0, fg x0)
        r1 = (x1, fg x1)
        split l r x = evaluate' l m ++ evaluate' m r
              where m = (x, fg x)
        evaluate' l@(xl, (vl, dl)) r@(xr, (vr, dr))
            | xr - xl > maxx * 1.01 = split l r (xl + maxx)
            | (xr - xl) * (abs (dv - dl) + abs (dv - dr)) > maxv = split l r ((xl + xr) / 2)
            | otherwise = [(xl, vl)]
          where
            dv = (vr - vl) / (xr - xl)

    integrate :: [(Double, Double)] -> Double
    integrate = integrate' 0
      where
        integrate' !c ((xl,vl):next@((xr,vr):_)) = integrate' (c + (xr - xl) * (vl + vr) / 2) next
        integrate' !c _ = c

    bound !c ((xl, vl):next@((xr,vr):_))
        | c > v = bound (c-v) next
        | otherwise = xl + (xr - xl) * c / v
      where
        v = abs $ (xr - xl) * (vl + vr) / 2