{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE BangPatterns #-} module Statistics ( statTheta , statTask , bayes ) where import Irt import Likelihood import Types import Control.Arrow import Control.Monad import Control.Monad.ST.Safe import Data.List import qualified Data.Vector.Generic as V import qualified Data.Vector.Unboxed as UV import Statistics.Sample import System.Random.MWC statTheta :: StatisticType -> ContestantsData -> IO Statistic statTheta Count xs = return . SingleStatistic . map (fromIntegral . V.length . snd) $ xs statTheta SolvedProp xs = return . SingleStatistic . map (prop . snd) $ xs where prop = mean . V.map (ok . snd) ok True = 1 ok False = 0 statTheta LogLikelihood xs = return . SingleStatistic . map (thetaVSumMap logLikelihood) $ xs statTheta DLogLikelihood xs = return . ListStatistic ["ELogLikelihood", "DLogLikelihood"] . map (thetaVSumMap edlogl) $ xs where edlogl _ d t = [elog, dlog] where lt = logLikelihood False d t lf = logLikelihood True d t elog = lt * exp lt + lf * exp lf dlog = (lt-lf)^(2::Int) * (exp $ lt + lf) / (1+paramC d)^(3::Int) statTheta FisherSEM xs = return . SingleStatistic . map fish $ xs where fish = (1 /) . sqrt . thetaVSumMap inf2 inf2 _r d t = (-paramA d) * (paramA d) * logLikelihood't True d t * logLikelihood't False d t statTheta Bootstrap xss = withSystemRandom $ asGenST calc where calc g = do seed <- save g return $ ListStatistic ["BootstrapSEM", "BootstrapLb", "BootstrapUb"] . map (bootstrap seed 100000) $ xss stat :: [Double] -> (Double, Double, Double) stat xs = (sem, lb, ub) where n = length xs sem = sqrt . varianceUnbiased . UV.fromList $ xs confidence = 0.95 :: Rational dist = (n - round (fromIntegral n * confidence)) `div` 2 select = (!! dist) (lb, ub) = (select &&& (select . reverse)) . sort $ xs bootstrap seed n xs = [err,lb,ub] where (err,lb,ub) = runST $ do g <- restore seed v <- bootstrap' g n xs return $ stat v bootstrap' g n (t,xs) = replicateM n $ do xs' <- resample g xs return $! estTheta t xs' resample g xs = return . V.map (xs V.!) =<< V.replicateM l (uniformR (0,l-1) g) where l = V.length xs statTask :: StatisticType -> TasksData -> IO Statistic statTask Count xs = return . SingleStatistic . map (fromIntegral . V.length . snd) $ xs statTask SolvedProp xs = return . SingleStatistic . map (prop . snd) $ xs where prop = mean . V.map (ok . snd) ok True = 1 ok False = 0 statTask LogLikelihood xs = return . SingleStatistic . map (paramVSumMap logLikelihood) $ xs statTask DLogLikelihood xs = return . ListStatistic ["ELogLikelihood", "DLogLikelihood"] . map (paramVSumMap edlogl) $ xs where edlogl _ d t = [elog, dlog] where lt = logLikelihood False d t lf = logLikelihood True d t elog = lt * exp lt + lf * exp lf dlog = (lt-lf)^(2::Int) * (exp $ lt + lf) / (1+paramC d)^(3::Int) bayes :: Double -> ContestantData -> ((Double, Double), [(Double, Double)]) bayes confidence (theta, tasks) = ((lbound, ubound), distribution) where pick (lh, _, dt) = (p, p * dt) where p = exp lh fg t = pick $ thetaVSumMap logL (t,tasks) maxp = fst . fg $ theta aintegral = integrate $ evaluate 0.3 (0.1*maxp) (fst thetaBound) (snd thetaBound) points = evaluate 0.3 (0.002*aintegral) (fst thetaBound) (snd thetaBound) integral = integrate points distribution :: [(Double, Double)] distribution = map (second (/integral)) points percentile = (1 - confidence) / 2 lbound = bound percentile distribution ubound = bound percentile (reverse distribution) evaluate maxx maxv x0 x1 = evaluate' l1 r1 ++ [second fst r1] where l1 = (x0, fg x0) r1 = (x1, fg x1) split l r x = evaluate' l m ++ evaluate' m r where m = (x, fg x) evaluate' l@(xl, (vl, dl)) r@(xr, (vr, dr)) | xr - xl > maxx * 1.01 = split l r (xl + maxx) | (xr - xl) * (abs (dv - dl) + abs (dv - dr)) > maxv = split l r ((xl + xr) / 2) | otherwise = [(xl, vl)] where dv = (vr - vl) / (xr - xl) integrate :: [(Double, Double)] -> Double integrate = integrate' 0 where integrate' !c ((xl,vl):next@((xr,vr):_)) = integrate' (c + (xr - xl) * (vl + vr) / 2) next integrate' !c _ = c bound !c ((xl, vl):next@((xr,vr):_)) | c > v = bound (c-v) next | otherwise = xl + (xr - xl) * c / v where v = abs $ (xr - xl) * (vl + vr) / 2