{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}
module BenchShow.Analysis
    ( OutlierEffect(..)
    , OutlierVariance(..)
    , countOutliers
    , Estimator(..)
    , AnalyzedField(..)
    , getAnalyzedValue
    , BenchmarkMatrix(..)
    , BenchmarkIterMatrix(..)
    , foldBenchmark
    , filterSamples
    , isMaxField
    ) where
import Control.Applicative
import Data.Char (toLower)
import Data.Data (Data, Typeable)
import Data.Int (Int64)
import Data.List (elemIndex, transpose)
import Data.Maybe (fromMaybe)
import Data.Traversable
import GHC.Generics (Generic)
import Statistics.Function (sort)
import Statistics.Quantile (weightedAvg)
import Statistics.Regression (bootstrapRegress, olsRegress)
import Statistics.Resampling (resample)
import Statistics.Resampling.Bootstrap (bootstrapBCA)
import Statistics.Sample (mean, stdDev)
import Statistics.Sample.KernelDensity (kde)
import Statistics.Types (Sample, Estimate(..), ConfInt(..), cl95, CL)
import System.Random.MWC (GenIO, createSystemRandom)
import Prelude hiding (sequence, mapM)
import qualified Statistics.Resampling as St
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
data Outliers = Outliers {
      samplesSeen :: !Int64
    , lowSevere   :: !Int64
    
    
    , lowMild     :: !Int64
    
    , highMild    :: !Int64
    
    , highSevere  :: !Int64
    
    } deriving (Eq, Show, Typeable, Data, Generic)
data OutlierEffect = Unaffected 
                   | Slight     
                   | Moderate   
                   | Severe     
                                
                     deriving (Eq, Ord, Show, Typeable, Data, Generic)
outliersEmpty :: Outliers
outliersEmpty = Outliers 0 0 0 0 0
addOutliers :: Outliers -> Outliers -> Outliers
addOutliers (Outliers s a b c d) (Outliers t w x y z) =
    Outliers (s+t) (a+w) (b+x) (c+y) (d+z)
{-# INLINE addOutliers #-}
data OutlierVariance = OutlierVariance {
      ovEffect   :: !OutlierEffect
    
    , ovDesc     :: !String
    
    , ovFraction :: !Double
    
    } deriving (Eq, Show, Typeable, Data, Generic)
classifyOutliers :: Sample -> Outliers
classifyOutliers sa = U.foldl' ((. outlier) . addOutliers) outliersEmpty ssa
    where outlier e = Outliers
                { samplesSeen = 1
                , lowSevere = if e <= loS && e < hiM then 1 else 0
                , lowMild = if e > loS && e <= loM then 1 else 0
                , highMild = if e >= hiM && e < hiS then 1 else 0
                , highSevere = if e >= hiS && e > loM then 1 else 0
                }
          !loS = q1 - (iqr * 3)
          !loM = q1 - (iqr * 1.5)
          !hiM = q3 + (iqr * 1.5)
          !hiS = q3 + (iqr * 3)
          q1   = weightedAvg 1 4 ssa
          q3   = weightedAvg 3 4 ssa
          ssa  = sort sa
          iqr  = q3 - q1
outlierVariance
  :: Double 
  -> Double 
  -> Double 
  -> OutlierVariance
outlierVariance µ σ a = OutlierVariance effect desc varOutMin
    where
    µa    = µ / a
    µgMin = µa / 2
    σg2   = σg * σg where σg = min (µgMin / 4) (σ / sqrt a)
    σ2    = σ * σ
    varOut c  = (ac / a) * (σ2 - ac * σg2) where ac = a - c
    cMax x    = fromIntegral (floor (-2 * k0 / (k1 + sqrt det)) :: Int)
        where
        ad = a * d
            where
            d = k * k
            k = µa - x
        k0    = -a * ad
        k1    = σ2 - a * σg2 + ad
        det   = k1 * k1 - 4 * σg2 * k0
    minBy f q r = min (f q) (f r)
    varOutMin = if σ2 == 0
                then 0
                else (minBy varOut 1 (minBy cMax 0 µgMin)) / σ2
    (effect, desc) | varOutMin < 0.01 = (Unaffected, "no")
                   | varOutMin < 0.1  = (Slight,     "slight")
                   | varOutMin < 0.5  = (Moderate,   "moderate")
                   | otherwise        = (Severe,     "severe")
countOutliers :: Outliers -> Int64
countOutliers (Outliers _ a b c d) = a + b + c + d
{-# INLINE countOutliers #-}
useRegression :: Bool
useRegression = True
useBootstrap :: Bool
useBootstrap = True
resampleCount :: Int
resampleCount = 1000
confidence :: CL Double
confidence = cl95
regress
    :: GenIO
    -> Int  
            
    -> [String] 
    -> [([Double], [Double])]
    -> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
regress randGen i rcols samples = do
    
    
    let predVectors = map U.fromList $ transpose $ map fst samples
        regressWithIters = mapM (bootstrapRegress randGen resampleCount
                                confidence olsRegress predVectors)
    let avoidMaxFields name vec =
            if isMaxField name
            then Nothing
            else Just vec
    let respVectors = map U.fromList $ transpose $ map snd samples
    res <- mapM regressWithIters (zipWith avoidMaxFields rcols respVectors)
    return $ map (fmap (\(v,r2) -> ((G.toList v) !! i, r2))) res
estimateMeanAndStdDev
    :: GenIO
    -> [U.Vector Double]
    -> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
estimateMeanAndStdDev randGen vectors = do
    let resamp = resample randGen [St.Mean, St.StdDev] resampleCount
    res <- mapM resamp vectors
    return $ fmap (\[mn,dev] -> (mn, dev))
        $ getZipList
        $ bootstrapBCA confidence
            <$> ZipList vectors
            <*> ZipList res
isMaxField :: String -> Bool
isMaxField fieldName = map toLower fieldName == "maxrss"
rescaleIteration :: Int -> [String] -> ([Double], [Double]) -> [Double]
rescaleIteration idx rcols (pvals, vals) =
    let iter = pvals !! idx
    in zipWith ($) (map ($ iter) foldFields) vals
    where
    getMeanOrMax fname i val =
        if isMaxField fname
        then val
        else val / i
    foldFields = map getMeanOrMax rcols
data AnalyzedField = AnalyzedField
    { analyzedMean       :: !Double
    , analyzedStdDev     :: !Double
    , analyzedMedian     :: !Double
    , analyzedOutliers   :: !Outliers
    , analyzedOutlierVar :: !OutlierVariance
    , analyzedKDE        :: !(U.Vector Double, U.Vector Double)
    , analyzedRegCoeff   :: Maybe (Estimate ConfInt Double)
    , analyzedRegRSq     :: Maybe (Estimate ConfInt Double)
    } deriving Show
data Estimator =
      Median        
                    
                    
                    
    | Mean          
                    
    | Regression    
                    
                    
                    
                    
                    
                    
    deriving (Eq, Show)
getAnalyzedValue :: Estimator -> AnalyzedField -> Double
getAnalyzedValue estimator AnalyzedField{..} =
    case estimator of
        Median -> analyzedMedian
        Mean -> analyzedMean
        Regression ->
            case analyzedRegCoeff of
                Nothing -> analyzedMean
                Just x -> estPoint x
analyzeBenchmark :: GenIO
                 -> [String]
                 -> [String]
                 -> [([Double], [Double])]
                 -> IO [AnalyzedField]
analyzeBenchmark randGen pcols rcols samples = do
    let sampleCnt = length samples
        i = fromMaybe (error "bug") $ elemIndex "iters" pcols
        vectors = map U.fromList
            $ transpose
            $ map (rescaleIteration i rcols) samples
    (coeffs, r2s) <-
        
        if useRegression && length samples >= (length pcols + 1)
        then do
            let f (Just (x, y)) = (Just x, Just y)
                f Nothing = (Nothing, Nothing)
            fmap (unzip . map f) $ regress randGen i rcols samples
        else do
            let n = length rcols
            return (replicate n Nothing, replicate n Nothing)
    (means, devs) <-
        if useBootstrap
        then do
            (ms, ds) <- fmap unzip $ estimateMeanAndStdDev randGen vectors
            return (map estPoint ms, map estPoint ds)
        else do
            
            let ms = map mean vectors
                ds = map stdDev vectors
            return (ms, ds)
    let len = U.length $ head vectors
        median v = (sort v) U.! (len `div` 2)
        medians = map median vectors
        outliers = getZipList $ classifyOutliers <$> ZipList vectors
        outlierVars = getZipList
                $ outlierVariance
                    <$> ZipList means
                    <*> ZipList devs
                    <*> pure (fromIntegral sampleCnt)
        kdes = map (kde 128) vectors
    return $ getZipList $ AnalyzedField
        <$> ZipList means
        <*> ZipList devs
        <*> ZipList medians
        <*> ZipList outliers
        <*> ZipList outlierVars
        <*> ZipList kdes
        <*> ZipList coeffs
        <*> ZipList r2s
data BenchmarkIterMatrix = BenchmarkIterMatrix
    { iterPredColNames  :: ![String]  
    , iterRespColNames  :: ![String]  
    
    , iterRowValues :: ![(String, [([Double], [Double])])]
    } deriving Show
data BenchmarkMatrix = BenchmarkMatrix
    { colNames  :: ![String]
    , rowValues :: ![(String, [AnalyzedField])] 
    } deriving Show
foldBenchmark :: BenchmarkIterMatrix -> IO BenchmarkMatrix
foldBenchmark BenchmarkIterMatrix{..} = do
    randGen <- createSystemRandom
    rows <- mapM (foldIters randGen) iterRowValues
    return $ BenchmarkMatrix
        { colNames = iterRespColNames
        , rowValues = rows
        }
    where
    foldIters randGen (name, vals) = do
            vals' <- analyzeBenchmark randGen iterPredColNames
                                      iterRespColNames vals
            return (name, vals')
filterSamples :: BenchmarkIterMatrix -> BenchmarkIterMatrix
filterSamples matrix@BenchmarkIterMatrix{..} =
    matrix