module BioInf.PassiveAggressive where
import qualified Data.Vector.Unboxed as VU
import Data.List as L
import Data.Set as S
import Control.Arrow
import Data.Map as M
import Text.Printf
import Biobase.TrainingData
import BioInf.Keys
import qualified BioInf.Params as P
import qualified BioInf.Params.Import as P
import qualified BioInf.Params.Export as P
import Statistics.ConfusionMatrix
import Statistics.PerformanceMetrics
import Data.PrimitiveArray as PA
import Data.PrimitiveArray.Ix
-- | One passive-aggressive update step for a single training instance.
--
-- Arguments, in order: the aggressiveness (an upper bound on the step size
-- @tau@), the current parameters, and the training instance.
--
-- Returns @(newParams, tau, fMeasure, changes)@, where @changes@ holds the
-- @(featureIndex, delta)@ pairs that were added to the parameter vector.
-- When the predicted and the known structure use exactly the same features,
-- or the F-measure is at least 0.999, the parameters are returned untouched
-- as @(params, 0, 1, [])@.
--
-- Forcing @tau@ (or anything derived from it) calls 'error' when the score of
-- the known structure is more than @epsilon@ below the predicted score.
defaultPA :: Double -> P.Params -> TrainingData -> (P.Params,Double,Double,[(Int,Double)])
defaultPA aggressiveness params td@TrainingData{..}
  | L.null differing = (params,0,1,[])
  | sty >= 0.999     = (params,0,1,[])
  | otherwise        = (new, tau, sty, changes)
  where
    -- Parameter vector after adding every delta in 'changes'.
    new = P.fromList . VU.toList $ VU.accum (+) cur changes
    pFeatures = featureVector primary predicted
    kFeatures = featureVector primary secondary
    -- Features used by only one of the two structures.
    pOnly = pFeatures L.\\ kFeatures
    kOnly = kFeatures L.\\ pFeatures
    differing = pOnly ++ kOnly
    numChanges = genericLength differing
    -- Known-only features are lowered by tau, predicted-only ones raised.
    changes = zip kOnly (repeat $ negate tau) ++ zip pOnly (repeat tau)
    cur = VU.fromList . P.toList $ params
    pScore = sum . L.map (cur VU.!) $ pFeatures
    kScore = sum . L.map (cur VU.!) $ kFeatures
    tau
      | kScore + epsilon < pScore
      = error $ "S(known) < S(predicted)\n" ++ errorKnownTooGood td cur kFeatures pFeatures
      | sty >= 0.999
      = 0
      | otherwise
      = val
      where
        -- Step size: score gap plus sqrt (1 - F), divided by the squared
        -- number of differing features, capped at 'aggressiveness'.
        val = min aggressiveness $ (kScore - pScore + sqrt (1 - sty)) / (numChanges ^ 2)
    -- F-measure of the prediction; an undefined F-measure counts as perfect.
    sty = either (const 1) id $ fmeasure (mkConfusionMatrix td)
    -- Tolerance before a known score below the predicted one is an error.
    epsilon = 0.1
-- | Diagnostic text for the case where the known structure scores below the
-- predicted one.
--
-- The first line reports both scores and their difference; it is followed by
-- the @primary@ field and the newline-separated @comments@ of the training
-- instance. Scores are the sums of the entries of @curPs@ at the given
-- feature indices.
errorKnownTooGood :: TrainingData -> VU.Vector Double -> [Int] -> [Int] -> String
errorKnownTooGood TrainingData{..} curPs kFeatures pFeatures = scoreLine ++ context
  where
    scoreLine = printf "S(known) = %7.4f, S(pred) = %7.4f, S(known) - S(pred) = %7.4f\n"
                  kScore pScore (kScore - pScore)
    context = printf "%s\n%s\n" primary (L.intercalate "\n" comments)
    kScore = sum . L.map (curPs VU.!) $ kFeatures
    pScore = sum . L.map (curPs VU.!) $ pFeatures
-- | Confusion matrix comparing the @predicted@ entries of a training instance
-- against its @secondary@ (known) entries, both treated as sets.
--
-- * @tp@: entries present in both sets
-- * @fp@: entries only in @predicted@
-- * @fn@: entries only in @secondary@
-- * @tn@: every remaining unordered pair of positions in @primary@
instance MkConfusionMatrix TrainingData where
  mkConfusionMatrix TrainingData{..} = ConfusionMatrix
    { fn = Right . fromIntegral . S.size $ k `S.difference` p
    , fp = Right . fromIntegral . S.size $ p `S.difference` k
    , tn = Right . fromIntegral $ allPs - S.size (k `S.union` p)
    , tp = Right . fromIntegral . S.size $ k `S.intersection` p
    }
    where
      k = S.fromList secondary
      p = S.fromList predicted
      n = length primary
      -- Number of unordered pairs of distinct positions: n * (n - 1) / 2.
      allPs = (n * (n - 1)) `div` 2