{-# LANGUAGE RecordWildCards #-}

-- | Passive-aggressive optimization. Mainly based on:
--
-- Zakov, Shay and Goldberg, Yoav and Elhadad, Michael and Ziv-Ukelson, Michal
-- "Rich Parameterization Improves RNA Structure Prediction"
-- RECOMB 2011
--
-- and
--
-- Crammer, Koby et al.
-- "Online Passive-Aggressive Algorithms"
-- Journal of Machine Learning Research (2006)
--
-- TODO as always: move out of here and put into its own library

module BioInf.PassiveAggressive where

import qualified Data.Vector.Unboxed as VU
import Data.List as L
import Data.Set as S
import Control.Arrow
import Data.Map as M

import Biobase.TrainingData

import BioInf.Keys
import qualified BioInf.Params as P
import qualified BioInf.Params.Import as P
import qualified BioInf.Params.Export as P

import Statistics.ConfusionMatrix
import Statistics.PerformanceMetrics

import Data.PrimitiveArray as PA
import Data.PrimitiveArray.Ix

-- | Default implementation of P/A. Given the aggressiveness parameter, the
-- current parameters, and one training example, returns the updated
-- parameters, the step size tau, the F-measure of the prediction, and the
-- per-feature changes that were applied.

defaultPA :: Double -> P.Params -> TrainingData -> (P.Params, Double, Double, [(Int,Double)])
defaultPA aggressiveness params td@TrainingData{..}
  | L.null $ pOnly ++ kOnly = (params, 0, 1, [])  -- identical feature sets: nothing to update
  | sty >= 0.999            = (params, 0, 1, [])  -- prediction already (almost) perfect
  | otherwise               = (heck, tau, sty, changes)
  where
    -- updated parameters; 'new2' re-applies an empty update so that 'heck'
    -- can check that the list/vector round trip is the identity
    new1 = P.fromList . VU.toList $ VU.accum (\v pm -> v+pm) cur changes
    new2 = P.fromList . VU.toList $ VU.accum (\v pm -> v+pm) (VU.fromList $ P.toList new1) []
    heck
      | P.toList new1 == P.toList new2 = new1
      | otherwise = error "defaultPA: parameter import/export round trip is not the identity"
    -- feature indices of the predicted and the known structure
    pFeatures = featureVector primary predicted
    kFeatures = featureVector primary secondary
    pOnly = pFeatures L.\\ kFeatures
    kOnly = kFeatures L.\\ pFeatures
    numChanges = genericLength $ pOnly ++ kOnly
    cur = VU.fromList . P.toList $ params
    pScore = sum . L.map (cur VU.!) $ pFeatures
    kScore = sum . L.map (cur VU.!) $ kFeatures
    -- NOTE these are currently computed exactly like 'pScore' and 'kScore',
    -- so the consistency guard in 'tau' below can never fire
    pScore2 = sum . L.map (cur VU.!) $ pFeatures
    kScore2 = sum . L.map (cur VU.!) $ kFeatures
    tau
      | abs ((kScore2 - pScore2) - (kScore - pScore)) > 0.1 = error $ "abs: \n" ++ z
      | val < 0      = error $ "val<0 \n" ++ z
      | sty >= 0.999 = 0
      | otherwise    = val
      where
        val = min aggressiveness $ (kScore - pScore + sqrt (1-sty)) / (numChanges ^ 2)
        z = show ( kScore, pScore, kScore - pScore
                 , kScore2, pScore2, kScore2 - pScore2
                 ) ++ "\n"
          ++ primary ++ "\n"
          ++ (concat $ intersperse "\n" comments) ++ "\n"
          ++ (L.concatMap (\x -> show x ++ "\n") $ L.map (fun &&& (cur VU.!)) kOnly) ++ " <<<\n"
          ++ (L.concatMap (\x -> show x ++ "\n") $ L.map (fun &&& (cur VU.!)) pOnly) ++ " ALL\n"
          ++ (L.concatMap (\x -> show x ++ "\n") $ L.map (fun &&& (cur VU.!)) pFeatures)
    -- diagnostic helpers: map a feature index back to its key and the
    -- corresponding current parameter value
    fun i = let key = vks M.! i in (key, fun2 key)
    fun2 (HairpinClose k)  = P.hairpinClose  params PA.! k
    fun2 (HairpinLength l) = P.hairpinLength params PA.! l
    fun2 _ = (-1)
    -- currently optimizing using F_1
    sty = case fmeasure (mkConfusionMatrix td) of
      Left  _ -> 1
      Right v -> v
    changes = zip kOnly (repeat $ negate tau) ++ zip pOnly (repeat tau)
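-- For reference, the step size above instantiates the PA-I update of
-- Crammer et al. (2006): tau = min (C, loss / ||dx||^2), where the loss here
-- is the score margin plus @sqrt (1 - F_1)@. Below is a minimal sketch of
-- that rule in isolation; the name 'paStepSize' is illustrative and not part
-- of this module's interface (note also that 'defaultPA' itself divides by
-- @numChanges ^ 2@, the squared count of differing features).

paStepSize
  :: Double  -- ^ aggressiveness parameter C
  -> Double  -- ^ suffered loss, e.g. @kScore - pScore + sqrt (1 - sty)@
  -> Double  -- ^ squared norm of the feature difference vector
  -> Double  -- ^ step size tau
paStepSize c loss normSq = min c (loss / normSq)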
-- | Pull in the statistical interface. From the confusion matrix, we
-- automagically get everything we need.
--
-- NOTE Unfortunately, StatisticalMethods has heavy dependencies.

instance MkConfusionMatrix TrainingData where
  mkConfusionMatrix TrainingData{..} = ConfusionMatrix
    { fn = Right . fromIntegral . S.size $ k `S.difference` p
    , fp = Right . fromIntegral . S.size $ p `S.difference` k
    , tn = Right . fromIntegral $ allPs - S.size (k `S.union` p)
    , tp = Right . fromIntegral . S.size $ k `S.intersection` p
    }
    where
      k = S.fromList secondary
      p = S.fromList predicted
      -- all possible base pairs over a sequence of length n: n*(n-1)/2
      allPs = (length primary * (length primary - 1)) `div` 2
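-- A minimal sketch of how 'defaultPA' might drive one online training epoch,
-- threading the updated parameters through a list of training examples. The
-- name 'trainingEpoch' and the choice to discard the diagnostic parts of the
-- result tuple are illustrative assumptions, not part of this module.

trainingEpoch :: Double -> P.Params -> [TrainingData] -> P.Params
trainingEpoch aggressiveness = L.foldl' step
  where
    step ps td =
      let (ps', _tau, _fMeasure, _changes) = defaultPA aggressiveness ps td
      in  ps'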