{-# LANGUAGE RecordWildCards #-}

module Biobase.TrainingData.Manip where

import Data.List (sort, sortBy)
import Data.Ord (comparing)

import Biobase.Secondary.PseudoKnots
import Biobase.TrainingData

-- | Left elements are filtered out, Right elements are kept.
type TDmanip = Either TrainingData TrainingData

-- | Remove pseudoknots from 'TrainingData'.
--
-- When the flag is 'False' the entry is returned unchanged. When it is 'True'
-- the @secondary@ field is replaced by the result of 'removeByCounting'
-- applied to it; all other fields are left as they are.
removePK :: Bool -> TrainingData -> TrainingData
removePK rpk td@TrainingData{..}
  | rpk       = td{secondary = removeByCounting secondary}
  | otherwise = td

-- | Remove triplets from training data.
--
-- When the flag is 'False' the entry is returned unchanged. Otherwise the
-- pairs in @secondary@ are scanned front to back. For the pair @x@ at the
-- front, every later pair that shares one of its two indices (a "triplet
-- partner") is collected, and then:
--
-- * no partner: @x@ is not part of a triplet; it is removed from
--   consideration and put into the output (x:);
--
-- * exactly one partner: if @x@ is the worst of the two it is dropped;
--   otherwise the partner is dropped from the remaining pairs and @x@ is
--   examined again;
--
-- * two or more partners: @x@ is always dropped (a double triplet "X": (X)).
--
-- The surviving pairs are written back in sorted order.
fRemoveTriplets :: Bool -> TrainingData -> TrainingData
fRemoveTriplets False td = td
fRemoveTriplets True td@TrainingData{..} = td{secondary = sort $ rmTs secondary}
  where
    -- Matching on the shape of the partner list keeps this total: the single
    -- partner is bound by the pattern, so no 'head' and no 'length' traversal
    -- is needed.
    rmTs [] = []
    rmTs (x:xs) = case triplets x xs of
      []  -> x : rmTs xs
      [t] | worst x [t] -> rmTs xs                          -- x loses: drop it
          | otherwise   -> rmTs . filter (/= t) $ x : xs    -- partner loses: drop it, retry x
      _   -> rmTs xs                                        -- double triplet: always drop x

    -- All pairs in @zs@ that share at least one index with the given pair.
    triplets ((i,j),_) zs = filter (\((k,l),_) -> i==k || i==l || j==k || j==l) zs

    -- @z@ is the worst when it sorts last by its annotation ('snd'). 'sortBy'
    -- is stable, so on equal annotations the later element wins the last
    -- position. 'last' is safe here because @z : zs@ is never empty.
    -- NOTE(review): which annotation counts as "worse" is decided by the 'Ord'
    -- instance of the annotation type, which is defined elsewhere.
    worst z zs = last (sortBy (comparing snd) $ z : zs) == z