{-# LANGUAGE RecordWildCards #-}

module Biobase.TrainingData.Manip where

import Data.List (sort,sortBy)
import Data.Ord (comparing)

import Biobase.Secondary.PseudoKnots

import Biobase.TrainingData



-- | Outcome of a filtering step on a 'TrainingData' entry. Both sides carry
-- the entry itself: 'Left' elements are filtered out, 'Right' elements are
-- kept.

type TDmanip = Either TrainingData TrainingData

-- | Optionally remove pseudoknots from a 'TrainingData' entry.
--
-- With the flag set to 'False' the entry is returned unchanged. With 'True'
-- the @secondary@ field is replaced by the result of running
-- 'removeByCounting' on it; every other field is left as it is.

removePK :: Bool -> TrainingData -> TrainingData
removePK rpk td
  | rpk       = td { secondary = removeByCounting (secondary td) }
  | otherwise = td

-- | Remove triplets from training data.
--
-- With the flag set to 'False' the entry is returned unchanged. With 'True'
-- the @secondary@ list is worked through as a queue by "rmTs" and the sorted
-- result is stored back into the entry. For the pair at the head of the queue:
--
-- * If it shares no index with any pair still in the queue, it is moved to
--   the output (x:).
--
-- * If it shares an index with other queued pairs and is the worst of them
--   (no conflicting pair has a strictly larger second component), it is
--   dropped.
--
-- * Otherwise it is rotated to the last position of the queue and looked at
--   again later, after worse partners have had the chance to be dropped.
--
-- Ties count as "worst". Together with the rotation this guarantees progress:
-- the queued pair with the largest second component is always either emitted
-- or dropped when it reaches the head, so the queue shrinks on every pass.

fRemoveTriplets :: Bool -> TrainingData -> TrainingData
fRemoveTriplets False td = td
fRemoveTriplets True  td = td{secondary = sort . rmTs $ secondary td} where
  rmTs [] = []
  rmTs (x:xs)
    | null ys    = x : rmTs xs
    | worst x ys = rmTs xs
    -- Parenthesised on purpose: "rmTs xs ++ [x]" would emit x right away
    -- instead of re-queueing it, leaving its partners unchecked against it.
    | otherwise  = rmTs (xs ++ [x])
    where ys = conflicts x xs
  -- All queued pairs that share at least one index with the given pair.
  conflicts ((i,j),_) = filter (\((k,l),_) -> i==k || i==l || j==k || j==l)
  -- True if no conflicting pair has a strictly larger second component.
  worst z = all (\y -> snd y <= snd z)