{-# LANGUAGE RecordWildCards #-}

module Biobase.TrainingData.Manip where

import Data.List (sort,sortBy)
import Data.Ord (comparing)

import Biobase.Secondary.PseudoKnots

import Biobase.TrainingData



-- | Outcome of a filtering step over 'TrainingData'. By convention, a 'Left'
-- value marks an element that is filtered out and a 'Right' value marks an
-- element that is kept.

type TDmanip = Either TrainingData TrainingData

-- | Optionally remove pseudoknots from 'TrainingData'.
--
-- When the flag is 'False' the input is returned unchanged. When it is 'True'
-- the @secondary@ field is replaced by the result of applying
-- 'removeByCounting' to it; all other fields are left as they are.
removePK :: Bool -> TrainingData -> TrainingData
removePK False td = td
removePK True td@TrainingData{..} = td { secondary = removeByCounting secondary }

-- | Optionally remove triplets from training data.
--
-- When the flag is 'False' the input is returned unchanged. When it is 'True'
-- the @secondary@ list is walked front to back. Each pair @x@ is compared with
-- the pairs still following it; two pairs conflict when they share at least
-- one index.
--
--   * No conflict: @x@ is kept and the walk continues with the rest.
--
--   * Exactly one conflicting pair @t@: if @x@ is the worse of the two it is
--     dropped. Otherwise every occurrence of @t@ is deleted from the remaining
--     list and @x@ is examined again.
--
--   * Two or more conflicting pairs: @x@ is always dropped.
--
-- The surviving pairs are returned sorted.
fRemoveTriplets :: Bool -> TrainingData -> TrainingData
fRemoveTriplets False td = td
fRemoveTriplets True td@TrainingData{..} = td { secondary = sort (rmTs secondary) }
  where
    rmTs [] = []
    rmTs (x:xs) = case conflicts x xs of
      []  -> x : rmTs xs
      [t]
        | worst x t -> rmTs xs
        -- x wins: discard its partner, then re-check x against what is left.
        | otherwise -> rmTs (filter (/= t) (x : xs))
      _   -> rmTs xs -- always remove a double triplet "X": (X)

    -- All pairs in the given list that share an index with the first argument.
    conflicts ((i,j),_) = filter (\((k,l),_) -> i==k || i==l || j==k || j==l)

    -- @worst z t@ holds when @z@ ends up last after a stable sort of @[z, t]@
    -- on the second tuple component (presumably the pair annotation; confirm
    -- against the element type of @secondary@). On a tie @t@ sorts last, so
    -- @z@ only counts as worst if it equals @t@.
    worst z t = last (sortBy (comparing snd) [z, t]) == z