{-# LANGUAGE RecordWildCards #-} -- | Prospective 'TrainingData' elements need to be filtered as there are a -- number of entries which do not provide good training. module Biobase.TrainingData.Filter where import Data.List import qualified Data.Vector.Unboxed as VU import Biobase.TrainingData import Biobase.TrainingData.Manip -- | Filter out elements containing not enough base pairs (in relative terms) fMinRelPairs :: Maybe Double -> TDmanip -> TDmanip fMinRelPairs Nothing x = x fMinRelPairs _ l@(Left _) = l fMinRelPairs (Just rel) r@(Right td@(TrainingData{..})) | rel > numps * 2 / lenpri = Left td | otherwise = r where lenpri = genericLength primary numps = genericLength secondary -- | Error-checking filter. fErrorCheck :: TDmanip -> TDmanip fErrorCheck l@(Left _) = l fErrorCheck r@(Right td@(TrainingData{..})) | any (<0) ixs = Left td -- that one is really strange | any (>=l) ixs = Left td -- indices out of bounds | any ((=='&') . (v VU.!)) ixs = Left td -- index pointing to intramolecular symbol | otherwise = r where ixs = concatMap (\((i,j),_) -> [i,j]) secondary l = length primary v = VU.fromList primary