{-# LANGUAGE ParallelListComp #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeSynonymInstances #-} -- | We want to be able to generate secondary structure representations from a -- wide variety of inputs. -- -- TODO include DotBracket, CT, lists of pairs,... some lossy conversions from -- FamPair to CwwPair, etc... module Biobase.RNA.Pairs where import Control.DeepSeq import Data.Vector.Unboxed.Read import qualified Data.Vector.Unboxed as VU import Data.List (nub) -- | Canonical secondary structures, with the Watson-Crick cis pair family. class MkSecondary a where mkSecondary :: a -> Pairs CwwPair -- | Extended secondary structures, with any kind of pair family for each pair. class MkExtended a where mkExtended :: a -> Pairs ExtPair -- | Pair container. We store the sequence length as well in order to be able -- to reconstruct the unpaired nucleotides without having to store them -- explicitly. data (VU.Unbox a, Show a, Read a) => Pairs a = Pairs { seqLength :: Int , pairs :: VU.Vector a } deriving (Eq,Show,Read) -- * Canonical secondary structures -- | Most specific instance, just wrapping length and vector instance MkSecondary (Int,VU.Vector CwwPair) where mkSecondary (l,xs) = Pairs l xs -- | Only conversion to vector from list instance MkSecondary (Int,[CwwPair]) where mkSecondary (l,xs) = mkSecondary (l,VU.fromList xs) -- | Instance for MkSecondary instance MkSecondary String where mkSecondary xs = mkPairList xs -- * Extended secondary structures -- | The most specific instance possible instance MkExtended (Int,VU.Vector ExtPair) where mkExtended (l,xs) = Pairs l xs -- | Just turn list into a vector instance MkExtended (Int,[ExtPair]) where mkExtended (l,xs) = mkExtended (l,VU.fromList xs) -- | Create from a string. Here, we do not have pair family information and so -- assume that all pairs are of the cWW type. instance MkExtended String where mkExtended xs = mkExtended $ mkSecondary xs -- | Create extended structure from CwwPair structure. -- -- TODO still undefined until we have isostericity included instance MkExtended (Pairs CwwPair) where mkExtended Pairs{..} = Pairs seqLength (VU.map (\(i,j) -> (i,j,error "mkExtended needs to know about isostericity")) pairs) -- * A basic dot-bracket parser. Not a datasource due to it being universally needed. -- | We only return 'CwwPair's as dot-bracket pairs, others can not be encoded. mkPairList :: String -> Pairs CwwPair mkPairList s = mkSecondary (length s, concatMap (\b -> mkPairListWith b s) bs) where z = nub s bs = filter (\[l,r] -> l `elem` z && r `elem` z) defBrackets -- | Generic dot-bracket parser accepting more than one bracket type. mkPairListWith :: String -> String -> [CwwPair] mkPairListWith [l,r] s = f [] 0 s where f _ _ [] = [] f s k (x:xs) | x==l = f (k:s) (k+1) xs | x==r = (head s, k) : f (tail s) (k+1) xs | otherwise = f s (k+1) xs -- | These are the default bracket types to consider defBrackets = [ "()" , "{}" , "[]" , "<>" ] ++ [ [b,s] | b <- ['A'..'Z'] | s <- ['a'..'z'] ] -- * Types and instances type CwwPair = (Int,Int) type ExtPair = (Int,Int,Int) instance (NFData a, Read a, Show a, VU.Unbox a) => NFData (Pairs a) where rnf Pairs{..} = rnf seqLength `seq` rnf (VU.length pairs) `seq` ()