{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} -- | Types for RNA secondary structure. Types vary from the simplest array -- (D1Secondary) to rather complex ones. -- -- TODO The complex ones are still coming in from other libraries. -- -- TODO can we use Char8 instead of Char? {-# LANGUAGE RecordWildCards #-} module Biobase.Structure where import qualified Data.Vector.Unboxed as VU import Data.Vector.Unboxed.Read import Data.List (sort,groupBy) -- * Array-based representation, no notion of structure -- | Create secondary structure by various means. class MkD1Secondary a where mkD1S :: a -> D1Secondary fromD1S :: D1Secondary -> a -- | Most primitive secondary structure generation instance MkD1Secondary (Int,[(Int,Int)]) where mkD1S (len,ps) = D1S $ VU.replicate len (-1) VU.// xs where xs = concatMap (\(i,j) -> [(i,j),(j,i)]) ps fromD1S (D1S s) = (VU.length s, filter ((>=0).snd). zip [0..] $ VU.toList s) -- | A second primitive generator, requiring dictionary and String. This one -- generates pairs that are then used by the above instance. The dict is a list -- of possible brackets: ["()"] being the minimal set. -- -- NOTE no dictionary is returned by "fromD1S". -- -- TODO return dictionary that is actually seen? instance MkD1Secondary ([String],String) where mkD1S (dict,xs) = mkD1S (length xs,ps) where ps :: [(Int,Int)] ps = dotBracket dict xs fromD1S (D1S s) = ([], zipWith f [0..] $ VU.toList s) where f k (-1) = '.' f k p | k>p = ')' | otherwise = '(' -- | Generate Secondary given that we have an unboxed vector of characters instance MkD1Secondary ([String],VU.Vector Char) where mkD1S (dict,xs) = mkD1S (dict, VU.toList xs) fromD1S s = let (dict,res) = fromD1S s in (dict,VU.fromList res) -- | A "fast" instance for getting the pair list of vienna-structures. instance MkD1Secondary String where mkD1S xs = mkD1S (["()"],xs) fromD1S s = let (_::[String],res) = fromD1S s in res instance MkD1Secondary (VU.Vector Char) where mkD1S xs = mkD1S (["()"],xs) fromD1S s = let (_::[String],res::VU.Vector Char) = fromD1S s in res newtype D1Secondary = D1S {unD1S :: VU.Vector Int} deriving (Read,Show,Eq) -- * Helper functions -- | Secondary structure parser which allows pseudoknots, if they use different -- kinds of brackets. dotBracket :: [String] -> String -> [(Int,Int)] dotBracket dict xs = sort . concatMap (f xs) $ dict where f xs [l,r] = g 0 [] . map (\x -> if x `elem` [l,r] then x else '.') $ xs where g :: Int -> [Int] -> String -> [(Int,Int)] g _ st [] = [] g k st ('.':xs) = g (k+1) st xs g k sst (x:xs) | l==x = g (k+1) (k:sst) xs g k (s:st) (x:xs) | r==x = (s,k) : g (k+1) st xs g a b c = error $ show (a,b,c) -- * Tree-based representation: structure is given by the tree -- | secondary structure representation using an explicit tree, SSExt encodes -- the length of the underlying sequence. Each node can contain additional -- information under 'a'. data SSTree a = SSTree Int Int a [SSTree a] | SSExt Int a [SSTree a] deriving (Read,Show,Eq) -- | generate an SSTree from a secondary structure. mkSSTree :: D1Secondary -> SSTree () mkSSTree s = ext $ sort ps where (len,ps) = fromD1S s ext [] = SSExt len () [] ext xs = SSExt len () . map tree $ groupBy (\l r -> snd l > fst r) xs tree [(i,j)] = SSTree i j () [] tree ((i,j):xs) = SSTree i j () . map tree $ groupBy (\l r -> snd l > fst r) xs