{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} -- | Encodings for nucleotides and pairs. Currently very ViennaRNA-centric. -- This might change over time. -- -- TODO do not export Nucleotide ctor? module Biobase.RNA where import Data.Char (toUpper) import Data.Ix import qualified Data.Vector.Unboxed as VU import qualified Data.Vector.Generic as VG import qualified Data.Vector.Generic.Mutable as VGM import Data.Primitive.Types import Data.PrimitiveArray import Data.PrimitiveArray.Ix -- * single nucleotide encoding newtype Nucleotide = Nucleotide Int deriving (Eq,Ord,Ix) (nucE:nucA:nucC:nucG:nucU:_) = map Nucleotide [0..] nucCharList :: [(Int,Char)] nucCharList = zip [0..] "EACGU" charNucList :: [(Char,Int)] charNucList = zip "EACGU" [0..] deriving instance Prim Nucleotide deriving instance VGM.MVector VU.MVector Nucleotide deriving instance VG.Vector VU.Vector Nucleotide deriving instance VU.Unbox Nucleotide -- TODO maybe use 'assert' and do not catch errors in optimized operation?! instance Enum Nucleotide where toEnum x | x>=0 && x<=4 = Nucleotide x | otherwise = error $ "can't make to enum" ++ show x fromEnum (Nucleotide x) = x {-# INLINE toEnum #-} {-# INLINE fromEnum #-} instance Bounded Nucleotide where minBound = nucE maxBound = nucU instance Show Nucleotide where show (Nucleotide x) | Just s <- x `lookup` nucCharList = [s] instance Read Nucleotide where readsPrec p [] = [] readsPrec p (x:xs) | x ==' ' = readsPrec p xs | Just n <- x `lookup` charNucList = [(Nucleotide n,xs)] | otherwise = [] charToNucleotide :: Char -> Nucleotide charToNucleotide = f . toUpper where f x | Just n <- x `lookup` charNucList = Nucleotide n | otherwise = nucE nucleotideToChar :: Nucleotide -> Char nucleotideToChar (Nucleotide n) = f n where f x | Just c <- x `lookup` nucCharList = c -- * Represent nucleotide sequences using an unboxed vector. type Primary = PrimArray Int Nucleotide instance Show Primary where show p = "mkPrimary " ++ unPrimary p class MkPrimary a where mkPrimary :: a -> Primary unPrimary :: Primary -> a instance MkPrimary String where mkPrimary xs = fromList 0 (length xs -1) $ map charToNucleotide xs unPrimary v = map nucleotideToChar $ toList v {-# INLINE mkPrimary #-} {-# INLINE unPrimary #-} instance MkPrimary (VU.Vector Char) where mkPrimary = mkPrimary . VU.toList unPrimary = VU.fromList . unPrimary {-# INLINE mkPrimary #-} {-# INLINE unPrimary #-} -- * Encoding of Watson-Crick and Wobble Pairs in the Vienna RNA package style. newtype ViennaPair = ViennaPair Int deriving (Eq,Ord,Ix) (vpNP:vpCG:vpGC:vpGU:vpUG:vpAU:vpUA:vpNS:_) = map ViennaPair [0..] vpStrList :: [(Int,String)] vpStrList = zip [0..] ["NP","CG","GC","GU","UG","AU","UA","NS"] strVpList :: [(String,Int)] strVpList = zip ["NP","CG","GC","GU","UG","AU","UA","NS"] [0..] deriving instance VGM.MVector VU.MVector ViennaPair deriving instance VG.Vector VU.Vector ViennaPair deriving instance VU.Unbox ViennaPair deriving instance Prim ViennaPair instance Enum ViennaPair where toEnum x | x>=0 && x<=7 = ViennaPair x | otherwise = error $ "can't make to enum" ++ show x fromEnum (ViennaPair x) = x {-# INLINE toEnum #-} {-# INLINE fromEnum #-} instance Bounded ViennaPair where minBound = vpNP maxBound = vpNS instance Show ViennaPair where show (ViennaPair x) | Just s <- x `lookup` vpStrList = s instance Read ViennaPair where readsPrec p [] = [] readsPrec p [x] = [] readsPrec p (x:y:xs) | x ==' ' = readsPrec p (y:xs) | Just n <- (x:y:[]) `lookup` strVpList = [(ViennaPair n,xs)] | otherwise = [] -- * convenience lists acgu = [nucA .. nucU] acguStr = map show acgu acguPairs = [(b1,b2) | b1<-acgu, b2<-acgu] eacgu = [nucE .. nucU] npnsP = [vpNP .. vpNS] cguaP = [vpCG .. vpUA] cgnsP = [vpCG .. vpNS] -- * Transformations between Nucleotide and ViennaPair pairTable = fromAssocs (nucE,nucE) (nucU,nucU) vpNP [ ((nucC,nucG),vpCG) , ((nucG,nucC),vpGC) , ((nucG,nucU),vpGU) , ((nucU,nucG),vpUG) , ((nucA,nucU),vpAU) , ((nucU,nucA),vpUA) ] {- NOINLINE pairTable #-} toPair :: Nucleotide -> Nucleotide -> ViennaPair toPair x y = pairTable ! (x,y) {- | x==nucC&&y==nucG = vpCG | x==nucG&&y==nucC = vpGC | x==nucG&&y==nucU = vpGU | x==nucU&&y==nucG = vpUG | x==nucA&&y==nucU = vpAU | x==nucU&&y==nucA = vpUA | otherwise = vpNP -} {-# INLINE toPair #-} pairs :: Nucleotide -> Nucleotide -> Bool pairs a b = (>vpNP) $ toPair a b {-# INLINE pairs #-} vienna2Tuple p | p==vpCG = (nucC,nucG) | p==vpGC = (nucG,nucC) | p==vpGU = (nucG,nucU) | p==vpUG = (nucU,nucG) | p==vpAU = (nucA,nucU) | p==vpUA = (nucU,nucA) | otherwise = error "non-standard pairs can't be backcasted" {-# INLINE vienna2Tuple #-} vienna2RevTuple p | p==vpCG = (nucG,nucC) | p==vpGC = (nucC,nucG) | p==vpGU = (nucU,nucG) | p==vpUG = (nucG,nucU) | p==vpAU = (nucU,nucA) | p==vpUA = (nucA,nucU) | otherwise = error "non-standard pairs can't be backcasted" {-# INLINE vienna2RevTuple #-} tuple2Vienna (b1,b2) | b1==nucC&&b2==nucG = vpCG | b1==nucG&&b2==nucC = vpGC | b1==nucG&&b2==nucU = vpGU | b1==nucU&&b2==nucG = vpUG | b1==nucA&&b2==nucU = vpAU | b1==nucU&&b2==nucA = vpUA | otherwise = vpNS {-# INLINE tuple2Vienna #-} tuple2RevVienna (a,b) = tuple2Vienna (b,a) {-# INLINE tuple2RevVienna #-}