-- | The RNA nucleotide alphabet: the 'RNA' tag type, pattern synonyms for
-- the letters @A@, @C@, @G@, @U@ and @N@, conversions between 'Char' and
-- @'Letter' 'RNA'@, and the class instances for @'Letter' 'RNA'@.

module Biobase.Primary.Nuc.RNA where

import           Control.Category ((>>>))
import           Control.Lens (Iso', iso)
import           Data.Aeson
import           Data.Char (toUpper)
import           Data.Ix (Ix(..))
import           Data.Primitive.Types
import           Data.String
import           Data.Tuple (swap)
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VGM
import qualified Data.Vector.Unboxed as VU

import           Biobase.Primary.Bounds
import           Biobase.Primary.Letter



-- | RNA nucleotides.

data RNA

-- The five RNA letters, encoded as the integers 0 to 4 inside 'Letter'.

pattern A = Letter 0 :: Letter RNA
pattern C = Letter 1 :: Letter RNA
pattern G = Letter 2 :: Letter RNA
pattern U = Letter 3 :: Letter RNA
pattern N = Letter 4 :: Letter RNA

-- | The alphabet ranges from 'A' (encoded as 0) to 'N' (encoded as 4).

instance Bounded (Letter RNA) where
  minBound = A
  maxBound = N

-- | Enumeration over the encoded integers. 'succ' on 'N', 'pred' on 'A' and
-- 'toEnum' outside of @0..4@ call 'error'.

instance Enum (Letter RNA) where
  succ N          = error "succ/N:RNA"
  succ (Letter x) = Letter $ x + 1
  pred A          = error "pred/A:RNA"
  pred (Letter x) = Letter $ x - 1
  toEnum k
    | k >= 0 && k <= 4 = Letter k
    | otherwise        = error $ "toEnum/Letter RNA " ++ show k
  fromEnum (Letter k) = k

-- | Character conversion is delegated to 'rnaChar' and 'charRNA'.

instance LetterChar RNA where
  letterChar = rnaChar
  charLetter = charRNA

-- | A letter is serialised as the JSON encoding of its character
-- ('letterChar').

instance ToJSON (Letter RNA) where
  toJSON = toJSON . letterChar

-- | A letter is parsed from a JSON character and converted with
-- 'charLetter'.

instance FromJSON (Letter RNA) where
  parseJSON = fmap charLetter . parseJSON

-- We encode 'Primary RNA' directly as a string.
--
-- TODO we can't anymore, because this is not a newtype, just a type.

--instance ToJSON (Primary RNA) where
--  toJSON = toJSON . VU.toList . VU.map letterChar
--
--instance FromJSON (Primary RNA) where
--  parseJSON = fmap (primary :: String -> Primary RNA) . parseJSON

-- | The four concrete nucleotides @A@, @C@, @G@, @U@, in that order. 'N' is
-- not included.

acgu :: [Letter RNA]
acgu = [A .. U]

-- | Convert a character to an RNA letter. The character is upper-cased
-- first, so the conversion is case-insensitive. Every character other than
-- @A@, @C@, @G@, @U@ is mapped to 'N'.

charRNA :: Char -> Letter RNA
charRNA = toUpper >>> \case
  'A' -> A
  'C' -> C
  'G' -> G
  'U' -> U
  _   -> N
{-# INLINE charRNA #-}

-- | Convert an RNA letter to its upper-case character.
--
-- Only the five letters defined in this module are handled; a 'Letter'
-- wrapping any other integer has no matching alternative.

rnaChar :: Letter RNA -> Char
rnaChar = \case
  A -> 'A'
  C -> 'C'
  G -> 'G'
  U -> 'U'
  N -> 'N'
{-# INLINE rnaChar #-}

-- | An isomorphism from 'Char' to @'Letter' 'RNA'@. This assumes that the
-- underlying @Char@s actually represent an RNA sequence. This allows typesafe
-- modification of RNA sequences since only @[A,C,G,U,N]@ are allowed.
--
-- Note that the round trip through 'charRNA' and 'rnaChar' upper-cases its
-- input and turns every character outside of @ACGU@ into @N@, so it is the
-- identity only on the characters @A@, @C@, @G@, @U@, @N@.

crna :: Iso' Char (Letter RNA)
crna = iso charRNA rnaChar

-- | Letters are shown as their single character, without quotes.

instance Show (Letter RNA) where
  show c = [rnaChar c]

-- | Reads a single letter: leading spaces are skipped, then the next
-- character is converted with 'charRNA'. Empty input yields no parse.

instance Read (Letter RNA) where
  readsPrec _ [] = []
  readsPrec p (x:xs)
    | x == ' '  = readsPrec p xs
    | otherwise = [(charRNA x, xs)]

-- | Build a 'Primary' 'RNA' sequence from any input type that has an
-- 'MkPrimary' instance for 'RNA'. This is 'primary' with the result type
-- fixed to RNA.

rnaSeq :: MkPrimary n RNA => n -> Primary RNA
rnaSeq = primary

-- | An unboxed vector of characters is converted element-wise with
-- 'charRNA'.

instance MkPrimary (VU.Vector Char) RNA where
  primary = VU.map charRNA

-- | String literals can be used as lists of RNA letters; each character is
-- converted with 'charRNA'.

instance IsString [Letter RNA] where
  fromString = map charRNA