{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}

-- | Encodings for nucleotides and pairs. Currently very ViennaRNA-centric.
-- This might change over time.
--
-- TODO do not export Nucleotide ctor?

module Biobase.RNA where

import Data.Char (toUpper)
import Data.Ix
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VGM
import Data.Primitive.Types

import Data.PrimitiveArray
import Data.PrimitiveArray.Ix



-- * single nucleotide encoding

newtype Nucleotide = Nucleotide Int
  deriving (Eq,Ord,Ix)

(nucE:nucA:nucC:nucG:nucU:_) = map Nucleotide [0..]

nucCharList :: [(Int,Char)]
nucCharList = zip [0..] "EACGU"
charNucList :: [(Char,Int)]
charNucList = zip "EACGU" [0..]

deriving instance Prim Nucleotide
deriving instance VGM.MVector VU.MVector Nucleotide
deriving instance VG.Vector VU.Vector Nucleotide
deriving instance VU.Unbox Nucleotide

-- TODO maybe use 'assert' and do not catch errors in optimized operation?!

instance Enum Nucleotide where
  toEnum x
    | x>=0 && x<=4 = Nucleotide x
    | otherwise    = error $ "can't make to enum" ++ show x
  fromEnum (Nucleotide x) = x
  {-# INLINE toEnum #-}
  {-# INLINE fromEnum #-}

instance Bounded Nucleotide where
  minBound = nucE
  maxBound = nucU

instance Show Nucleotide where
  show (Nucleotide x)
    | Just s <- x `lookup` nucCharList = [s]

instance Read Nucleotide where
  readsPrec p [] = []
  readsPrec p (x:xs)
    | x ==' ' = readsPrec p xs
    | Just n <- x `lookup` charNucList = [(Nucleotide n,xs)]
    | otherwise = []

charToNucleotide :: Char -> Nucleotide
charToNucleotide = f . toUpper where
  f x
    | Just n <- x `lookup` charNucList = Nucleotide n
    | otherwise = nucE

nucleotideToChar :: Nucleotide -> Char
nucleotideToChar (Nucleotide n) = f n where
  f x
    | Just c <- x `lookup` nucCharList = c



-- * Represent nucleotide sequences using an unboxed vector.

type Primary = PrimArray Int Nucleotide

instance Show Primary where
  show p = "mkPrimary " ++ unPrimary p

class MkPrimary a where
  mkPrimary :: a -> Primary
  unPrimary :: Primary -> a

instance MkPrimary String where
  mkPrimary xs = fromList 0 (length xs -1) $ map charToNucleotide xs
  unPrimary v = map nucleotideToChar $ toList v
  {-# INLINE mkPrimary #-}
  {-# INLINE unPrimary #-}

instance MkPrimary (VU.Vector Char) where
  mkPrimary = mkPrimary . VU.toList
  unPrimary = VU.fromList . unPrimary
  {-# INLINE mkPrimary #-}
  {-# INLINE unPrimary #-}



-- * Encoding of Watson-Crick and Wobble Pairs in the Vienna RNA package style.

newtype ViennaPair = ViennaPair Int
  deriving (Eq,Ord,Ix)

(vpNP:vpCG:vpGC:vpGU:vpUG:vpAU:vpUA:vpNS:_) = map ViennaPair [0..]

vpStrList :: [(Int,String)]
vpStrList = zip [0..] ["NP","CG","GC","GU","UG","AU","UA","NS"]
strVpList :: [(String,Int)]
strVpList = zip ["NP","CG","GC","GU","UG","AU","UA","NS"] [0..]

deriving instance VGM.MVector VU.MVector ViennaPair
deriving instance VG.Vector VU.Vector ViennaPair
deriving instance VU.Unbox ViennaPair

instance Enum ViennaPair where
  toEnum x
    | x>=0 && x<=7 = ViennaPair x
    | otherwise    = error $ "can't make to enum" ++ show x
  fromEnum (ViennaPair x) = x
  {-# INLINE toEnum #-}
  {-# INLINE fromEnum #-}

instance Bounded ViennaPair where
  minBound = vpNP
  maxBound = vpNS

instance Show ViennaPair where
  show (ViennaPair x)
    | Just s <- x `lookup` vpStrList = s

instance Read ViennaPair where
  readsPrec p [] = []
  readsPrec p [x] = []
  readsPrec p (x:y:xs)
    | x ==' ' = readsPrec p (y:xs)
    | Just n <- (x:y:[]) `lookup` strVpList = [(ViennaPair n,xs)]
    | otherwise = []



-- * convenience lists

acgu = [nucA .. nucU]
acguStr = map show acgu
acguPairs = [(b1,b2) | b1<-acgu, b2<-acgu]
eacgu = [nucE .. nucU]
npnsP = [vpNP .. vpNS]
cguaP = [vpCG .. vpUA]
cgnsP = [vpCG .. vpNS]



-- * Transformations between Nucleotide and ViennaPair

toPair :: Nucleotide -> Nucleotide -> ViennaPair
toPair x y
  | x==nucC&&y==nucG = vpCG
  | x==nucG&&y==nucC = vpGC
  | x==nucG&&y==nucU = vpGU
  | x==nucU&&y==nucG = vpUG
  | x==nucA&&y==nucU = vpAU
  | x==nucU&&y==nucA = vpUA
  | otherwise        = vpNP
{-# INLINE toPair #-}

pairs :: Nucleotide -> Nucleotide -> Bool
pairs a b = (>vpNP) $ toPair a b
{-# INLINE pairs #-}

vienna2Tuple p
  | p==vpCG = (nucC,nucG)
  | p==vpGC = (nucG,nucC)
  | p==vpGU = (nucG,nucU)
  | p==vpUG = (nucU,nucG)
  | p==vpAU = (nucA,nucU)
  | p==vpUA = (nucU,nucA)
  | otherwise = error "non-standard pairs can't be backcasted"
{-# INLINE vienna2Tuple #-}

vienna2RevTuple p
  | p==vpCG = (nucG,nucC)
  | p==vpGC = (nucC,nucG)
  | p==vpGU = (nucU,nucG)
  | p==vpUG = (nucG,nucU)
  | p==vpAU = (nucU,nucA)
  | p==vpUA = (nucA,nucU)
  | otherwise = error "non-standard pairs can't be backcasted"
{-# INLINE vienna2RevTuple #-}

tuple2Vienna (b1,b2)
  | b1==nucC&&b2==nucG = vpCG
  | b1==nucG&&b2==nucC = vpGC
  | b1==nucG&&b2==nucU = vpGU
  | b1==nucU&&b2==nucG = vpUG
  | b1==nucA&&b2==nucU = vpAU
  | b1==nucU&&b2==nucA = vpUA
  | otherwise = vpNS
{-# INLINE tuple2Vienna #-}

tuple2RevVienna (a,b) = tuple2Vienna (b,a)
{-# INLINE tuple2RevVienna #-}