{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}

-- | Encodings for nucleotides and pairs. Currently very ViennaRNA-centric.
-- This might change over time.
--
-- TODO do not export Nucleotide ctor?
--
-- TODO consider adding "nucAmpersand" as a chain seperator. This could make
-- some things easier.

module Biobase.RNA where

import Data.Char (toUpper)
import Data.Ix
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VGM
import Data.Primitive.Types

import Data.PrimitiveArray
import Data.PrimitiveArray.Ix



-- * single nucleotide encoding

newtype Nucleotide = Nucleotide Int
  deriving (Eq,Ord,Ix)

(nucE:nucA:nucC:nucG:nucU:_) = map Nucleotide [0..]

nucCharList :: [(Int,Char)]
nucCharList = zip [0..] "EACGU"
charNucList :: [(Char,Int)]
charNucList = zip "EACGU" [0..]

deriving instance Prim Nucleotide
deriving instance VGM.MVector VU.MVector Nucleotide
deriving instance VG.Vector VU.Vector Nucleotide
deriving instance VU.Unbox Nucleotide

-- TODO maybe use 'assert' and do not catch errors in optimized operation?!

instance Enum Nucleotide where
  toEnum x
    | x>=0 && x<=4 = Nucleotide x
    | otherwise    = error $ "can't make to enum" ++ show x
  fromEnum (Nucleotide x) = x
  {-# INLINE toEnum #-}
  {-# INLINE fromEnum #-}

instance Bounded Nucleotide where
  minBound = nucE
  maxBound = nucU

instance Show Nucleotide where
  show (Nucleotide x)
    | Just s <- x `lookup` nucCharList = [s]

instance Read Nucleotide where
  readsPrec p [] = []
  readsPrec p (x:xs)
    | x ==' ' = readsPrec p xs
    | Just n <- x `lookup` charNucList = [(Nucleotide n,xs)]
    | otherwise = []

charToNucleotide :: Char -> Nucleotide
charToNucleotide = f . toUpper where
  f x
    | Just n <- x `lookup` charNucList = Nucleotide n
    | otherwise = nucE

-- | Since this function is used rather often.

char2nuc = charToNucleotide

nucleotideToChar :: Nucleotide -> Char
nucleotideToChar (Nucleotide n) = f n where
  f x
    | Just c <- x `lookup` nucCharList = c



-- * Represent nucleotide sequences using a PrimArray.
--
-- TODO switch back to Unboxed.Vector? Test speed of both approaches!

type Primary = PrimArray Int Nucleotide

-- |
--
-- TODO is this fast enough?

instance Eq Primary where
  xs == ys = bounds xs == bounds ys && toList xs == toList ys

instance Show Primary where
  show p = "mkPrimary " ++ unPrimary p

instance Read Primary where
  readsPrec p xs
    | "mkPrimary " == chk = [(mkPrimary ps,ys)]
    | otherwise           = error $ show (chk,others,ps,ys)
    where
      (chk,others) = splitAt 10 xs
      (ps,ys) = span (`elem` "ACGUE") others

class MkPrimary a where
  mkPrimary :: a -> Primary
  unPrimary :: Primary -> a

instance MkPrimary String where
  mkPrimary xs = fromList 0 (length xs -1) $ map charToNucleotide xs
  unPrimary v = map nucleotideToChar $ toList v
  {-# INLINE mkPrimary #-}
  {-# INLINE unPrimary #-}

instance MkPrimary (VU.Vector Char) where
  mkPrimary = mkPrimary . VU.toList
  unPrimary = VU.fromList . unPrimary
  {-# INLINE mkPrimary #-}
  {-# INLINE unPrimary #-}



-- * Encoding of Watson-Crick and Wobble Pairs in the Vienna RNA package style.

newtype ViennaPair = ViennaPair Int
  deriving (Eq,Ord,Ix)

(vpNP:vpCG:vpGC:vpGU:vpUG:vpAU:vpUA:vpNS:_) = map ViennaPair [0..]

vpStrList :: [(Int,String)]
vpStrList = zip [0..] ["NP","CG","GC","GU","UG","AU","UA","NS"]
strVpList :: [(String,Int)]
strVpList = zip ["NP","CG","GC","GU","UG","AU","UA","NS"] [0..]

deriving instance VGM.MVector VU.MVector ViennaPair
deriving instance VG.Vector VU.Vector ViennaPair
deriving instance VU.Unbox ViennaPair
deriving instance Prim ViennaPair

instance Enum ViennaPair where
  toEnum x
    | x>=0 && x<=7 = ViennaPair x
    | otherwise    = error $ "can't make to enum" ++ show x
  fromEnum (ViennaPair x) = x
  {-# INLINE toEnum #-}
  {-# INLINE fromEnum #-}

instance Bounded ViennaPair where
  minBound = vpNP
  maxBound = vpNS

instance Show ViennaPair where
  show (ViennaPair x)
    | Just s <- x `lookup` vpStrList = s

instance Read ViennaPair where
  readsPrec p [] = []
  readsPrec p [x] = []
  readsPrec p (x:y:xs)
    | x ==' ' = readsPrec p (y:xs)
    | Just n <- (x:y:[]) `lookup` strVpList = [(ViennaPair n,xs)]
    | otherwise = []



-- * convenience lists

acgu = [nucA .. nucU]
acguStr = map show acgu
acguPairs = [(b1,b2) | b1<-acgu, b2<-acgu]
eacgu = [nucE .. nucU]
npnsP = [vpNP .. vpNS]
cguaP = [vpCG .. vpUA]
cgnsP = [vpCG .. vpNS]



-- * Transformations between Nucleotide and ViennaPair

pairTable = fromAssocs (nucE,nucE) (nucU,nucU) vpNP
  [ ((nucC,nucG),vpCG)
  , ((nucG,nucC),vpGC)
  , ((nucG,nucU),vpGU)
  , ((nucU,nucG),vpUG)
  , ((nucA,nucU),vpAU)
  , ((nucU,nucA),vpUA)
  ]
{- NOINLINE pairTable #-}

toPair :: Nucleotide -> Nucleotide -> ViennaPair
toPair x y = pairTable ! (x,y)
{-
  | x==nucC&&y==nucG = vpCG
  | x==nucG&&y==nucC = vpGC
  | x==nucG&&y==nucU = vpGU
  | x==nucU&&y==nucG = vpUG
  | x==nucA&&y==nucU = vpAU
  | x==nucU&&y==nucA = vpUA
  | otherwise        = vpNP
-}
{-# INLINE toPair #-}

pairs :: Nucleotide -> Nucleotide -> Bool
pairs a b = (>vpNP) $ toPair a b
{-# INLINE pairs #-}

vienna2Tuple p
  | p==vpCG = (nucC,nucG)
  | p==vpGC = (nucG,nucC)
  | p==vpGU = (nucG,nucU)
  | p==vpUG = (nucU,nucG)
  | p==vpAU = (nucA,nucU)
  | p==vpUA = (nucU,nucA)
  | otherwise = error "non-standard pairs can't be backcasted"
{-# INLINE vienna2Tuple #-}

vienna2RevTuple p
  | p==vpCG = (nucG,nucC)
  | p==vpGC = (nucC,nucG)
  | p==vpGU = (nucU,nucG)
  | p==vpUG = (nucG,nucU)
  | p==vpAU = (nucU,nucA)
  | p==vpUA = (nucA,nucU)
  | otherwise = error "non-standard pairs can't be backcasted"
{-# INLINE vienna2RevTuple #-}

tuple2Vienna (b1,b2)
  | b1==nucC&&b2==nucG = vpCG
  | b1==nucG&&b2==nucC = vpGC
  | b1==nucG&&b2==nucU = vpGU
  | b1==nucU&&b2==nucG = vpUG
  | b1==nucA&&b2==nucU = vpAU
  | b1==nucU&&b2==nucA = vpUA
  | otherwise = vpNS
{-# INLINE tuple2Vienna #-}

tuple2RevVienna (a,b) = tuple2Vienna (b,a)
{-# INLINE tuple2RevVienna #-}