-- | This module has the translation tables for the genetic code.
--
-- In addition, @Any@ is included to denote that any amino acid is ok, and
-- @Unknown@ to denote unknown data.  We do have a symbol 'Undef' for undefined
-- amino acids, which denotes error condition.
--
-- TODO this nomenclature might change!

module Biobase.Primary.AA where

import           Control.Arrow ((***),first)
import           Data.Aeson
import           Data.Hashable
import           Data.Ix (Ix(..))
import           Data.Map.Strict (Map)
import           Data.Primitive.Types
import           Data.Tuple (swap)
import           Data.Vector.Unboxed.Deriving
import           GHC.Base (remInt,quotInt)
import           GHC.Generics (Generic)
import qualified Data.Bijection.HashMap as B
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Foldable as F
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VGM
import qualified Data.Vector.Unboxed as VU
import qualified GHC.Exts as GHC

import Biobase.Types.BioSequence
import Data.Info

import           Biobase.Primary.Letter



pattern  $bStop :: Letter AA n
$mStop :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
Stop   = Letter  0 :: Letter AA n
pattern     $bA :: Letter AA n
$mA :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
A   = Letter  1 :: Letter AA n
pattern     $bB :: Letter AA n
$mB :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
B   = Letter  2 :: Letter AA n
pattern     $bC :: Letter AA n
$mC :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
C   = Letter  3 :: Letter AA n
pattern     $bD :: Letter AA n
$mD :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
D   = Letter  4 :: Letter AA n
pattern     $bE :: Letter AA n
$mE :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
E   = Letter  5 :: Letter AA n
pattern     $bF :: Letter AA n
$mF :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
F   = Letter  6 :: Letter AA n
pattern     $bG :: Letter AA n
$mG :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
G   = Letter  7 :: Letter AA n
pattern     $bH :: Letter AA n
$mH :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
H   = Letter  8 :: Letter AA n
pattern     $bI :: Letter AA n
$mI :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
I   = Letter  9 :: Letter AA n
pattern     $bK :: Letter AA n
$mK :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
K   = Letter 10 :: Letter AA n
pattern     $bL :: Letter AA n
$mL :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
L   = Letter 11 :: Letter AA n
pattern     $bM :: Letter AA n
$mM :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
M   = Letter 12 :: Letter AA n
pattern     $bN :: Letter AA n
$mN :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
N   = Letter 13 :: Letter AA n
pattern     $bP :: Letter AA n
$mP :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
P   = Letter 14 :: Letter AA n
pattern     $bQ :: Letter AA n
$mQ :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
Q   = Letter 15 :: Letter AA n
pattern     $bR :: Letter AA n
$mR :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
R   = Letter 16 :: Letter AA n
pattern     $bS :: Letter AA n
$mS :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
S   = Letter 17 :: Letter AA n
pattern     $bT :: Letter AA n
$mT :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
T   = Letter 18 :: Letter AA n
pattern     $bV :: Letter AA n
$mV :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
V   = Letter 19 :: Letter AA n
pattern     $bW :: Letter AA n
$mW :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
W   = Letter 20 :: Letter AA n
pattern     $bX :: Letter AA n
$mX :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
X   = Letter 21 :: Letter AA n
pattern     $bY :: Letter AA n
$mY :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
Y   = Letter 22 :: Letter AA n
pattern     $bZ :: Letter AA n
$mZ :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
Z   = Letter 23 :: Letter AA n
pattern $bAny :: Letter AA n
$mAny :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
Any     = Letter 24 :: Letter AA n     -- TODO @Any == X@ supposedly!
pattern $bUnknown :: Letter AA n
$mUnknown :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
Unknown = Letter 25 :: Letter AA n
pattern $bUndef :: Letter AA n
$mUndef :: forall r k (n :: k).
Letter AA n -> (Void# -> r) -> (Void# -> r) -> r
Undef   = Letter 26 :: Letter AA n

-- * Creating functions and aa data.

aa :: Int -> Letter AA n
aa :: Int -> Letter AA n
aa = Int -> Letter AA n
forall k seqTy (nameTy :: k). Int -> Letter seqTy nameTy
Letter
{-# Inline aa #-}

aaRange :: Vector (Letter AA n)
aaRange = [Letter AA n] -> Vector (Letter AA n)
forall a. Unbox a => [a] -> Vector a
VU.fromList [Letter AA n
forall k (n :: k). Letter AA n
Stop .. Letter AA n -> Letter AA n
forall a. Enum a => a -> a
pred Letter AA n
forall k (n :: k). Letter AA n
Undef]
{-# NoInline aaRange #-}

instance Bounded (Letter AA n) where
    minBound :: Letter AA n
minBound = Letter AA n
forall k (n :: k). Letter AA n
Stop
    maxBound :: Letter AA n
maxBound = Letter AA n
forall k (n :: k). Letter AA n
Undef

instance LetterChar AA n where
  letterChar :: Letter AA n -> Char
letterChar = Letter AA n -> Char
forall k (n :: k). Letter AA n -> Char
aaChar
  charLetter :: Char -> Letter AA n
charLetter = Char -> Letter AA n
forall k (n :: k). Char -> Letter AA n
charAA

instance ToJSON (Letter AA n) where
  toJSON :: Letter AA n -> Value
toJSON = Char -> Value
forall a. ToJSON a => a -> Value
toJSON (Char -> Value) -> (Letter AA n -> Char) -> Letter AA n -> Value
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Letter AA n -> Char
forall k t (n :: k). LetterChar t n => Letter t n -> Char
letterChar

instance FromJSON (Letter AA n) where
  parseJSON :: Value -> Parser (Letter AA n)
parseJSON = (Char -> Letter AA n) -> Parser Char -> Parser (Letter AA n)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Letter AA n
forall k t (n :: k). LetterChar t n => Char -> Letter t n
charLetter (Parser Char -> Parser (Letter AA n))
-> (Value -> Parser Char) -> Value -> Parser (Letter AA n)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Value -> Parser Char
forall a. FromJSON a => Value -> Parser a
parseJSON

instance Info (Letter AA n) where
  info :: Letter AA n -> String
info = (Char -> String -> String
forall a. a -> [a] -> [a]
:[]) (Char -> String) -> (Letter AA n -> Char) -> Letter AA n -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Letter AA n -> Char
forall k (n :: k). Letter AA n -> Char
aaChar

--instance (GHC.IsString f) => ToJSON (Pretty f (Letter AA)) where
--  toJSON = toJSON . T.pack . map letterChar . GHC.toList . getPretty

-- | Translate 'Char' amino acid representation into efficient 'AA' newtype.

charAA :: Char -> Letter AA n
charAA :: Char -> Letter AA n
charAA = Cod (HashMap Char (Letter AA n))
-> Bimap (HashMap Char (Letter AA n)) (HashMap (Letter AA n) Char)
-> Dom (HashMap Char (Letter AA n))
-> Cod (HashMap Char (Letter AA n))
forall l r. DomCodCnt l r => Cod l -> Bimap l r -> Dom l -> Cod l
B.findWithDefaultL Cod (HashMap Char (Letter AA n))
forall k (n :: k). Letter AA n
Undef Bimap (HashMap Char (Letter AA n)) (HashMap (Letter AA n) Char)
forall k (n :: k).
Bimap (HashMap Char (Letter AA n)) (HashMap (Letter AA n) Char)
charBaa
{-# INLINE charAA #-}

-- | 'Char' representation of an 'AA'.

aaChar :: Letter AA n -> Char
aaChar :: Letter AA n -> Char
aaChar = Cod (HashMap (Letter AA n) Char)
-> Bimap (HashMap Char (Letter AA n)) (HashMap (Letter AA n) Char)
-> Dom (HashMap (Letter AA n) Char)
-> Cod (HashMap (Letter AA n) Char)
forall l r. DomCodCnt l r => Cod r -> Bimap l r -> Dom r -> Cod r
B.findWithDefaultR Char
Cod (HashMap (Letter AA n) Char)
'?' Bimap (HashMap Char (Letter AA n)) (HashMap (Letter AA n) Char)
forall k (n :: k).
Bimap (HashMap Char (Letter AA n)) (HashMap (Letter AA n) Char)
charBaa
{-# INLINE aaChar #-}

-- * lookup tables

charBaa :: B.Bimap (B.HashMap Char (Letter AA n)) (B.HashMap (Letter AA n) Char)
charBaa :: Bimap (HashMap Char (Letter AA n)) (HashMap (Letter AA n) Char)
charBaa = [(Dom (HashMap Char (Letter AA n)),
  Dom (HashMap (Letter AA n) Char))]
-> Bimap (HashMap Char (Letter AA n)) (HashMap (Letter AA n) Char)
forall l r. DomCodCnt l r => [(Dom l, Dom r)] -> Bimap l r
B.fromList
  [ (Char
Dom (HashMap Char (Letter AA n))
'*',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
Stop)
  , (Char
Dom (HashMap Char (Letter AA n))
'A',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
A)
  , (Char
Dom (HashMap Char (Letter AA n))
'B',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
B)
  , (Char
Dom (HashMap Char (Letter AA n))
'C',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
C)
  , (Char
Dom (HashMap Char (Letter AA n))
'D',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
D)
  , (Char
Dom (HashMap Char (Letter AA n))
'E',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
E)
  , (Char
Dom (HashMap Char (Letter AA n))
'F',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
F)
  , (Char
Dom (HashMap Char (Letter AA n))
'G',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
G)
  , (Char
Dom (HashMap Char (Letter AA n))
'H',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
H)
  , (Char
Dom (HashMap Char (Letter AA n))
'I',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
I)
  , (Char
Dom (HashMap Char (Letter AA n))
'K',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
K)
  , (Char
Dom (HashMap Char (Letter AA n))
'L',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
L)
  , (Char
Dom (HashMap Char (Letter AA n))
'M',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
M)
  , (Char
Dom (HashMap Char (Letter AA n))
'N',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
N)
  , (Char
Dom (HashMap Char (Letter AA n))
'P',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
P)
  , (Char
Dom (HashMap Char (Letter AA n))
'Q',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
Q)
  , (Char
Dom (HashMap Char (Letter AA n))
'R',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
R)
  , (Char
Dom (HashMap Char (Letter AA n))
'S',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
S)
  , (Char
Dom (HashMap Char (Letter AA n))
'T',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
T)
  , (Char
Dom (HashMap Char (Letter AA n))
'V',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
V)
  , (Char
Dom (HashMap Char (Letter AA n))
'W',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
W)
  , (Char
Dom (HashMap Char (Letter AA n))
'X',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
X)
  , (Char
Dom (HashMap Char (Letter AA n))
'Y',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
Y)
  , (Char
Dom (HashMap Char (Letter AA n))
'Z',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
Z)
  , (Char
Dom (HashMap Char (Letter AA n))
'?',Dom (HashMap (Letter AA n) Char)
forall k (n :: k). Letter AA n
Unknown)
  ]
{-# NOINLINE charBaa #-}

-- | List of the twenty "default" amino acids. Used, for example, by HMMer.

twentyAA :: VU.Vector (Letter AA n)
twentyAA :: Vector (Letter AA n)
twentyAA = [Letter AA n] -> Vector (Letter AA n)
forall a. Unbox a => [a] -> Vector a
VU.fromList [ Letter AA n
forall k (n :: k). Letter AA n
A,Letter AA n
forall k (n :: k). Letter AA n
C,Letter AA n
forall k (n :: k). Letter AA n
D,Letter AA n
forall k (n :: k). Letter AA n
E,Letter AA n
forall k (n :: k). Letter AA n
F,Letter AA n
forall k (n :: k). Letter AA n
G,Letter AA n
forall k (n :: k). Letter AA n
H,Letter AA n
forall k (n :: k). Letter AA n
I,Letter AA n
forall k (n :: k). Letter AA n
K,Letter AA n
forall k (n :: k). Letter AA n
L,Letter AA n
forall k (n :: k). Letter AA n
M,Letter AA n
forall k (n :: k). Letter AA n
N,Letter AA n
forall k (n :: k). Letter AA n
P,Letter AA n
forall k (n :: k). Letter AA n
Q,Letter AA n
forall k (n :: k). Letter AA n
R,Letter AA n
forall k (n :: k). Letter AA n
S,Letter AA n
forall k (n :: k). Letter AA n
T,Letter AA n
forall k (n :: k). Letter AA n
V,Letter AA n
forall k (n :: k). Letter AA n
W,Letter AA n
forall k (n :: k). Letter AA n
Y ]
{-# NoInline twentyAA #-}


-- * instances

instance Show (Letter AA n) where
  show :: Letter AA n -> String
show Letter AA n
n = [Letter AA n -> Char
forall k (n :: k). Letter AA n -> Char
aaChar Letter AA n
n]

instance Read (Letter AA n) where
  readsPrec :: Int -> ReadS (Letter AA n)
readsPrec Int
p [] = []
  readsPrec Int
p (Char
x:String
xs)
    | Char
xChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
' ' = Int -> ReadS (Letter AA n)
forall a. Read a => Int -> ReadS a
readsPrec Int
p String
xs
    | Letter AA n
aa <- Char -> Letter AA n
forall k (n :: k). Char -> Letter AA n
charAA Char
x = [(Letter AA n
aa,String
xs)]
    | Bool
otherwise = []

instance Enum (Letter AA n) where
    succ :: Letter AA n -> Letter AA n
succ Letter AA n
Undef      = String -> Letter AA n
forall a. HasCallStack => String -> a
error String
"succ/Undef:AA"
    succ (Letter Int
x) = Int -> Letter AA n
forall k seqTy (nameTy :: k). Int -> Letter seqTy nameTy
Letter (Int -> Letter AA n) -> Int -> Letter AA n
forall a b. (a -> b) -> a -> b
$ Int
xInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
    pred :: Letter AA n -> Letter AA n
pred Letter AA n
Stop       = String -> Letter AA n
forall a. HasCallStack => String -> a
error String
"pred/Stop:AA"
    pred (Letter Int
x) = Int -> Letter AA n
forall k seqTy (nameTy :: k). Int -> Letter seqTy nameTy
Letter (Int -> Letter AA n) -> Int -> Letter AA n
forall a b. (a -> b) -> a -> b
$ Int
xInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1
    toEnum :: Int -> Letter AA n
toEnum Int
k | Int
kInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
0 Bool -> Bool -> Bool
&& Int
kInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<=(Letter AA Any -> Int
forall seqTy k (nameTy :: k). Letter seqTy nameTy -> Int
getLetter Letter AA Any
forall k (n :: k). Letter AA n
Undef) = Int -> Letter AA n
forall k seqTy (nameTy :: k). Int -> Letter seqTy nameTy
Letter Int
k
    toEnum Int
k                               = String -> Letter AA n
forall a. HasCallStack => String -> a
error (String -> Letter AA n) -> String -> Letter AA n
forall a b. (a -> b) -> a -> b
$ String
"toEnum/Letter RNA " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
k
    fromEnum :: Letter AA n -> Int
fromEnum (Letter Int
k) = Int
k

instance MkPrimary (VU.Vector Char) AA n where
  primary :: Vector Char -> Primary AA n
primary = (Char -> Letter AA n) -> Vector Char -> Primary AA n
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map Char -> Letter AA n
forall k (n :: k). Char -> Letter AA n
charAA