{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} {- | Module : ELynx.Data.AminoAcid Description : Amino acid related types and functions Copyright : (c) Dominik Schrempf 2018 License : GPL-3 Maintainer : dominik.schrempf@gmail.com Stability : unstable Portability : portable Creation date: Thu Oct 4 18:26:35 2018. See header of 'ELynx.Data.Alphabet.Alphabet'. Amino acid IUPAC code. See also https://www.bioinformatics.org/sms/iupac.html or https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry. @ Amino Acid Code: Three letter Code: Amino Acid: ---------------- ------------------ ----------- A Ala Alanine C Cys Cysteine D Asp Aspartic Acid E Glu Glutamic Acid F Phe Phenylalanine G Gly Glycine H His Histidine I Ile Isoleucine K Lys Lysine L Leu Leucine M Met Methionine N Asn Asparagine P Pro Proline Q Gln Glutamine R Arg Arginine S Ser Serine T Thr Threonine V Val Valine W Trp Tryptophan Y Tyr Tyrosine ----------------- J Leucine or Isoleucine B Asx Aspartic acid or Asparagine Z Glx Glutamine or Glutamic acid ----------------- X Xaa Any amino acid ----------------- * Stp No amino acid ----------------- - Gap No amino acid . Gap No amino acid @ -} module ELynx.Data.Character.AminoAcidI ( AminoAcidI (..) ) where import Data.Vector.Unboxed.Deriving import Data.Word8 import qualified ELynx.Data.Character.Character as C import ELynx.Tools.ByteString (c2w, w2c) -- | Amino acids. data AminoAcidI = A | C | D | E | F | G | H | I | K | L | M | N | P | Q | R | S | T | V | W | Y | J | B | Z | X | Stop | Gap deriving (Show, Read, Eq, Ord, Enum, Bounded) toWord :: AminoAcidI -> Word8 toWord A = c2w 'A' toWord C = c2w 'C' toWord D = c2w 'D' toWord E = c2w 'E' toWord F = c2w 'F' toWord G = c2w 'G' toWord H = c2w 'H' toWord I = c2w 'I' toWord K = c2w 'K' toWord L = c2w 'L' toWord M = c2w 'M' toWord N = c2w 'N' toWord P = c2w 'P' toWord Q = c2w 'Q' toWord R = c2w 'R' toWord S = c2w 'S' toWord T = c2w 'T' toWord V = c2w 'V' toWord W = c2w 'W' toWord Y = c2w 'Y' toWord J = c2w 'J' toWord B = c2w 'B' toWord Z = c2w 'Z' toWord X = c2w 'X' toWord Stop = c2w '*' toWord Gap = c2w '-' fromWord :: Word8 -> AminoAcidI fromWord w = case w2c w of 'A' -> A 'C' -> C 'D' -> D 'E' -> E 'F' -> F 'G' -> G 'H' -> H 'I' -> I 'K' -> K 'L' -> L 'M' -> M 'N' -> N 'P' -> P 'Q' -> Q 'R' -> R 'S' -> S 'T' -> T 'V' -> V 'W' -> W 'Y' -> Y 'J' -> J 'B' -> B 'Z' -> Z 'X' -> X '*' -> Stop '-' -> Gap '.' -> Gap _ -> error "fromWord: Cannot convert Word8 to AminoAcidI" derivingUnbox "AminoAcidI" [t| AminoAcidI -> Word8 |] [| toWord |] [| fromWord |] instance C.Character AminoAcidI where toWord = toWord fromWord = fromWord instance C.CharacterX AminoAcidI where gap = Gap toStandard :: AminoAcidI -> [AminoAcidI] toStandard A = [A] toStandard C = [C] toStandard D = [D] toStandard E = [E] toStandard F = [F] toStandard G = [G] toStandard H = [H] toStandard I = [I] toStandard K = [K] toStandard L = [L] toStandard M = [M] toStandard N = [N] toStandard P = [P] toStandard Q = [Q] toStandard R = [R] toStandard S = [S] toStandard T = [T] toStandard V = [V] toStandard W = [W] toStandard Y = [Y] toStandard J = [L, I] toStandard B = [D, N] toStandard Z = [E, Q] toStandard X = [A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y] toStandard Stop = [] toStandard Gap = [] instance C.CharacterI AminoAcidI where unknown = X iupac = [J, B, Z, X] toStandard = toStandard