{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell       #-}
{-# LANGUAGE TypeFamilies          #-}

{- |
Module      :  ELynx.Data.Character.NucleotideX
Description :  Nucleotides extended with gaps
Copyright   :  (c) Dominik Schrempf 2018
License     :  GPL-3

Maintainer  :  dominik.schrempf@gmail.com
Stability   :  unstable
Portability :  portable

See header of 'ELynx.Data.Alphabet'.

Extended nucleotides with gaps. See also
https://www.bioinformatics.org/sms/iupac.html or
https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry.

@
Symbol  Description  Bases represented  Complement
------  -----------  -----------------  ----------
A       Adenine      A                  T
C       Cytosine     C                  G
G       Guanine      G                  C
T       Thymine      T                  A
------  -----------  -----------------  ----------
- or .  Gap (Zero)                      -
@

-}

module ELynx.Data.Character.NucleotideX
  ( NucleotideX (..)
  ) where

import           Data.Vector.Unboxed.Deriving
import           Data.Word8

import qualified ELynx.Data.Character.Character as C
import           ELynx.Tools.ByteString         (c2w, w2c)

-- | Extended nucleotides: the four standard bases plus a gap character.
data NucleotideX = A | C | G | T | Gap
  deriving (Show, Read, Eq, Ord, Enum, Bounded)

-- | Encode a 'NucleotideX' as the 'Word8' obtained by applying 'c2w' to its
-- one-letter symbol. A 'Gap' is always written as @\'-\'@.
toWord :: NucleotideX -> Word8
toWord A   = c2w 'A'
toWord C   = c2w 'C'
toWord G   = c2w 'G'
toWord T   = c2w 'T'
toWord Gap = c2w '-'

-- | Decode a 'Word8' (converted to a 'Char' with 'w2c') into a 'NucleotideX'.
--
-- Both @\'-\'@ and @\'.\'@ are read as 'Gap'; since 'toWord' writes 'Gap' as
-- @\'-\'@, a @\'.\'@ does not survive a round trip unchanged.
--
-- This function is partial: any other character (including lower case
-- letters) results in a call to 'error'.
fromWord :: Word8 -> NucleotideX
fromWord w = case w2c w of
  'A' -> A
  'C' -> C
  'G' -> G
  'T' -> T
  '-' -> Gap
  '.' -> Gap
  c   -> error $ "fromWord: Cannot convert " ++ show c ++ " to NucleotideX."

-- Derive the unboxed vector instances for 'NucleotideX', using 'Word8' as the
-- underlying representation and 'toWord' / 'fromWord' as the conversions.
derivingUnbox "NucleotideX"
    [t| NucleotideX -> Word8 |]
    [| toWord |]
    [| fromWord |]

-- The class methods are implemented by the module-level functions of the same
-- name; the class itself is only in scope qualified as @C@.
instance C.Character NucleotideX where
  toWord   = toWord
  fromWord = fromWord

instance C.CharacterX NucleotideX where
  gap = Gap