{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} -- | -- Module : ELynx.Data.Nucleotide -- Description : Nucleotides -- Copyright : (c) Dominik Schrempf 2021 -- License : GPL-3.0-or-later -- -- Maintainer : dominik.schrempf@gmail.com -- Stability : unstable -- Portability : portable -- -- Creation date: Thu Oct 4 18:26:35 2018. -- -- See header of 'ELynx.Data.Alphabet.Alphabet'. -- -- @ -- Symbol Description Bases represented Complement -- ------ ----------- ----------------- ---------- -- A Adenine A T -- C Cytosine C G -- G Guanine G C -- T Thymine T A -- @ module ELynx.Data.Character.Nucleotide ( Nucleotide (..), ) where import Data.ByteString.Internal (c2w, w2c) import Data.Vector.Unboxed.Deriving import Data.Word8 import qualified ELynx.Data.Character.Character as C -- | Nucleotides. data Nucleotide = A | C | G | T deriving (Show, Read, Eq, Ord, Enum, Bounded) -- See https://stackoverflow.com/a/31527024; apparently, pattern matching (and -- case statements) are fast because they are compiled to lookup tables. Hence, -- they are faster than guards (because equality has to be checked), and faster -- than lookups with sets. toWord :: Nucleotide -> Word8 toWord A = c2w 'A' toWord C = c2w 'C' toWord G = c2w 'G' toWord T = c2w 'T' fromWord :: Word8 -> Nucleotide fromWord w = case w2c w of 'A' -> A 'C' -> C 'G' -> G 'T' -> T c -> error $ "fromWord: Cannot convert " ++ show c ++ " to Nucleotide." derivingUnbox "Nucleotide" [t|Nucleotide -> Word8|] [|toWord|] [|fromWord|] instance C.Character Nucleotide where toWord = toWord fromWord = fromWord