{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}

-- |
-- Module      :  ELynx.Data.Nucleotide
-- Description :  Nucleotides
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Thu Oct  4 18:26:35 2018.
--
-- See header of 'ELynx.Data.Alphabet.Alphabet'.
--
-- @
-- Symbol  Description  Bases represented  Complement
-- ------  -----------  -----------------  ----------
-- A       Adenine      A                  T
-- C       Cytosine        C               G
-- G       Guanine            G            C
-- T       Thymine               T         A
-- @
module ELynx.Data.Character.Nucleotide
  ( Nucleotide (..),
  )
where

import Data.ByteString.Internal (c2w, w2c)
import Data.Vector.Unboxed.Deriving
import Data.Word8
import qualified ELynx.Data.Character.Character as C

-- | Nucleotides.
data Nucleotide = A | C | G | T
  deriving (Int -> Nucleotide -> ShowS
[Nucleotide] -> ShowS
Nucleotide -> String
(Int -> Nucleotide -> ShowS)
-> (Nucleotide -> String)
-> ([Nucleotide] -> ShowS)
-> Show Nucleotide
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Nucleotide] -> ShowS
$cshowList :: [Nucleotide] -> ShowS
show :: Nucleotide -> String
$cshow :: Nucleotide -> String
showsPrec :: Int -> Nucleotide -> ShowS
$cshowsPrec :: Int -> Nucleotide -> ShowS
Show, ReadPrec [Nucleotide]
ReadPrec Nucleotide
Int -> ReadS Nucleotide
ReadS [Nucleotide]
(Int -> ReadS Nucleotide)
-> ReadS [Nucleotide]
-> ReadPrec Nucleotide
-> ReadPrec [Nucleotide]
-> Read Nucleotide
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Nucleotide]
$creadListPrec :: ReadPrec [Nucleotide]
readPrec :: ReadPrec Nucleotide
$creadPrec :: ReadPrec Nucleotide
readList :: ReadS [Nucleotide]
$creadList :: ReadS [Nucleotide]
readsPrec :: Int -> ReadS Nucleotide
$creadsPrec :: Int -> ReadS Nucleotide
Read, Nucleotide -> Nucleotide -> Bool
(Nucleotide -> Nucleotide -> Bool)
-> (Nucleotide -> Nucleotide -> Bool) -> Eq Nucleotide
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Nucleotide -> Nucleotide -> Bool
$c/= :: Nucleotide -> Nucleotide -> Bool
== :: Nucleotide -> Nucleotide -> Bool
$c== :: Nucleotide -> Nucleotide -> Bool
Eq, Eq Nucleotide
Eq Nucleotide
-> (Nucleotide -> Nucleotide -> Ordering)
-> (Nucleotide -> Nucleotide -> Bool)
-> (Nucleotide -> Nucleotide -> Bool)
-> (Nucleotide -> Nucleotide -> Bool)
-> (Nucleotide -> Nucleotide -> Bool)
-> (Nucleotide -> Nucleotide -> Nucleotide)
-> (Nucleotide -> Nucleotide -> Nucleotide)
-> Ord Nucleotide
Nucleotide -> Nucleotide -> Bool
Nucleotide -> Nucleotide -> Ordering
Nucleotide -> Nucleotide -> Nucleotide
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Nucleotide -> Nucleotide -> Nucleotide
$cmin :: Nucleotide -> Nucleotide -> Nucleotide
max :: Nucleotide -> Nucleotide -> Nucleotide
$cmax :: Nucleotide -> Nucleotide -> Nucleotide
>= :: Nucleotide -> Nucleotide -> Bool
$c>= :: Nucleotide -> Nucleotide -> Bool
> :: Nucleotide -> Nucleotide -> Bool
$c> :: Nucleotide -> Nucleotide -> Bool
<= :: Nucleotide -> Nucleotide -> Bool
$c<= :: Nucleotide -> Nucleotide -> Bool
< :: Nucleotide -> Nucleotide -> Bool
$c< :: Nucleotide -> Nucleotide -> Bool
compare :: Nucleotide -> Nucleotide -> Ordering
$ccompare :: Nucleotide -> Nucleotide -> Ordering
$cp1Ord :: Eq Nucleotide
Ord, Int -> Nucleotide
Nucleotide -> Int
Nucleotide -> [Nucleotide]
Nucleotide -> Nucleotide
Nucleotide -> Nucleotide -> [Nucleotide]
Nucleotide -> Nucleotide -> Nucleotide -> [Nucleotide]
(Nucleotide -> Nucleotide)
-> (Nucleotide -> Nucleotide)
-> (Int -> Nucleotide)
-> (Nucleotide -> Int)
-> (Nucleotide -> [Nucleotide])
-> (Nucleotide -> Nucleotide -> [Nucleotide])
-> (Nucleotide -> Nucleotide -> [Nucleotide])
-> (Nucleotide -> Nucleotide -> Nucleotide -> [Nucleotide])
-> Enum Nucleotide
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: Nucleotide -> Nucleotide -> Nucleotide -> [Nucleotide]
$cenumFromThenTo :: Nucleotide -> Nucleotide -> Nucleotide -> [Nucleotide]
enumFromTo :: Nucleotide -> Nucleotide -> [Nucleotide]
$cenumFromTo :: Nucleotide -> Nucleotide -> [Nucleotide]
enumFromThen :: Nucleotide -> Nucleotide -> [Nucleotide]
$cenumFromThen :: Nucleotide -> Nucleotide -> [Nucleotide]
enumFrom :: Nucleotide -> [Nucleotide]
$cenumFrom :: Nucleotide -> [Nucleotide]
fromEnum :: Nucleotide -> Int
$cfromEnum :: Nucleotide -> Int
toEnum :: Int -> Nucleotide
$ctoEnum :: Int -> Nucleotide
pred :: Nucleotide -> Nucleotide
$cpred :: Nucleotide -> Nucleotide
succ :: Nucleotide -> Nucleotide
$csucc :: Nucleotide -> Nucleotide
Enum, Nucleotide
Nucleotide -> Nucleotide -> Bounded Nucleotide
forall a. a -> a -> Bounded a
maxBound :: Nucleotide
$cmaxBound :: Nucleotide
minBound :: Nucleotide
$cminBound :: Nucleotide
Bounded)

-- See https://stackoverflow.com/a/31527024; apparently, pattern matching (and
-- case statements) are fast because they are compiled to lookup tables. Hence,
-- they are faster than guards (because equality has to be checked), and faster
-- than lookups with sets.
toWord :: Nucleotide -> Word8
toWord :: Nucleotide -> Word8
toWord Nucleotide
A = Char -> Word8
c2w Char
'A'
toWord Nucleotide
C = Char -> Word8
c2w Char
'C'
toWord Nucleotide
G = Char -> Word8
c2w Char
'G'
toWord Nucleotide
T = Char -> Word8
c2w Char
'T'

fromWord :: Word8 -> Nucleotide
fromWord :: Word8 -> Nucleotide
fromWord Word8
w = case Word8 -> Char
w2c Word8
w of
  Char
'A' -> Nucleotide
A
  Char
'C' -> Nucleotide
C
  Char
'G' -> Nucleotide
G
  Char
'T' -> Nucleotide
T
  Char
c -> String -> Nucleotide
forall a. HasCallStack => String -> a
error (String -> Nucleotide) -> String -> Nucleotide
forall a b. (a -> b) -> a -> b
$ String
"fromWord: Cannot convert " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. Show a => a -> String
show Char
c String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" to Nucleotide."

derivingUnbox
  "Nucleotide"
  [t|Nucleotide -> Word8|]
  [|toWord|]
  [|fromWord|]

instance C.Character Nucleotide where
  toWord :: Nucleotide -> Word8
toWord = Nucleotide -> Word8
toWord
  fromWord :: Word8 -> Nucleotide
fromWord = Word8 -> Nucleotide
fromWord