{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}

-- |
-- Module      :  ELynx.Data.NucleotideX
-- Description :  Extended nucleotides including gaps and unknowns
-- Copyright   :  (c) Dominik Schrempf 2021
--
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- See header of 'ELynx.Data.Alphabet'.
--
-- Extended nucleotides with gaps. See also
-- https://www.bioinformatics.org/sms/iupac.html or
-- https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry.
--
-- @
-- Symbol  Description  Bases represented  Complement
-- ------  -----------  -----------------  ----------
-- A       Adenine      A                  T
-- C       Cytosine        C               G
-- G       Guanine            G            C
-- T       Thymine               T         A
-- ------  -----------  -----------------  ----------
-- - or .  Gap (Zero)                      -
-- @
module ELynx.Data.Character.NucleotideX
  ( NucleotideX (..),
  )
where

import Data.ByteString.Internal (c2w, w2c)
import Data.Vector.Unboxed.Deriving
import Data.Word8
import qualified ELynx.Data.Character.Character as C

-- | Extended nucleotides.
data NucleotideX
  = A
  | C
  | G
  | T
  | Gap
  deriving (Int -> NucleotideX -> ShowS
[NucleotideX] -> ShowS
NucleotideX -> String
(Int -> NucleotideX -> ShowS)
-> (NucleotideX -> String)
-> ([NucleotideX] -> ShowS)
-> Show NucleotideX
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [NucleotideX] -> ShowS
$cshowList :: [NucleotideX] -> ShowS
show :: NucleotideX -> String
$cshow :: NucleotideX -> String
showsPrec :: Int -> NucleotideX -> ShowS
$cshowsPrec :: Int -> NucleotideX -> ShowS
Show, ReadPrec [NucleotideX]
ReadPrec NucleotideX
Int -> ReadS NucleotideX
ReadS [NucleotideX]
(Int -> ReadS NucleotideX)
-> ReadS [NucleotideX]
-> ReadPrec NucleotideX
-> ReadPrec [NucleotideX]
-> Read NucleotideX
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [NucleotideX]
$creadListPrec :: ReadPrec [NucleotideX]
readPrec :: ReadPrec NucleotideX
$creadPrec :: ReadPrec NucleotideX
readList :: ReadS [NucleotideX]
$creadList :: ReadS [NucleotideX]
readsPrec :: Int -> ReadS NucleotideX
$creadsPrec :: Int -> ReadS NucleotideX
Read, NucleotideX -> NucleotideX -> Bool
(NucleotideX -> NucleotideX -> Bool)
-> (NucleotideX -> NucleotideX -> Bool) -> Eq NucleotideX
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: NucleotideX -> NucleotideX -> Bool
$c/= :: NucleotideX -> NucleotideX -> Bool
== :: NucleotideX -> NucleotideX -> Bool
$c== :: NucleotideX -> NucleotideX -> Bool
Eq, Eq NucleotideX
Eq NucleotideX
-> (NucleotideX -> NucleotideX -> Ordering)
-> (NucleotideX -> NucleotideX -> Bool)
-> (NucleotideX -> NucleotideX -> Bool)
-> (NucleotideX -> NucleotideX -> Bool)
-> (NucleotideX -> NucleotideX -> Bool)
-> (NucleotideX -> NucleotideX -> NucleotideX)
-> (NucleotideX -> NucleotideX -> NucleotideX)
-> Ord NucleotideX
NucleotideX -> NucleotideX -> Bool
NucleotideX -> NucleotideX -> Ordering
NucleotideX -> NucleotideX -> NucleotideX
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: NucleotideX -> NucleotideX -> NucleotideX
$cmin :: NucleotideX -> NucleotideX -> NucleotideX
max :: NucleotideX -> NucleotideX -> NucleotideX
$cmax :: NucleotideX -> NucleotideX -> NucleotideX
>= :: NucleotideX -> NucleotideX -> Bool
$c>= :: NucleotideX -> NucleotideX -> Bool
> :: NucleotideX -> NucleotideX -> Bool
$c> :: NucleotideX -> NucleotideX -> Bool
<= :: NucleotideX -> NucleotideX -> Bool
$c<= :: NucleotideX -> NucleotideX -> Bool
< :: NucleotideX -> NucleotideX -> Bool
$c< :: NucleotideX -> NucleotideX -> Bool
compare :: NucleotideX -> NucleotideX -> Ordering
$ccompare :: NucleotideX -> NucleotideX -> Ordering
$cp1Ord :: Eq NucleotideX
Ord, Int -> NucleotideX
NucleotideX -> Int
NucleotideX -> [NucleotideX]
NucleotideX -> NucleotideX
NucleotideX -> NucleotideX -> [NucleotideX]
NucleotideX -> NucleotideX -> NucleotideX -> [NucleotideX]
(NucleotideX -> NucleotideX)
-> (NucleotideX -> NucleotideX)
-> (Int -> NucleotideX)
-> (NucleotideX -> Int)
-> (NucleotideX -> [NucleotideX])
-> (NucleotideX -> NucleotideX -> [NucleotideX])
-> (NucleotideX -> NucleotideX -> [NucleotideX])
-> (NucleotideX -> NucleotideX -> NucleotideX -> [NucleotideX])
-> Enum NucleotideX
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: NucleotideX -> NucleotideX -> NucleotideX -> [NucleotideX]
$cenumFromThenTo :: NucleotideX -> NucleotideX -> NucleotideX -> [NucleotideX]
enumFromTo :: NucleotideX -> NucleotideX -> [NucleotideX]
$cenumFromTo :: NucleotideX -> NucleotideX -> [NucleotideX]
enumFromThen :: NucleotideX -> NucleotideX -> [NucleotideX]
$cenumFromThen :: NucleotideX -> NucleotideX -> [NucleotideX]
enumFrom :: NucleotideX -> [NucleotideX]
$cenumFrom :: NucleotideX -> [NucleotideX]
fromEnum :: NucleotideX -> Int
$cfromEnum :: NucleotideX -> Int
toEnum :: Int -> NucleotideX
$ctoEnum :: Int -> NucleotideX
pred :: NucleotideX -> NucleotideX
$cpred :: NucleotideX -> NucleotideX
succ :: NucleotideX -> NucleotideX
$csucc :: NucleotideX -> NucleotideX
Enum, NucleotideX
NucleotideX -> NucleotideX -> Bounded NucleotideX
forall a. a -> a -> Bounded a
maxBound :: NucleotideX
$cmaxBound :: NucleotideX
minBound :: NucleotideX
$cminBound :: NucleotideX
Bounded)

toWord :: NucleotideX -> Word8
toWord :: NucleotideX -> Word8
toWord NucleotideX
A = Char -> Word8
c2w Char
'A'
toWord NucleotideX
C = Char -> Word8
c2w Char
'C'
toWord NucleotideX
G = Char -> Word8
c2w Char
'G'
toWord NucleotideX
T = Char -> Word8
c2w Char
'T'
toWord NucleotideX
Gap = Char -> Word8
c2w Char
'-'

fromWord :: Word8 -> NucleotideX
fromWord :: Word8 -> NucleotideX
fromWord Word8
w = case Word8 -> Char
w2c Word8
w of
  Char
'A' -> NucleotideX
A
  Char
'C' -> NucleotideX
C
  Char
'G' -> NucleotideX
G
  Char
'T' -> NucleotideX
T
  Char
'-' -> NucleotideX
Gap
  Char
'.' -> NucleotideX
Gap
  Char
c -> String -> NucleotideX
forall a. HasCallStack => String -> a
error (String -> NucleotideX) -> String -> NucleotideX
forall a b. (a -> b) -> a -> b
$ String
"fromWord: Cannot convert " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. Show a => a -> String
show Char
c String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" to NucleotideX."

derivingUnbox
  "NucleotideX"
  [t|NucleotideX -> Word8|]
  [|toWord|]
  [|fromWord|]

instance C.Character NucleotideX where
  toWord :: NucleotideX -> Word8
toWord = NucleotideX -> Word8
toWord
  fromWord :: Word8 -> NucleotideX
fromWord = Word8 -> NucleotideX
fromWord

instance C.CharacterX NucleotideX where
  gap :: NucleotideX
gap = NucleotideX
Gap