{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module ELynx.Data.Character.NucleotideI
( NucleotideI (..)
) where
import Data.Vector.Unboxed.Deriving
import Data.Word8
import qualified ELynx.Data.Character.Character as C
import ELynx.Tools.ByteString (c2w, w2c)
-- | Nucleotide alphabet made of the four standard characters, the IUPAC
-- ambiguity codes, and a gap.
--
-- The order of the constructors matters: the derived 'Ord', 'Enum' and
-- 'Bounded' instances follow it. The standard characters each ambiguity code
-- stands for are given by @toStandard@.
data NucleotideI
  = A -- ^ Standard character @A@.
  | C -- ^ Standard character @C@.
  | G -- ^ Standard character @G@.
  | T -- ^ Standard character @T@.
  | U -- ^ Treated as @T@.
  | W -- ^ @A@ or @T@.
  | S -- ^ @G@ or @C@.
  | M -- ^ @A@ or @C@.
  | K -- ^ @G@ or @T@.
  | R -- ^ @A@ or @G@.
  | Y -- ^ @C@ or @T@.
  | B -- ^ @C@, @G@ or @T@ (not @A@).
  | D -- ^ @A@, @G@ or @T@ (not @C@).
  | H -- ^ @A@, @C@ or @T@ (not @G@).
  | V -- ^ @A@, @C@ or @G@ (not @T@).
  | N -- ^ Any of @A@, @C@, @G@, @T@.
  | Gap -- ^ Gap; stands for no standard character.
  deriving (Show, Read, Eq, Ord, Enum, Bounded)
-- | Encode a 'NucleotideI' as a 'Word8'.
--
-- Every constructor is written as its upper-case one-letter code, except
-- 'Gap', which is written as @\'-\'@. The character is converted with 'c2w'.
toWord :: NucleotideI -> Word8
toWord nuc = c2w letter
  where
    -- One-letter code of the given character.
    letter = case nuc of
      A -> 'A'
      C -> 'C'
      G -> 'G'
      T -> 'T'
      U -> 'U'
      W -> 'W'
      S -> 'S'
      M -> 'M'
      K -> 'K'
      R -> 'R'
      Y -> 'Y'
      B -> 'B'
      D -> 'D'
      H -> 'H'
      V -> 'V'
      N -> 'N'
      Gap -> '-'
-- | Decode a 'Word8' into a 'NucleotideI'.
--
-- The word is turned into a character with 'w2c' and matched against the
-- upper-case one-letter codes. Both @\'-\'@ and @\'.\'@ decode to 'Gap', so
-- @\'.\'@ does not survive a round trip through @toWord@ (which writes
-- @\'-\'@).
--
-- This function is partial: any other character (including lower-case
-- letters) raises an 'error' whose message names the offending character.
fromWord :: Word8 -> NucleotideI
fromWord w = case w2c w of
  'A' -> A
  'C' -> C
  'G' -> G
  'T' -> T
  'U' -> U
  'W' -> W
  'S' -> S
  'M' -> M
  'K' -> K
  'R' -> R
  'Y' -> Y
  'B' -> B
  'D' -> D
  'H' -> H
  'V' -> V
  'N' -> N
  '-' -> Gap
  '.' -> Gap
  -- 'show' quotes and escapes the character, so unprintable input is still
  -- readable in the message.
  c -> error $ "fromWord: Cannot convert " ++ show c ++ " to NucleotideI."
-- Template Haskell splice from "Data.Vector.Unboxed.Deriving". It is given the
-- name "NucleotideI", the representation type @NucleotideI -> Word8@, and the
-- two conversion functions 'toWord' and 'fromWord'.
-- NOTE(review): presumably this generates the unboxed-vector instances for
-- 'NucleotideI' backed by 'Word8'; confirm against the vector-th-unbox docs.
derivingUnbox
  "NucleotideI"
  [t| NucleotideI -> Word8 |]
  [| toWord |]
  [| fromWord |]
-- The class module is imported qualified only, so the unqualified names on
-- the right-hand sides are this module's top-level 'fromWord' and 'toWord',
-- while the left-hand sides bind the class methods.
instance C.Character NucleotideI where
  fromWord = fromWord
  toWord = toWord
-- | Expand a character into the list of standard characters (@A@, @C@, @G@,
-- @T@) it stands for.
--
-- Standard characters map to themselves, 'U' maps to 'T', ambiguity codes map
-- to all characters they cover, and 'Gap' maps to the empty list.
toStandard :: NucleotideI -> [NucleotideI]
toStandard nuc = case nuc of
  A -> [A]
  C -> [C]
  G -> [G]
  T -> [T]
  U -> [T]
  W -> [A, T]
  S -> [G, C]
  M -> [A, C]
  K -> [G, T]
  R -> [A, G]
  Y -> [C, T]
  B -> [C, G, T]
  D -> [A, G, T]
  H -> [A, C, T]
  V -> [A, C, G]
  N -> [A, C, G, T]
  Gap -> []
-- 'Gap' is the gap character of this alphabet.
instance C.CharacterX NucleotideI where
  gap = Gap
-- 'N' is the unknown character. The unqualified 'toStandard' on the
-- right-hand side is this module's top-level function (the class module is
-- imported qualified only).
instance C.CharacterI NucleotideI where
  toStandard = toStandard
  unknown = N
  iupac =
    [ U, W, S, M, K, R, Y, B, D, H, V, N ]