module Biobase.Primary.Nuc.DNA where

import           Data.Char (toUpper)
import           Data.Ix (Ix(..))
import           Data.Primitive.Types
import           Data.String
import           Data.Tuple (swap)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VGM
import qualified Data.Vector.Unboxed as VU
import           Control.Category ((>>>))

import           Biobase.Primary.Bounds
import           Biobase.Primary.Letter



-- | DNA nucleotides.

data DNA

-- Single-character names for nucleotides.

pattern A = Letter 0 :: Letter DNA
pattern C = Letter 1 :: Letter DNA
pattern G = Letter 2 :: Letter DNA
pattern T = Letter 3 :: Letter DNA
pattern N = Letter 4 :: Letter DNA

instance Enum (Letter DNA) where
    succ N          = error "succ/N:DNA"
    succ (Letter x) = Letter $ x+1
    pred A          = error "pred/A:DNA"
    pred (Letter x) = Letter $ x-1
    toEnum k | k>=0 && k<=4 = Letter k
    toEnum k                = error $ "toEnum/Letter DNA " ++ show k
    fromEnum (Letter k) = k

acgt :: [Letter DNA]
acgt = [A .. T]

charDNA = toUpper >>> \case
    'A' -> A
    'C' -> C
    'G' -> G
    'T' -> T
    _   -> N
{-# INLINE charDNA #-}

dnaChar = \case
  A -> 'A'
  C -> 'C'
  G -> 'G'
  T -> 'T'
  N -> 'N'
{-# INLINE dnaChar #-}            

instance Show (Letter DNA) where
    show c = [dnaChar c]

instance Read (Letter DNA) where
  readsPrec p [] = []
  readsPrec p (x:xs)
    | x==' ' = readsPrec p xs
    | otherwise = [(charDNA x, xs)]

dnaSeq :: MkPrimary n DNA => n -> Primary DNA
dnaSeq = primary

instance Bounded (Letter DNA) where
    minBound = A
    maxBound = N

instance MkPrimary (VU.Vector Char) DNA where
    primary = VU.map charDNA

instance IsString [Letter DNA] where
    fromString = map charDNA