module Biobase.Primary.IUPAC where
import           Control.Arrow ((***))
import           Data.ByteString.Char8 (ByteString,unpack)
import           Data.Char (toUpper)
import           Data.FileEmbed (embedFile)
import           Data.List (nub,sort)
import           Data.String
import           Data.Tuple (swap)
import qualified Data.Vector.Unboxed as VU
import           Control.Category ((>>>))
import           Biobase.Types.BioSequence
import           Biobase.Primary.Letter
import           Biobase.Primary.Nuc
import qualified Biobase.Primary.Nuc.RNA as R
data DEG
pattern A = Letter  0 :: Letter DEG n
pattern C = Letter  1 :: Letter DEG n
pattern G = Letter  2 :: Letter DEG n
pattern T = Letter  3 :: Letter DEG n
pattern U = Letter  4 :: Letter DEG n
pattern W = Letter  5 :: Letter DEG n
pattern S = Letter  6 :: Letter DEG n
pattern M = Letter  7 :: Letter DEG n
pattern K = Letter  8 :: Letter DEG n
pattern R = Letter  9 :: Letter DEG n
pattern Y = Letter 10 :: Letter DEG n
pattern B = Letter 11 :: Letter DEG n
pattern D = Letter 12 :: Letter DEG n
pattern H = Letter 13 :: Letter DEG n
pattern V = Letter 14 :: Letter DEG n
pattern N = Letter 15 :: Letter DEG n
instance Bounded (Letter DEG n) where
    minBound = A
    maxBound = N
instance Enum (Letter DEG n) where
    succ N           = error "succ/N:DEG"
    succ (Letter x)  = Letter $ x+1
    pred A           = error "pred/A:DEG"
    pred (Letter x)  = Letter $ x-1
    toEnum k | k>=0 && k<=15 = Letter k
    toEnum k                 = error $ "toEnum/Letter DEG " ++ show k
    fromEnum (Letter k) = k
charDEG = toUpper >>> \case
  'A' -> A
  'C' -> C
  'G' -> G
  'T' -> T
  'U' -> U
  'W' -> W
  'S' -> S
  'M' -> M
  'K' -> K
  'R' -> R
  'Y' -> Y
  'B' -> B
  'D' -> D
  'H' -> H
  'V' -> V
  _   -> N
{-# INLINE charDEG #-}
degChar = \case
  A -> 'A'
  C -> 'C'
  G -> 'G'
  T -> 'T'
  U -> 'U'
  W -> 'W'
  S -> 'S'
  M -> 'M'
  K -> 'K'
  R -> 'R'
  Y -> 'Y'
  B -> 'B'
  D -> 'D'
  H -> 'H'
  V -> 'V'
  N -> 'N'
{-# INLINE degChar #-}
instance Show (Letter DEG n) where
    show c = [degChar c]
degSeq :: MkPrimary p DEG n => p -> Primary DEG n
degSeq = primary
instance MkPrimary (VU.Vector Char) DEG n where
    primary = VU.map charDEG
instance IsString [Letter DEG n] where
    fromString = map charDEG
class Degenerate x where
  fromDegenerate :: Char -> [x]
  toDegenerate   :: [x]  -> Maybe Char
instance Degenerate Char where
  fromDegenerate = maybe [] id . flip lookup iupacXDNAchars
  toDegenerate   = flip lookup (map swap iupacXDNAchars) . nub . sort
instance Degenerate (Letter RNA n) where
    fromDegenerate 'T' = []
    fromDegenerate x   = map dnaTrna $ fromDegenerate x
    toDegenerate   xs  | xs == [R.U] = Just 'U'
                       | otherwise  = toDegenerate $ map rnaTdna xs
instance Degenerate (Letter DNA n) where
    fromDegenerate 'U' = []
    fromDegenerate x   = map charDNA $ fromDegenerate x
    toDegenerate       = toDegenerate . map dnaChar
instance Degenerate (Letter XNA n) where
    fromDegenerate = map charXNA . fromDegenerate
    toDegenerate   = toDegenerate . map xnaChar
iupacXDNAchars :: [(Char,String)]
iupacXDNAchars = map (go . words) . lines . unpack $ iupacNucleotides where
  go [[c],cs] = (c,cs)
{-# NOINLINE iupacXDNAchars #-}
iupacNucleotides :: ByteString
iupacNucleotides = $(embedFile "sources/iupac-nucleotides")