-- |
-- Module      :  Character
-- Description :  Character interface
-- Copyright   :  (c) Dominik Schrempf 2018
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Oct 12 16:24:02 2018.
--
-- See header of 'ELynx.Data.Alphabet.Alphabet'.
module ELynx.Data.Character.Character
  ( Character (..),
    fromChar,
    toChar,
    fromString,
    toString,
    CharacterX (..),
    isGap,
    CharacterI (..),
    isUnknown,
    isIUPAC,
    isStandard,
    convert,
  )
where

import Data.ByteString.Internal (c2w, w2c)
import qualified Data.Set as S
import Data.Vector.Unboxed.Base (Unbox)
import Data.Word8 (Word8)

-- XXX: Remove name clash with ELynx.Data.Alphabet.Alphabet.Character?

-- | A set of characters forms an 'ELynx.Data.Alphabet.Alphabet'. At the
-- moment, 'Word8' is used, since none of the alphabets has more than 255
-- characters.
class (Show a, Read a, Eq a, Ord a, Enum a, Bounded a, Unbox a) => Character a where
  -- | Write characters.
  toWord :: a -> Word8

  -- | Read characters.
  fromWord :: Word8 -> a

-- | Conversion to 'Char'.
toChar :: Character a => a -> Char
toChar = w2c . toWord

-- | Conversion from 'Char'.
fromChar :: Character a => Char -> a
fromChar = fromWord . c2w

-- | Conversion to 'String'.
toString :: Character a => [a] -> String
toString = map toChar

-- | Conversion from 'String'.
fromString :: Character a => String -> [a]
fromString = map fromChar

-- | An extended character type with gaps and unknowns.
class Character a => CharacterX a where
  gap :: a

-- | Is the character a gap or unknown?
isGap :: CharacterX a => a -> Bool
isGap c = c == gap

-- | IUPAC characters with a mapping to extended characters.
class CharacterX a => CharacterI a where
  unknown :: a
  iupac :: [a]
  toStandard :: a -> [a]

-- | Check if a IUPAC 'CharacterI' is unknown (e.g., N for nucleotides).
isUnknown :: CharacterI a => a -> Bool
isUnknown c = c == unknown

iupacLookup :: CharacterI a => S.Set a
iupacLookup = S.fromList iupac

-- | Is the given character a IUPAC character?
isIUPAC :: CharacterI a => a -> Bool
isIUPAC c = c `S.member` iupacLookup

-- | Is the given character a standard character?
isStandard :: CharacterI a => a -> Bool
isStandard c = not $ isIUPAC c

-- | Convert between character classes. May throw error.
convert :: (Character a, Character b) => a -> b
convert = fromWord . toWord