{-# LANGUAGE OverloadedStrings #-}
module Data.STAR.ResidueCodes(allPDBAminoacids
                             ,allFASTAAminoacids
                             ,toSingleLetterCode
                             ,maybeToSingleLetterCode
                             ,toThreeLetterCode      )
where

import Prelude hiding(String)
import Data.Array
import Data.Map hiding((!))
import qualified Data.List(zipWith, zip)
import Data.ByteString.Char8 as BSC

-- | Three-letter codes for the 20 standard aminoacids.
--
--   The array is zero-based and its order matches the single-letter codes
--   in 'stdAaSLC' position by position, so an index found in one table can
--   be used to look up the other.
stdAaTLC :: Array Int BSC.ByteString
stdAaTLC = listArray (0, Prelude.length tlcList - 1) tlcList
  where
    -- Array bounds are inclusive, hence the @- 1@ above: an upper bound of
    -- @length tlcList@ would leave one trailing undefined element.
    tlcList :: [BSC.ByteString]
    tlcList = [ "ALA", "CYS", "ASP", "GLU", "PHE"
              , "GLY", "HIS", "ILE", "LYS", "LEU"
              , "MET", "ASN", "PRO", "GLN", "ARG"
              , "SER", "THR", "VAL", "TRP", "TYR" ]

-- | All three-letter aminoacid codes held in 'stdAaTLC', in index order.
allPDBAminoacids :: [BSC.ByteString]
allPDBAminoacids = [ stdAaTLC ! ix | ix <- Data.Array.indices stdAaTLC ]
-- | All single-letter aminoacid codes held in 'stdAaSLC', in order.
allFASTAAminoacids :: [Char]
allFASTAAminoacids = BSC.foldr (:) [] stdAaSLC

-- | Single-letter (FASTA) codes for the standard aminoacids, one character
--   per residue.
stdAaSLC :: BSC.ByteString
stdAaSLC = "ACDEFGHIKLMNPQRSTVWY"

-- | Translates a single-letter FASTA aminoacid code into the matching
--   three-letter PDB/BMRB code. A character that does not occur in
--   'stdAaSLC' gives "UNK".
toThreeLetterCode :: Char -> ByteString
toThreeLetterCode fastaCode =
  maybe "UNK" (stdAaTLC !) (BSC.elemIndex fastaCode stdAaSLC)

-- | Translates a three-letter PDB aminoacid code into its single-letter
--   FASTA code. A code that is not a key of 'tlcMap' gives 'X'.
toSingleLetterCode :: ByteString -> Char
toSingleLetterCode pdbCode =
  case Data.Map.lookup pdbCode tlcMap of
    Just fastaCode -> fastaCode
    Nothing        -> 'X'

-- | Like 'toSingleLetterCode', but returns 'Nothing' instead of a
--   placeholder when the three-letter code is not a key of 'tlcMap'.
maybeToSingleLetterCode :: ByteString -> Maybe Char
maybeToSingleLetterCode = (`Data.Map.lookup` tlcMap)

-- | Lookup table from three-letter aminoacid code to single-letter code,
--   built by pairing the entries of 'stdAaTLC' and 'stdAaSLC' position by
--   position. The pairing stops at the shorter of the two tables.
tlcMap :: Map BSC.ByteString Char
tlcMap = Data.Map.fromList $ Data.List.zip (Data.Array.elems stdAaTLC) (BSC.unpack stdAaSLC)

-- | Character used as the stop code.
--   NOTE(review): not exported and not referenced in the visible part of
--   this module; confirm whether it should be exported or removed.
stopCode :: Char
stopCode = '*'