module Bio.Sequence.Parser.Fasta.Internal.Parser
  ( fastaParser
  ) where

import           Bio.Sequence     (Chain (..), Sequence)
import           Data.Text        as T (Text, concat, pack, strip, unpack)
import           Text.Parsec      (char, eof, letter, many, many1, newline,
                                   noneOf, spaces, (<|>))
import           Text.Parsec.Text (Parser)

-- | One FASTA record in raw textual form, as produced by 'fastaSequence'
-- before it is converted with 'fastaSequenceToSequence'.
data FastaSequence = FastaSequence
  { -- | Text of the header line, filled from the header parser.
    sName    :: Text
  , -- | Residue text, filled from the sequence-body parser.
    residues :: Text
  }
  deriving (Show)

-- | Parse zero or more FASTA records and convert each one to a 'Sequence'.
--
-- Whitespace (including blank lines) in front of the first record is skipped.
-- Without that, input beginning with a blank line made the parser succeed
-- with an empty list, because the first character was not @>@.
--
-- End of input is not demanded: parsing stops at the first position where a
-- record cannot begin (no @>@). Follow this parser with @eof@ at the call
-- site if leftover input should be reported as an error.
fastaParser :: Parser [Sequence Char]
fastaParser = map fastaSequenceToSequence <$> (spaces *> many fastaSequence)

-- | Convert a raw FASTA record into a 'Chain': the record name becomes
-- 'chainType' and the residue text, unpacked to a list of characters,
-- becomes 'chainResidues'.
fastaSequenceToSequence :: FastaSequence -> Sequence Char
fastaSequenceToSequence (FastaSequence name rawResidues) =
  Chain
    { chainType     = name
    , chainResidues = unpack rawResidues
    }

-- | Parse a single FASTA record: the header line first, then the residue
-- lines that follow it.
fastaSequence :: Parser FastaSequence
fastaSequence = do
  header <- nameP
  body   <- sequenceP
  pure (FastaSequence header body)

-- | Parse a header line with 'nameP'' and return its contents as 'Text'
-- with leading and trailing whitespace removed.
nameP :: Parser Text
nameP = fmap (strip . T.pack) nameP'

-- | Parse one FASTA header line: a @>@ followed by every character up to the
-- end of the line. The @>@ and the line terminator are consumed but not
-- returned; any other character (a carriage return included) is kept.
--
-- The line may be terminated by a newline or by the end of input, so a
-- header on the last line of a file without a trailing newline is accepted
-- instead of failing the whole parse.
nameP' :: Parser String
nameP' = char '>' *> many (noneOf ['\n']) <* ((() <$ newline) <|> eof)

-- | Parse zero or more residue lines with 'lineP' and join them into a
-- single 'Text'. Yields empty text when no residue line follows.
sequenceP :: Parser Text
sequenceP = do
  chunks <- many lineP
  pure (T.concat chunks)

lineP :: Parser Text
lineP = T.pack <$> many1 letter <* spaces