module Bio.FASTA.Parser
  ( fastaP
  , fastaPGeneric
  ) where

import Bio.FASTA.Type       (Fasta, FastaItem (..))
import Bio.Sequence         (BareSequence, bareSequence)
import Data.Attoparsec.Text (Parser, char, choice, endOfInput, endOfLine, many', many1', satisfy,
                             takeWhile)
import Data.Char            (isLetter)
import Data.Text            (Text, strip)
import Prelude              hiding (takeWhile)

-- | Parser for the contents of a .fasta file.
--
-- This is 'fastaPGeneric' specialised to 'isLetter': a character is accepted
-- as part of a sequence only when 'isLetter' holds for it.
fastaP :: Parser (Fasta Char)
fastaP = fastaPGeneric isLetter

-- | Parser of zero or more FASTA records, parameterised by the predicate that
-- decides which characters are allowed inside a sequence.
--
-- Parsing stops at the first position where another record cannot be read;
-- this parser does not itself require the end of input.
fastaPGeneric :: (Char -> Bool) -> Parser (Fasta Char)
fastaPGeneric predicate = many' (item predicate)

-- | A single FASTA record: the header line (see 'seqName') followed by the
-- sequence lines that belong to it (see 'fastaSeq').
item :: (Char -> Bool) -> Parser (FastaItem Char)
item predicate = do
  header   <- seqName
  residues <- fastaSeq predicate
  pure (FastaItem header residues)

-- | Header line of a record.
--
-- Expects a leading @>@, skips any tabs after it, then takes everything up to
-- (but not including) the first @\\n@ or @\\r@ and finishes with 'eol'.
-- Leading and trailing whitespace is stripped from the returned name.
seqName :: Parser Text
seqName = do
  _       <- char '>'
  tabs
  rawName <- takeWhile (\c -> c /= '\n' && c /= '\r')
  tabs
  eol
  pure (strip rawName)

-- | Sequence body of a record: zero or more sequence lines (see 'line'),
-- concatenated in order and wrapped with 'bareSequence'.
fastaSeq :: (Char -> Bool) -> Parser (BareSequence Char)
fastaSeq predicate = do
  rows <- many' (line predicate)
  pure (bareSequence (mconcat rows))

-- | One line of sequence data.
--
-- The line consists of one or more runs of characters satisfying the
-- predicate; each run may be followed by any number of spaces, which are
-- dropped. The line must be terminated by 'eol'. The accepted characters are
-- returned in order as a single 'String'.
line :: (Char -> Bool) -> Parser String
line predicate = do
  runs <- many1' residueRun
  eol
  pure (concat runs)
  where
    -- A non-empty run of accepted characters plus any spaces trailing it.
    residueRun :: Parser String
    residueRun = many1' (satisfy predicate) <* many' (char ' ')

-- | End of a logical line: optional trailing tabs, then either one or more
-- line breaks (see 'slashN') or the end of the input.
eol :: Parser ()
eol = do
  tabs
  choice [slashN, endOfInput]

-- | Consume one or more consecutive line endings (as recognised by
-- 'endOfLine') and discard them.
slashN :: Parser ()
slashN = many1' endOfLine *> pure ()

-- | Skip zero or more tab characters; never fails.
tabs :: Parser ()
tabs = many' (char '\t') *> pure ()