module Bio.FASTA.Parser
( fastaP
, fastaPGeneric
) where
import Bio.FASTA.Type (Fasta, FastaItem (..))
import Bio.Sequence (BareSequence, bareSequence)
import Data.Attoparsec.Text (Parser, char, choice, endOfInput, endOfLine, many', many1', satisfy,
                             takeWhile, takeWhile1)
import Data.Char (isLetter)
import Data.Text (Text, strip, unpack)
import Prelude hiding (takeWhile)
-- | Parser for FASTA content whose sequence characters are restricted to
-- letters, as decided by 'isLetter'. This is 'fastaPGeneric' specialised to
-- that predicate.
fastaP :: Parser (Fasta Char)
fastaP = fastaPGeneric isLetter
-- | Parser for zero or more FASTA records placed one after another.
--
-- The supplied predicate decides which characters are accepted as sequence
-- elements. Because records are collected with 'many'', the parser succeeds
-- (possibly with an empty result) and stops at the first position where
-- another record cannot be read.
fastaPGeneric :: (Char -> Bool) -> Parser (Fasta Char)
fastaPGeneric predicate = many' (item predicate)
-- | Parser for a single record: a header line (see 'seqName') followed by
-- the sequence lines that belong to it (see 'fastaSeq'). The predicate is
-- forwarded to the sequence parser.
item :: (Char -> Bool) -> Parser (FastaItem Char)
item predicate = do
  header <- seqName
  body <- fastaSeq predicate
  pure (FastaItem header body)
-- | Parser for a header line.
--
-- Expects a leading @>@, skips any tab characters after it, and captures the
-- rest of the line up to (but not including) the first line feed or carriage
-- return. The end of the line (or of the input) is consumed, and the captured
-- text is returned with surrounding whitespace stripped.
seqName :: Parser Text
seqName = do
  _ <- char '>'
  tabs
  rawName <- takeWhile notLineBreak
  tabs
  eol
  pure (strip rawName)
  where
    -- True for every character except line feed and carriage return.
    notLineBreak c = c /= '\n' && c /= '\r'
-- | Parser for the sequence part of a record: zero or more sequence lines
-- (see 'line') whose characters are joined in order and handed to
-- 'bareSequence'.
fastaSeq :: (Char -> Bool) -> Parser (BareSequence Char)
fastaSeq predicate = do
  chunks <- many' (line predicate)
  pure (bareSequence (mconcat chunks))
-- | Parser for one sequence line.
--
-- A line is one or more runs of characters accepted by the predicate, each
-- run optionally followed by space characters, and is terminated by 'eol'.
-- The result is the accepted characters in order, with the spaces dropped.
line :: (Char -> Bool) -> Parser String
line predicate = concat <$> many1' chunk <* eol
  where
    -- A non-empty run of accepted characters plus any trailing spaces.
    -- 'takeWhile1' reads the whole run in a single step rather than parsing
    -- it one character at a time; it still fails on an empty run.
    chunk :: Parser String
    chunk = unpack <$> takeWhile1 predicate <* many' (char ' ')
-- | Parser for the end of a line: optional tab characters followed by either
-- one or more line terminators (see 'slashN') or the end of the input.
eol :: Parser ()
eol = do
  tabs
  choice [slashN, endOfInput]
-- | Parser that consumes one or more consecutive line terminators, as
-- recognised by 'endOfLine', and discards them.
slashN :: Parser ()
slashN = many1' endOfLine *> pure ()
-- | Parser that consumes zero or more tab characters and discards them.
-- It succeeds without consuming anything when no tab is present.
tabs :: Parser ()
tabs = many' (char '\t') *> pure ()