-- | Parsec-based parser for FASTA-formatted text.
module Bio.Sequence.Parser.Fasta.Internal.Parser
  ( fastaParser
  ) where

import Bio.Sequence (Chain (..), Sequence)
import Data.Text as T (Text, concat, pack, strip, unpack)
import Text.Parsec (char, letter, many, many1, newline, noneOf, spaces)
import Text.Parsec.Text (Parser)

-- | One raw record as read from the input, before it is converted
-- into a 'Sequence'.
data FastaSequence = FastaSequence
  { sName    :: Text -- ^ header text after @>@, surrounding whitespace stripped
  , residues :: Text -- ^ all residue lines of the record joined together
  } deriving (Show)

-- | Parse zero or more FASTA records, in input order.
--
-- The parser does not demand end of input itself: it stops (successfully)
-- at the first position where a record cannot start, i.e. where the next
-- character is not @>@.
fastaParser :: Parser [Sequence Char]
fastaParser = map fastaSequenceToSequence <$> many fastaSequence

-- | Build a 'Chain' from a raw record: the record name becomes
-- 'chainType' and the residues, unpacked to a list of 'Char', become
-- 'chainResidues'.
fastaSequenceToSequence :: FastaSequence -> Sequence Char
fastaSequenceToSequence fs =
  Chain
    { chainType     = sName fs
    , chainResidues = unpack (residues fs)
    }

-- | A single record: a header line followed by its residue lines.
fastaSequence :: Parser FastaSequence
fastaSequence = do
  header <- nameP
  body   <- sequenceP
  pure FastaSequence { sName = header, residues = body }

-- | The header line as 'Text', with leading and trailing whitespace removed.
nameP :: Parser Text
nameP = strip . T.pack <$> nameP'

-- | The raw header line: a @>@, then every character up to the line
-- break. The terminating newline is required and is consumed.
nameP' :: Parser String
nameP' = char '>' *> many (noneOf ['\n']) <* newline

-- | The residues of one record: zero or more residue lines concatenated.
sequenceP :: Parser Text
sequenceP = T.concat <$> many lineP

-- | One run of at least one letter; any whitespace that follows it
-- (line breaks included) is consumed and discarded.
lineP :: Parser Text
lineP = do
  chunk <- many1 letter
  spaces
  pure (T.pack chunk)