----------------------------------------------------------------------------- -- | -- Module : Data.Ngrams.Parser -- Copyright : (C) 2014 Yorick Laupa -- License : (see the file LICENSE) -- -- Maintainer : Yorick Laupa <yo.eight@gmail.com> -- Stability : provisional -- Portability : non-portable -- ---------------------------------------------------------------------------- module Data.Ngrams.Parser ( parserBigram , parserTrigram , parserQuadgram , parserPentagram ) where ---------------------------------------------------------------------------- import Data.Char (isSpace) ---------------------------------------------------------------------------- import Data.Attoparsec.Text (Parser, decimal, skipSpace, takeWhile1) import Data.Text (Text) ---------------------------------------------------------------------------- import Data.Ngrams.Type ---------------------------------------------------------------------------- parserBigram :: Parser Bigram parserBigram = do freq <- decimal skipSpace w1 <- _word skipSpace w2 <- _word return $ Bigram freq w1 w2 ---------------------------------------------------------------------------- parserTrigram :: Parser Trigram parserTrigram = do Bigram freq w1 w2 <- parserBigram skipSpace w3 <- _word return $ Trigram freq w1 w2 w3 ---------------------------------------------------------------------------- parserQuadgram :: Parser Quadgram parserQuadgram = do Trigram freq w1 w2 w3 <- parserTrigram skipSpace w4 <- _word return $ Quadgram freq w1 w2 w3 w4 ---------------------------------------------------------------------------- parserPentagram :: Parser Pentagram parserPentagram = do Quadgram freq w1 w2 w3 w4 <- parserQuadgram skipSpace w5 <- _word return $ Pentagram freq w1 w2 w3 w4 w5 ---------------------------------------------------------------------------- _word :: Parser Text _word = takeWhile1 (not . isSpace)