{-# LANGUAGE FlexibleContexts #-} {- | Module : Text.Parsec.Rfc2234 Copyright : (c) 2013 Peter Simons License : BSD3 Maintainer : simons@cryp.to Stability : provisional Portability : portable This module provides parsers for the grammar defined in RFC2234, \"Augmented BNF for Syntax Specifications: ABNF\", . The terminal called @char@ in the RFC is called 'character' here to avoid conflicts with Parsec's 'char' function. -} module Text.Parsec.Rfc2234 where import Control.Monad ( liftM2, replicateM ) import Data.Char ( toUpper, chr, ord ) import Text.Parsec hiding (crlf) import qualified Text.Parsec.String as PS -- Customize hlint ... {-# ANN module "HLint: ignore Use camelCase" #-} ---------------------------------------------------------------------- -- * Parser Combinators ---------------------------------------------------------------------- -- |Case-insensitive variant of Parsec's 'char' function. caseChar :: Stream s m Char => Char -> ParsecT s u m Char caseChar c = satisfy (\x -> toUpper x == toUpper c) -- |Case-insensitive variant of Parsec's 'string' function. caseString :: Stream s m Char => String -> ParsecT s u m () caseString cs = mapM_ caseChar cs cs -- |Match a parser at least @n@ times. -- manyN :: Int -> GenParser a b c -> GenParser a b [c] manyN :: Int -> ParsecT s u m a -> ParsecT s u m [a] manyN n p | n <= 0 = return [] | otherwise = liftM2 (++) (replicateM n p) (many p) -- |Match a parser at least @n@ times, but no more than @m@ times. manyNtoM :: Int -> Int -> ParsecT s u m a -> ParsecT s u m [a] manyNtoM n m p | n < 0 = return [] | n > m = return [] | n == m = replicateM n p | n == 0 = foldr (<|>) (return []) (map (\x -> try $ replicateM x p) (reverse [1..m])) | otherwise = liftM2 (++) (replicateM n p) (manyNtoM 0 (m-n) p) -- |Helper function to generate 'Parser'-based instances for -- the 'Read' class. parsec2read :: PS.Parser a -> String -> [(a, String)] parsec2read f x = either (error . show) id (parse f' "" x) where f' = do { a <- f; res <- getInput; return [(a,res)] } ---------------------------------------------------------------------- -- * Primitive Parsers ---------------------------------------------------------------------- -- |Match any character of the alphabet. alpha :: Stream s m Char => ParsecT s u m Char alpha = satisfy (\c -> c `elem` (['A'..'Z'] ++ ['a'..'z'])) "alphabetic character" -- |Match either \"1\" or \"0\". bit :: Stream s m Char => ParsecT s u m Char bit = oneOf "01" "bit ('0' or '1')" -- |Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is). character :: Stream s m Char => ParsecT s u m Char character = satisfy (\c -> (c >= chr 1) && (c <= chr 127)) "7-bit character excluding NUL" -- |Match the carriage return character @\\r@. cr :: Stream s m Char => ParsecT s u m Char cr = char '\r' "carriage return" -- |Match returns the linefeed character @\\n@. lf :: Stream s m Char => ParsecT s u m Char lf = char '\n' "linefeed" -- |Match the Internet newline @\\r\\n@. crlf :: Stream s m Char => ParsecT s u m String crlf = do c <- cr l <- lf return [c,l] "carriage return followed by linefeed" -- |Match any US-ASCII control character. That is -- any character with a decimal value in the range of [0..31,127]. ctl :: Stream s m Char => ParsecT s u m Char ctl = satisfy (\c -> ord c `elem` ([0..31] ++ [127])) "control character" -- |Match the double quote character \"@\"@\". dquote :: Stream s m Char => ParsecT s u m Char dquote = char (chr 34) "double quote" -- |Match any character that is valid in a hexadecimal number; -- [\'0\'..\'9\'] and [\'A\'..\'F\',\'a\'..\'f\'] that is. hexdig :: Stream s m Char => ParsecT s u m Char hexdig = hexDigit "hexadecimal digit" -- |Match the tab (\"@\\t@\") character. htab :: Stream s m Char => ParsecT s u m Char htab = char '\t' "horizontal tab" -- |Match \"linear white-space\". That is any number of consecutive -- 'wsp', optionally followed by a 'crlf' and (at least) one more -- 'wsp'. lwsp :: Stream s m Char => ParsecT s u m String lwsp = do r <- choice [ many1 wsp , try (liftM2 (++) crlf (many1 wsp)) ] rs <- option [] lwsp return (r ++ rs) "linear white-space" -- |Match /any/ character. octet :: Stream s m Char => ParsecT s u m Char octet = anyChar "any 8-bit character" -- |Match the space. sp :: Stream s m Char => ParsecT s u m Char sp = char ' ' "space" -- |Match any printable ASCII character. (The \"v\" stands for -- \"visible\".) That is any character in the decimal range of -- [33..126]. vchar :: Stream s m Char => ParsecT s u m Char vchar = satisfy (\c -> (c >= chr 33) && (c <= chr 126)) "printable character" -- |Match either 'sp' or 'htab'. wsp :: Stream s m Char => ParsecT s u m Char wsp = sp <|> htab "white-space" -- ** Useful additions -- |Match a \"quoted pair\". Any characters (excluding CR and -- LF) may be quoted. quoted_pair :: Stream s m Char => ParsecT s u m String quoted_pair = do _ <- char '\\' r <- noneOf "\r\n" return ['\\',r] "quoted pair" -- |Match a quoted string. The specials \"@\\@\" and -- \"@\"@\" must be escaped inside a quoted string; CR and -- LF are not allowed at all. quoted_string :: Stream s m Char => ParsecT s u m String quoted_string = do _ <- dquote r <- many qcont _ <- dquote return ("\"" ++ concat r ++ "\"") "quoted string" where qtext = noneOf "\\\"\r\n" qcont = many1 qtext <|> quoted_pair