{- | Module : Text.ParserCombinators.Parsec.Rfc2234NS Copyright : (c) 2008 Peter Simons License : BSD3 Maintainer : jfredett@gmail.com Stability : provisional Portability : portable This module provides parsers for the grammar defined in RFC2234, \"Augmented BNF for Syntax Specifications: ABNF\", . The terminal called @char@ in the RFC is called 'character' here to avoid conflicts with Parsec's 'char' function. Addendum for Nonstandard Version: This module deviates from the RFC currently in * none currently. These allowances are subject to change, and should not be used when parsing incoming messages, only for parsing messages that have been stored on disk. The goal of these nonstandard Parsers is to provide a higher probability of parsing _common_ Headers (rather than only those explicitly defined in the RFC) as well as allowing for potential oddities / changes that may occur during storage of an email message. These parsers have be rebranded so as not to conflict with the standard parsers available from the excellent 'hsemail' package, upon which this package depends. For patches to this package only (namely 'hsemail-ns', patches should be sent to , for patches to the Proper parsers, you can send them to the original maintainer. -} module Text.ParserCombinators.Parsec.Rfc2234NS where import Text.ParserCombinators.Parsec import Data.Char ( toUpper, chr, ord ) import Control.Monad ( liftM2 ) ---------------------------------------------------------------------- -- * Parser Combinators ---------------------------------------------------------------------- -- |Case-insensitive variant of Parsec's 'char' function. caseChar :: Char -> CharParser st Char caseChar c = satisfy (\x -> toUpper x == toUpper c) -- |Case-insensitive variant of Parsec's 'string' function. caseString :: String -> CharParser st () caseString cs = mapM_ caseChar cs cs -- |Match a parser at least @n@ times. manyN :: Int -> GenParser a b c -> GenParser a b [c] manyN n p | n <= 0 = return [] | otherwise = liftM2 (++) (count n p) (many p) -- |Match a parser at least @n@ times, but no more than @m@ times. manyNtoM :: Int -> Int -> GenParser a b c -> GenParser a b [c] manyNtoM n m p | n < 0 = return [] | n > m = return [] | n == m = count n p | n == 0 = foldr (<|>) (return []) (map (\x -> try $ count x p) (reverse [1..m])) | otherwise = liftM2 (++) (count n p) (manyNtoM 0 (m-n) p) -- |Helper function to generate 'Parser'-based instances for -- the 'Read' class. parsec2read :: Parser a -> String -> [(a, String)] parsec2read f x = either (error . show) id (parse f' "" x) where f' = do { a <- f; res <- getInput; return [(a,res)] } ---------------------------------------------------------------------- -- * Primitive Parsers ---------------------------------------------------------------------- -- |Match any character of the alphabet. alpha :: CharParser st Char alpha = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'])) "alphabetic character" -- |Match either \"1\" or \"0\". bit :: CharParser st Char bit = oneOf "01" "bit ('0' or '1')" -- |Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is). character :: CharParser st Char character = satisfy (\c -> (c >= chr 1) && (c <= chr 127)) "7-bit character excluding NUL" -- |Match the carriage return character @\\r@. cr :: CharParser st Char cr = char '\r' "carriage return" -- |Match returns the linefeed character @\\n@. lf :: CharParser st Char lf = char '\n' "linefeed" -- |Match the Internet newline @\\r\\n@. crlf :: CharParser st String crlf = (choice . map try) [cr >> lf, lf >> cr, cr, lf] >>= return . return "eol sequence" -- |Match any US-ASCII control character. That is -- any character with a decimal value in the range of [0..31,127]. ctl :: CharParser st Char ctl = satisfy (\c -> ord c `elem` ([0..31] ++ [127])) "control character" -- |Match the double quote character \"@\"@\". dquote :: CharParser st Char dquote = char (chr 34) "double quote" -- |Match any character that is valid in a hexadecimal number; -- [\'0\'..\'9\'] and [\'A\'..\'F\',\'a\'..\'f\'] that is. hexdig :: CharParser st Char hexdig = hexDigit "hexadecimal digit" -- |Match the tab (\"@\\t@\") character. htab :: CharParser st Char htab = char '\t' "horizontal tab" -- |Match \"linear white-space\". That is any number of consecutive -- 'wsp', optionally followed by a 'crlf' and (at least) one more -- 'wsp'. lwsp :: CharParser st String lwsp = do r <- choice [ many1 wsp , try (liftM2 (++) crlf (many1 wsp)) ] rs <- option [] lwsp return (r ++ rs) "linear white-space" -- |Match /any/ character. octet :: CharParser st Char octet = anyChar "any 8-bit character" -- |Match the space. sp :: CharParser st Char sp = char ' ' "space" -- |Match any printable ASCII character. (The \"v\" stands for -- \"visible\".) That is any character in the decimal range of -- [33..126]. vchar :: CharParser st Char vchar = satisfy (\c -> (c >= chr 33) && (c <= chr 126)) "printable character" -- |Match either 'sp' or 'htab'. wsp :: CharParser st Char wsp = sp <|> htab "white-space" -- ** Useful additions -- |Match a \"quoted pair\". Any characters (excluding CR and -- LF) may be quoted. quotedPair :: CharParser st String quotedPair = do char '\\' r <- noneOf "\r\n" return ['\\',r] "quoted pair" -- |Match a quoted string. The specials \"@\\@\" and -- \"@\"@\" must be escaped inside a quoted string; CR and -- LF are not allowed at all. quotedString :: CharParser st String quotedString = do dquote r <- many qcont dquote return ("\"" ++ concat r ++ "\"") "quoted string" where qtext = noneOf "\\\"\r\n" qcont = many1 qtext <|> quotedPair