{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE RankNTypes #-} -- | Implements a Pratt ("top down operator precendence") Parser as a layer -- on top of Parsec. See http://javascript.crockford.com/tdop/tdop.html -- and http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ -- for descriptions of the algorithm. -- -- =A note on type usage -- In all of the types used in this module, the type parameters are named as follows: -- -- - 's' => the stream type which will be parsed (typically String) -- - 'u' => the user state type for the parser (e.g. '()') -- - 'm' => the monad underlying the parser (e.g. 'Identity') -- - 'e' => the type of the expression returned by the parser -- - 'o' => the type of operators (typically String). -- - 't' => the type of tokens produced from the stream (not actually -- referenced here, but required to exist by Parsec) -- -- The following instances are required to exist: -- -- - Stream s m t -- - Show t -- - Monad m -- - Ord o -- - Show o module Text.Parsec.PrattParser where ------------------------------------------------------------------------------ -- dependencies ------------------------------------------------------------------------------ import Text.Parsec import Control.Monad import qualified Data.Map as Map ------------------------------------------------------------------------------ -- basic data types ------------------------------------------------------------------------------ -- | Identifies the precedence and associativity of an operator. Higher -- numbers bind more strongly to the adjacent terms. data OperatorPrecedence = LAssoc Int | RAssoc Int deriving (Show, Eq) -- | Provides all the information needed to handle an infix operator, i.e. -- the operator's symbol (type "o", typically a string), precedence, -- and its 'LeftDenotation'. data OperatorInfo s u m e o = OperatorInfo o OperatorPrecedence (LeftDenotation s u m e o) -- | Provides all the information needed to handle a prefix operator, i.e. -- the operator's symbol (type "o", typicall a string), and either a -- 'PrefixBinder' (for simple operators) or a 'NullDenotation' (for operators -- that need to perform additional parsing). data PrefixOperatorInfo s u m e o = SimplePrefixOperator o (PrefixBinder s u m e o) | PrefixParserOperator o (NullDenotation s u m e) -- | A PrecedenceParser is a function that, given a precedence, parses expressions -- which contain operators whose precedence is greater than or equal to the -- specified precedence. type PrecedenceParser s u m e = OperatorPrecedence -> ParsecT s u m e -- | A NullDenotation is a function that generates a parser for terms that do not have a left hand -- term to bind to. It receives a 'PrecedenceParser' as an argument that can be -- used to recursively parse an expression. type NullDenotation s u m e = PrecedenceParser s u m e -> ParsecT s u m e -- | a PrefixBinder binds a prefix operator with the expression to its right type PrefixBinder s u m e o = PrefixOperatorInfo s u m e o -> e -> e -- | a LeftDenotation is a function for producing a parser that binds to a -- left hand term. Its arguments are: -- -- * The 'OperatorInfo' of the operator being parsed -- * The expression for the term on the left -- * A function that can be used to parse additional terms -- up to a given precedence (which should usually be the -- precedence of the operator itself). type LeftDenotation s u m e o = OperatorInfo s u m e o -> e -> PrecedenceParser s u m e -> ParsecT s u m e -- | type of parser transformers that can be used to remove extraneous text -- (eg removing whitespace and/or comments) before a useful token occurs type ContentStripper s u m a = ParsecT s u m a -- | Type for defining a parser that returns operator symbols. type OperatorParser s u m o = ParsecT s u m o ------------------------------------------------------------------------------ -- Type manipulation utilities ------------------------------------------------------------------------------ -- | Return the precedence of an infix operator operatorInfoPrecedence :: OperatorInfo s u m e o -> OperatorPrecedence operatorInfoPrecedence (OperatorInfo _ p _) = p -- | Return the symbol of an infix operator operatorInfoName :: OperatorInfo s u m e o -> o operatorInfoName (OperatorInfo n _ _) = n -- | Return the symbol of a prefix operator prefixOperatorInfoName :: PrefixOperatorInfo s u m e o -> o prefixOperatorInfoName (SimplePrefixOperator n _) = n prefixOperatorInfoName (PrefixParserOperator n _) = n ------------------------------------------------------------------------------ -- Parser ------------------------------------------------------------------------------ -- | Builds a Pratt parser for expressions with a given set of operators -- and parsers for individual components. The arguments are: -- -- * A list of infix operator descriptions -- * A list of prefix operator descriptions -- * A content stripper (a parser whose return value is ignored, which strips -- whitespace/comments/anything else that isn't part of the expression -- * An operator parser, which returns symbols as used in the operator -- descriptions -- * A 'NullDenotation' that parses individual terms and recursively calls -- back into the parser to bind expressions to them. buildPrattParser :: forall s u m e o t a . Stream s m t => Show t => Monad m => Ord o => Show o => [OperatorInfo s u m e o] -> [PrefixOperatorInfo s u m e o] -> ContentStripper s u m a -> OperatorParser s u m o -> NullDenotation s u m e -> ParsecT s u m e buildPrattParser operators prefixOperators strip operator nud = parseExpr where parseExpr :: ParsecT s u m e parseExpr = parseExprWithMinimumPrecedence (RAssoc 0) <* strip parseExprWithMinimumPrecedence :: OperatorPrecedence -> ParsecT s u m e parseExprWithMinimumPrecedence precedence = do strip term <- nudOrPrefixOp strip parseInfix (precedenceValue precedence) term where precedenceValue (LAssoc n) = n + 1 precedenceValue (RAssoc n) = n -- parse prefix operators or pass on to null denotation nudOrPrefixOp :: ParsecT s u m e nudOrPrefixOp = try parsePrefixOp <|> nud parseExprWithMinimumPrecedence parsePrefixOp :: ParsecT s u m e parsePrefixOp = do op <- try (operator <* strip) case Map.lookup op prefixOperatorMap of Just opInfo@(SimplePrefixOperator _ binder) -> do rhs <- nudOrPrefixOp return $ binder opInfo rhs Just (PrefixParserOperator _ pnud) -> pnud parseExprWithMinimumPrecedence Nothing -> fail ("Operator " ++ (show op) ++ " not allowed as a prefix") -- given an already parsed expression, parse that -- may optionally follow it parseInfix :: Int -> e -> ParsecT s u m e parseInfix precedence lhs = do maybeOp <- optionMaybe (try $ nextOperator precedence) case maybeOp of Just name -> (bindOperatorLeft name lhs >>= parseInfix precedence) <* strip Nothing -> return lhs where -- if we're at base precedence level, all operators should be -- accepted, so we want errors if they're not recognised. nextOperator 0 = operator -- otherwise, we only accept operators with precedence equal to -- or higher than the current precedence nextOperator p = operatorWithMinimumPrecedence p bindOperatorLeft :: o -> e -> ParsecT s u m e bindOperatorLeft name lhs = case Map.lookup name operatorMap of Just opInfo@(OperatorInfo _ _ leftDenotation) -> leftDenotation opInfo lhs parseExprWithMinimumPrecedence Nothing -> error $ "Unknown operator \"" ++ (show name) ++ "\"" operatorMap :: Map.Map o (OperatorInfo s u m e o) operatorMap = mapFrom operatorInfoName operators mapFrom :: Ord k => (v -> k) -> [v] -> Map.Map k v mapFrom getKey values = Map.fromList (map (\x -> (getKey x, x)) values) prefixOperatorMap :: Map.Map o (PrefixOperatorInfo s u m e o) prefixOperatorMap = mapFrom prefixOperatorInfoName prefixOperators operatorPrecedence :: o -> Maybe OperatorPrecedence operatorPrecedence name = liftM operatorInfoPrecedence (Map.lookup name operatorMap) -- parse an operator only if the next operator has at least minimum -- precedence (will usually be used with 'try', so error message caused -- on failure should never appear in output) operatorWithMinimumPrecedence :: Int -> ParsecT s u m o operatorWithMinimumPrecedence m = do op <- operator strip case operatorPrecedence op of Just (LAssoc precedence) | precedence >= m -> return op Just (RAssoc precedence) | precedence >= m -> return op Just _ -> fail "Precedence below minimum expected" Nothing -> fail $ "Illegal operator " ++ (show op)