{-| Module : Text.LParse.Metaparser Description : Underlying data structure for sequential parsing Copyright : (c) Marcus Völker, 2018 License : MIT Maintainer : marcus.voelker@rwth-aachen.de This module contains the Metaparser, which is a parser that parses a grammar in EBNF and returns a parser that parses that grammar into an AST -} module Text.LParse.Metaparser (specParse,metaParser,AST (..)) where import Control.Applicative import Control.Arrow import Control.Monad.Fix import Data.Char import Data.List import qualified Data.Map.Strict as M import Data.Maybe import Text.LParse.Parser import Text.LParse.Prebuilt data Token = Literal Char | CharClass String | RuleName String | Integer | Digit | Word | Star | Plus | May | Or | Is | Eoi | LParen | RParen | Sep | Whitespace deriving (Show,Eq) -- | Abstract Syntax Tree generated by the metaparser, suitable for postprocessing data AST = Node String [AST] | ILeaf Integer | SLeaf String | EOI deriving Eq instance Show AST where show (Node s cs) = s ++ "(" ++ intercalate "," (map show cs) ++ ")" show (ILeaf i) = show i show (SLeaf s) = s show EOI = "$" escaped :: Parser r String Token escaped = consumeSReturn 'i' Integer <|> consumeSReturn 'd' Digit <|> consumeSReturn 'w' Word <|> consumeSReturn '\\' (Literal '\\') <|> consumeSReturn '*' (Literal '*') <|> consumeSReturn '+' (Literal '+') <|> consumeSReturn '?' (Literal '?') <|> consumeSReturn '[' (Literal '[') <|> consumeSReturn ']' (Literal ']') <|> consumeSReturn '(' (Literal '(') <|> consumeSReturn ')' (Literal ')') <|> consumeSReturn '$' (Literal '$') charclass :: Parser r String Token charclass = CharClass <$> some (nParse (/=']') tokenReturn "Expected character") simpleSpecial :: Parser r String Token simpleSpecial = consumeSReturn '*' Star <|> consumeSReturn '+' Plus <|> consumeSReturn '?' May <|> consumeSReturn '|' Or <|> consumeSReturn '$' Eoi <|> consumeSReturn '(' LParen <|> consumeSReturn ')' RParen <|> consumeReturn "::=" Is <|> consumeSReturn ';' Sep whitespace :: Parser r String Token whitespace = some (nParse isSpace tokenReturn "Expected Space") >> return Whitespace ruleName :: Parser r String Token ruleName = consumeSingle '%' >> (RuleName <$> word) metaTokenizer :: Parser r String [Token] metaTokenizer = many ( (consumeSingle '\\' >> escaped) <|> surround "[]" charclass <|> simpleSpecial <|> ruleName <|> whitespace <|> (Literal <$> tokenReturn) ) iLeafParser :: Parser r [Token] (Parser r' String AST) iLeafParser = consumeSReturn Integer (ILeaf <$> integer) dLeafParser :: Parser r [Token] (Parser r' String AST) dLeafParser = consumeSReturn Digit (ILeaf <$> digit) sLeafParser :: Parser r [Token] (Parser r' String AST) sLeafParser = consumeSReturn Word (SLeaf <$> word) eoiParser :: Parser r [Token] (Parser r' String AST) eoiParser = consumeSReturn Eoi (eoi >> return EOI) isRuleName :: Token -> Bool isRuleName (RuleName _) = True isRuleName _ = False getRuleName :: Token -> String getRuleName (RuleName s) = s subRuleParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String AST) subRuleParser m = nParse isRuleName (tokenParse ((m M.!) . getRuleName)) "Expected RuleName" charClassCharParser :: String -> Parser r String Char charClassCharParser (c:s) | c == '^' = nParse (not . (`elem` s)) tokenReturn ("Expected not [" ++ s ++ "]") charClassCharParser s = nParse (`elem` s) tokenReturn ("Expected [" ++ s ++ "]") isCharClass :: Token -> Bool isCharClass (CharClass _) = True isCharClass _ = False getCharClass :: Token -> String getCharClass (CharClass s) = s charClassParser :: Parser r [Token] (Parser r' String AST) charClassParser = nParse isCharClass (tokenParse (fmap (SLeaf . return) . charClassCharParser . getCharClass)) "Expected character class" isLiteral :: Token -> Bool isLiteral (Literal _) = True isLiteral _ = False getLiteral :: Token -> Char getLiteral (Literal s) = s parseLiteral :: Parser r [Token] Char parseLiteral = nParse isLiteral (tokenParse getLiteral) "Expected Literal" charParser :: Parser r [Token] (Parser r' String AST) charParser = fmap (SLeaf . return) . (\c -> consumeSReturn c c) <$> parseLiteral atomParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String AST) atomParser m = iLeafParser <|> dLeafParser <|> sLeafParser <|> eoiParser <|> charClassParser <|> subRuleParser m <|> charParser starFreeParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String [AST]) starFreeParser m = surround [LParen,RParen] (cfexParser m) <|> (fmap return <$> atomParser m) concatFreeParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String [AST]) concatFreeParser m = do sf <- starFreeParser m (consumeSingle Star >> return (concat <$> many sf)) <|> (consumeSingle Plus >> return (concat <$> some sf)) <|> (consumeSingle May >> return (concat . maybeToList <$> try sf)) <|> return sf orFreeParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String [AST]) orFreeParser m = fmap concat . sequenceA <$> some (concatFreeParser m) cfexParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String [AST]) cfexParser m = foldl1 (<|>) <$> sepSome (consumeSingle Or) (orFreeParser m) ruleParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (String,Parser r' String AST) ruleParser m = (\s as-> (s,Node s <$> as)) <$> (many (nParse isLiteral (tokenParse getLiteral) "Expected Literal") << consumeSingle Is) <*> cfexParser m rulesetParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (M.Map String (Parser r' String AST)) rulesetParser m = M.fromList<$> sepMany (consumeSingle Sep) (ruleParser m) rulesetLoop :: Parser r [Token] (M.Map String (Parser r' String AST)) rulesetLoop = pfix rulesetParser combine :: Maybe (M.Map String (Parser r' String AST)) -> Parser r [Token] (Parser r' String [AST]) combine Nothing = cfexParser M.empty combine (Just rs) = cfexParser rs fullParser :: Parser r [Token] (Parser r' String AST) fullParser = try (rulesetLoop << consumeSingle Sep) >>= (fmap (fmap (Node "_")) . combine) parserParser :: Parser r [Token] (Parser r' String AST) parserParser = fullParser << eoi -- | Parser that takes a grammar and returns a parser that parses that grammar into an @AST@ metaParser :: Parser r String (Parser r' String AST) metaParser = metaTokenizer >>> skip [Whitespace] parserParser -- | Convenience function chaining creation and usage of the @metaParser@ into a single invocation. specParse :: String -> String -> Either String AST specParse g i = doParse metaParser g >>= (`doParse` i)