{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeSynonymInstances #-} {- BNF Converter: Abstract syntax Copyright (C) 2004 Author: Markus Forsberg, Michael Pellauer, Aarne Ranta -} module BNFC.CF ( -- Types. CF, CFG(..), Rule, Rul(..), npRule, valCat, lookupRule, InternalRule(..), Pragma(..), Exp(..), Base(..), Type(..), Signature, Literal, Symbol, KeyWord, Position(..), noPosition, prettyPosition, npIdentifier, WithPosition(..), blendInPosition, RString, RCat, Cat(..), strToCat, catToStr, BaseCat, TokenCat, catString, catInteger, catDouble, catChar, catIdent, NonTerminal, SentForm, Fun, RFun, IsFun(..), Data, -- describes the abstract syntax of a grammar cf2data, -- translates a grammar to a Data object. cf2dataLists, -- translates to a Data with List categories included. getAbstractSyntax, -- Literal categories, constants, firstEntry, -- the first entry or the first value category baseTokenCatNames, -- "Char", "Double", "Integer", "String" specialCats, -- ident specialCatsP, -- all literals specialData, -- special data isCoercion, -- wildcards in grammar (avoid syntactic clutter) isDefinedRule, -- defined rules (allows syntactic sugar) isProperLabel, -- not coercion or defined rule allCats, -- all categories of a grammar allParserCats, allParserCatsNorm, reallyAllCats, allCatsNorm, allCatsIdNorm, allEntryPoints, reservedWords, cfTokens, literals, findAllReversibleCats, -- find all reversible categories identCat, -- transforms '[C]' to ListC (others, unchanged). isParsable, rulesForCat, -- rules for a given category rulesForNormalizedCat, -- rules for a given category ruleGroups, -- Categories are grouped with their rules. ruleGroupsInternals, --As above, but includes internal cats. allNames, -- Checking for non-unique names, like @Name. Name ::= Ident;@. filterNonUnique, isList, -- Checks if a category is a list category. isTokenCat, maybeTokenCat, baseCat, sameCat, -- Information functions for list functions. isNilFun, -- empty list function? ([]) isOneFun, -- one element list function? (:[]) hasOneFunc, getCons, getSeparatorByPrecedence, isConsFun, -- constructor function? (:) isNilCons, -- either three of above? isEmptyListCat, -- checks if the list permits [] revSepListRule, -- reverse a rule, if it is of form C t [C]. normCat, isDataCat, isDataOrListCat, normCatOfList, -- Removes precendence information and enclosed List. C1 => C, C2 => C catOfList, comments, -- translates the pragmas into two list containing the s./m. comments numberOfBlockCommentForms, tokenPragmas, -- get the user-defined regular expression tokens tokenNames, -- get the names of all user-defined tokens precCat, -- get the precendence level of a Cat C1 => 1, C => 0 precRule, -- get the precendence level of the value category of a rule. isUsedCat, isPositionCat, hasPositionTokens, hasIdent, hasIdentLikeTokens, hasLayout, layoutPragmas, sigLookup -- Get the type of a rule label. ) where import Control.Monad (guard) import Data.Char import Data.Function (on) import Data.List (nub, intersperse, sort, group, intercalate, find) import qualified Data.List as List import Data.List.NonEmpty (pattern (:|), (<|)) import qualified Data.List.NonEmpty as List1 import Data.Maybe import Data.Map (Map) import qualified Data.Map as Map import Data.Set (Set) import qualified Data.Set as Set import Data.String (IsString(..)) import BNFC.Abs (Reg()) import BNFC.Par (pCat) import BNFC.Lex (tokens) import qualified BNFC.Abs as Abs import BNFC.Utils (spanEnd) type List1 = List1.NonEmpty -- | A context free grammar consists of a set of rules and some extended -- information (e.g. pragmas, literals, symbols, keywords). type CF = CFG RFun -- | A rule consists of a function name, a main category and a sequence of -- terminals and non-terminals. -- -- @ -- function_name . Main_Cat ::= sequence -- @ type Rule = Rul RFun -- | Polymorphic rule type. -- N.B.: Was originally made polymorphic for the sake of removed backend --profile. data Rul function = Rule { funRule :: function -- ^ The function (semantic action) of a rule. -- In order to be able to generate data types this must be a constructor -- (or an identity function). , valRCat :: RCat -- ^ The value category, i.e., the defined non-terminal. , rhsRule :: SentForm -- ^ The sentential form, i.e., -- the list of (non)terminals in the right-hand-side of a rule. , internal :: InternalRule -- ^ Is this an "internal" rule only for the AST and printing, -- not for parsing? } deriving (Eq, Functor) data InternalRule = Internal -- ^ @internal@ rule (only for AST & printer) | Parsable -- ^ ordinary rule (also for parser) deriving (Eq) instance (Show function) => Show (Rul function) where show (Rule f cat rhs internal) = unwords $ (if internal == Internal then ("internal" :) else id) $ show f : "." : show cat : "::=" : map (either show id) rhs -- | A sentential form is a sequence of non-terminals or terminals. type SentForm = [Either Cat String] -- | Type of context-free grammars (GFG). data CFG function = CFG { cfgPragmas :: [Pragma] , cfgUsedCats :: Set Cat -- ^ Categories used by the parser. , cfgLiterals :: [Literal] -- ^ @Char, String, Ident, Integer, Double@. -- @String@s are quoted strings, -- and @Ident@s are unquoted. , cfgSymbols :: [Symbol] -- ^ Symbols in the grammar, e.g. “*”, “->”. , cfgKeywords :: [KeyWord] -- ^ Reserved words, e.g. @if@, @while@. , cfgReversibleCats :: [Cat] -- ^ Categories that can be made left-recursive. , cfgRules :: [Rul function] , cfgSignature :: Signature -- ^ Types of rule labels, computed from 'cfgRules'. } deriving (Functor) instance (Show function) => Show (CFG function) where show CFG{..} = unlines $ map show cfgRules -- | Types of the rule labels, together with the position of the rule label. type Signature = Map String (WithPosition Type) -- | Type of a non-terminal. data Base = BaseT String | ListT Base deriving (Eq, Ord) -- | Type of a rule label. data Type = FunT [Base] Base deriving (Eq, Ord) instance Show Base where show (BaseT x) = x show (ListT t) = "[" ++ show t ++ "]" instance Show Type where show (FunT ts t) = unwords $ map show ts ++ ["->", show t] -- | Expressions for function definitions. data Exp = App String [Exp] -- ^ (Possibly defined) label applied to expressions. | Var String -- ^ Function parameter. | LitInt Integer | LitDouble Double | LitChar Char | LitString String deriving (Eq) instance Show Exp where showsPrec p e = case listView e of Right es -> showString "[" . foldr (.) id (intersperse (showString ", ") $ map shows es) . showString "]" Left (Var x) -> showString x Left (App x []) -> showString x Left (App "(:)" [e1,e2]) -> showParen (p>0) $ showsPrec 1 e1 . showString " : " . shows e2 Left (App x es) -> showParen (p>1) $ foldr (.) id $ intersperse (showString " ") $ showString x : map (showsPrec 2) es Left (LitInt n) -> shows n Left (LitDouble x) -> shows x Left (LitChar c) -> shows c Left (LitString s) -> shows s where listView (App "[]" []) = Right [] listView (App "(:)" [e1,e2]) | Right es <- listView e2 = Right $ e1:es listView x = Left x -- | Pragmas. data Pragma = CommentS String -- ^ for single line comments | CommentM (String, String) -- ^ for multiple-line comments. | TokenReg RString Bool Reg -- ^ for tokens | EntryPoints [RCat] | Layout [String] | LayoutStop [String] | LayoutTop | FunDef RFun [String] Exp deriving (Show) -- | User-defined regular expression tokens tokenPragmas :: CFG f -> [(TokenCat,Reg)] tokenPragmas cf = [ (wpThing name, e) | TokenReg name _ e <- cfgPragmas cf ] -- | The names of all user-defined tokens. tokenNames :: CFG f -> [String] tokenNames cf = map fst (tokenPragmas cf) layoutPragmas :: CF -> (Bool,[String],[String]) layoutPragmas cf = let ps = cfgPragmas cf in ( not (null [() | LayoutTop <- ps]), -- if there's layout betw top-level concat [ss | Layout ss <- ps], -- layout-block starting words concat [ss | LayoutStop ss <- ps] -- layout-block ending words ) hasLayout :: CF -> Bool hasLayout cf = case layoutPragmas cf of (t,ws,_) -> t || not (null ws) -- (True,[],_) means: top-level layout only -- | Literal: builtin-token types Char, String, Ident, Integer, Double. type Literal = String type Symbol = String type KeyWord = String ------------------------------------------------------------------------------ -- Identifiers with position information ------------------------------------------------------------------------------ -- | Source positions. data Position = NoPosition | Position { posFile :: FilePath -- ^ Name of the grammar file. , posLine :: Int -- ^ Line in the grammar file. , posColumn :: Int -- ^ Column in the grammar file. } deriving (Show, Eq, Ord) prettyPosition :: Position -> String prettyPosition = \case NoPosition -> "" Position file line col -> List.intercalate ":" [ file, show line, show col, "" ] data WithPosition a = WithPosition { wpPosition :: Position , wpThing :: a } deriving (Show, Functor, Foldable, Traversable) -- | Ignore position in equality and ordering. instance Eq a => Eq (WithPosition a) where (==) = (==) `on` wpThing instance Ord a => Ord (WithPosition a) where compare = compare `on` wpThing noPosition :: a -> WithPosition a noPosition = WithPosition NoPosition -- | A "ranged string" (terminology from Agda code base). type RString = WithPosition String -- | Prefix string with pretty-printed position information. blendInPosition :: RString -> String blendInPosition (WithPosition pos msg) | null p = msg | otherwise = unwords [ p, msg ] where p = prettyPosition pos type RCat = WithPosition Cat valCat :: Rul fun -> Cat valCat = wpThing . valRCat npRule :: Fun -> Cat -> SentForm -> InternalRule -> Rule npRule f c r internal = Rule (noPosition f) (noPosition c) r internal npIdentifier :: String -> Abs.Identifier npIdentifier x = Abs.Identifier ((0, 0), x) -- identifierName :: Identifier -> String -- identifierName (Identifier (_, x)) = x -- identifierPosition :: String -> Identifier -> Position -- identifierPosition file (Identifier ((line, col), _)) = Position file line col ------------------------------------------------------------------------------ -- Categories ------------------------------------------------------------------------------ -- | Categories are the non-terminals of the grammar. data Cat = Cat String -- ^ Ordinary non-terminal. | TokenCat TokenCat -- ^ Token types (like @Ident@, @Integer@, ..., user-defined). | ListCat Cat -- ^ List non-terminals, e.g., @[Ident]@, @[Exp]@, @[Exp1]@. | CoercCat String Integer -- ^ E.g. @Exp1@, @Exp2@. deriving (Eq, Ord) type TokenCat = String type BaseCat = String -- An alias for Cat used in many backends: type NonTerminal = Cat -- | Render category symbols as strings catToStr :: Cat -> String catToStr = \case Cat s -> s TokenCat s -> s ListCat c -> "[" ++ catToStr c ++ "]" CoercCat s i -> s ++ show i instance Show Cat where show = catToStr -- | Reads a string into a category. This should only need to handle -- the case of simple categories (with or without coercion) since list -- categories are parsed in the grammar already. To be on the safe side here, -- we still call the parser function that parses categries. strToCat :: String -> Cat strToCat s = case pCat (tokens s) of Right c -> cat2cat c Left _ -> Cat s -- error $ "Error parsing cat " ++ s ++ " (" ++ e ++ ")" -- Might be one of the "Internal cat" which are not -- really parsable... where cat2cat = \case Abs.IdCat (Abs.Identifier (_pos, x)) | null ds -> if c `elem` specialCatsP then TokenCat c else Cat c | otherwise -> CoercCat c (read ds) where (ds, c) = spanEnd isDigit x Abs.ListCat c -> ListCat (cat2cat c) -- Build-in categories contants catString, catInteger, catDouble, catChar, catIdent :: TokenCat catString = "String" catInteger = "Integer" catDouble = "Double" catChar = "Char" catIdent = "Ident" -- | Token categories corresponding to base types. baseTokenCatNames :: [TokenCat] baseTokenCatNames = [ catChar, catDouble, catInteger, catString ] -- the parser needs these specialCatsP :: [TokenCat] specialCatsP = catIdent : baseTokenCatNames -- | Does the category correspond to a data type? isDataCat :: Cat -> Bool isDataCat c = isDataOrListCat c && not (isList c) isDataOrListCat :: Cat -> Bool isDataOrListCat (CoercCat _ _) = False isDataOrListCat (Cat ('@':_)) = False isDataOrListCat (ListCat c) = isDataOrListCat c isDataOrListCat _ = True -- | Categories C1, C2,... (one digit at the end) are variants of C. This function -- returns true if two category are variants of the same abstract category. -- E.g. -- -- >>> sameCat (Cat "Abc") (CoercCat "Abc" 44) -- True sameCat :: Cat -> Cat -> Bool sameCat (CoercCat c1 _) (CoercCat c2 _) = c1 == c2 sameCat (Cat c1) (CoercCat c2 _) = c1 == c2 sameCat (CoercCat c1 _) (Cat c2) = c1 == c2 sameCat c1 c2 = c1 == c2 -- | Removes precedence information. C1 => C, [C2] => [C] normCat :: Cat -> Cat normCat (ListCat c) = ListCat (normCat c) normCat (CoercCat c _) = Cat c normCat c = c normCatOfList :: Cat -> Cat normCatOfList = normCat . catOfList -- | When given a list Cat, i.e. '[C]', it removes the square -- brackets, and adds the prefix List, i.e. 'ListC'. (for Happy and -- Latex) identCat :: Cat -> String identCat (ListCat c) = "List" ++ identCat c identCat c = catToStr c isList :: Cat -> Bool isList (ListCat _) = True isList _ = False -- | Get the underlying category identifier. baseCat :: Cat -> Either BaseCat TokenCat baseCat = \case ListCat c -> baseCat c CoercCat x _ -> Left x Cat x -> Left x TokenCat x -> Right x isTokenCat :: Cat -> Bool isTokenCat (TokenCat _) = True isTokenCat _ = False maybeTokenCat :: Cat -> Maybe TokenCat maybeTokenCat = \case TokenCat c -> Just c _ -> Nothing -- | Unwraps the list constructor from the category name. -- E.g. @[C1] => C1@. catOfList :: Cat -> Cat catOfList (ListCat c) = c catOfList c = c ------------------------------------------------------------------------------ -- Functions ------------------------------------------------------------------------------ -- | Fun is the function name of a rule. type Fun = String type RFun = RString instance IsString RFun where fromString = noPosition class IsFun a where funName :: a -> String instance IsFun String where funName = id instance IsFun a => IsFun (WithPosition a) where funName = funName . wpThing funNameSatisfies :: IsFun a => (String -> Bool) -> a -> Bool funNameSatisfies f = f . funName -- | Is this function just a coercion? (I.e. the identity) isCoercion :: IsFun a => a -> Bool isCoercion = funNameSatisfies (== "_") -- perhaps this should be changed to "id"? isDefinedRule :: IsFun a => a -> Bool isDefinedRule = funNameSatisfies $ \case (x:_) -> isLower x [] -> error "isDefinedRule: empty function name" -- isDefinedRule :: Fun -> Bool -- isDefinedRule (WithPosition _ (x:_)) = isLower x -- isDefinedRule (WithPosition pos []) = error $ -- unwords [ prettyPosition pos, "isDefinedRule: empty function name" ] isProperLabel :: IsFun a => a -> Bool isProperLabel f = not (isCoercion f || isDefinedRule f) isNilFun, isOneFun, isConsFun, isNilCons,isConcatFun :: IsFun a => a -> Bool isNilCons f = isNilFun f || isOneFun f || isConsFun f || isConcatFun f isNilFun = funNameSatisfies (== "[]") isOneFun = funNameSatisfies (== "(:[])") isConsFun = funNameSatisfies (== "(:)") isConcatFun = funNameSatisfies (== "(++)") ------------------------------------------------------------------------------ -- | The abstract syntax of a grammar. type Data = (Cat, [(String, [Cat])]) -- | @firstEntry@ returns the first of the @entrypoints@, -- or (if none), the first parsable @Cat@egory appearing in the grammar. firstEntry :: CF -> Cat firstEntry cf = List1.head $ allEntryPoints cf -- aggressively ban nonunique names (AR 31/5/2012) -- | Constructors and categories. allNames :: CF -> [RString] allNames cf = [ f | f <- map funRule $ cfgRules cf , not $ isNilCons f , not $ isCoercion f ] ++ allCatsIdNorm cf -- Put the categories after the labels so that the error location -- for a non-unique name is at the label rather than the category. -- | Get all elements with more than one occurrence. filterNonUnique :: (Ord a) => [a] -> [a] filterNonUnique xs = [ x | (x:_:_) <- group $ sort xs ] -- | Extract the comment pragmas. commentPragmas :: [Pragma] -> [Pragma] commentPragmas = filter isComment where isComment (CommentS _) = True isComment (CommentM _) = True isComment _ = False lookupRule :: Eq f => f -> [Rul f] -> Maybe (Cat, SentForm) lookupRule f = lookup f . map unRule where unRule (Rule f' c rhs _internal) = (f', (wpThing c, rhs)) -- | Returns all parseable rules that construct the given Cat. -- Whitespace separators have been removed. rulesForCat :: CF -> Cat -> [Rule] rulesForCat cf cat = [ removeWhiteSpaceSeparators r | r <- cfgRules cf, isParsable r, valCat r == cat] removeWhiteSpaceSeparators :: Rul f -> Rul f removeWhiteSpaceSeparators = mapRhs $ mapMaybe $ either (Just . Left) $ \ sep -> if all isSpace sep then Nothing else Just (Right sep) -- | Modify the 'rhsRule' part of a 'Rule'. mapRhs :: (SentForm -> SentForm) -> Rul f -> Rul f mapRhs f r = r { rhsRule = f $ rhsRule r } -- | Like rulesForCat but for normalized value categories. -- I.e., `rulesForCat (Cat "Exp")` will return rules for category Exp but also -- Exp1, Exp2... in case of coercion rulesForNormalizedCat :: CF -> Cat -> [Rule] rulesForNormalizedCat cf cat = [r | r <- cfgRules cf, isParsable r, normCat (valCat r) == cat] -- | As rulesForCat, but this version doesn't exclude internal rules. rulesForCat' :: CF -> Cat -> [Rule] rulesForCat' cf cat = [r | r <- cfgRules cf, valCat r == cat] -- | Get all categories of a grammar matching the filter. -- (No Cat w/o production returned; no duplicates.) allCats :: (InternalRule -> Bool) -> CFG f -> [Cat] allCats pred = nub . map valCat . filter (pred . internal) . cfgRules -- | Get all categories of a grammar. -- (No Cat w/o production returned; no duplicates.) reallyAllCats :: CFG f -> [Cat] reallyAllCats = allCats $ const True allParserCats :: CFG f -> [Cat] allParserCats = allCats (== Parsable) -- | Gets all normalized identified Categories allCatsIdNorm :: CF -> [RString] allCatsIdNorm = nub . map (fmap (identCat . normCat) . valRCat) . cfgRules -- | Get all normalized Cat allCatsNorm :: CF -> [Cat] allCatsNorm = nub . map (normCat . valCat) . cfgRules -- | Get all normalized Cat allParserCatsNorm :: CFG f -> [Cat] allParserCatsNorm = nub . map normCat . allParserCats -- | Is the category is used on an rhs? -- Includes internal rules. isUsedCat :: CFG f -> Cat -> Bool isUsedCat cf = (`Set.member` cfgUsedCats cf) -- | Group all parsable categories with their rules. -- Deletes whitespace separators, as they will not become part of the parsing rules. ruleGroups :: CF -> [(Cat,[Rule])] ruleGroups cf = [(c, rulesForCat cf c) | c <- allParserCats cf] -- | Group all categories with their rules including internal rules. ruleGroupsInternals :: CF -> [(Cat,[Rule])] ruleGroupsInternals cf = [(c, rulesForCat' cf c) | c <- reallyAllCats cf] -- | Get all literals of a grammar. (e.g. String, Double) literals :: CFG f -> [TokenCat] literals cf = cfgLiterals cf ++ map fst (tokenPragmas cf) -- | Get the keywords of a grammar. reservedWords :: CFG f -> [String] reservedWords = sort . cfgKeywords -- | Canonical, numbered list of symbols and reserved words. (These do -- not end up in the AST.) cfTokens :: CFG f -> [(String,Int)] cfTokens cf = zip (sort (cfgSymbols cf ++ reservedWords cf)) [1..] -- NOTE: some backends (incl. Haskell) assume that this list is sorted. -- | Comments can be defined by the 'comment' pragma comments :: CF -> ([(String,String)],[String]) comments cf = ([p | CommentM p <- xs], [s | CommentS s <- xs]) where xs = commentPragmas (cfgPragmas cf) -- | Number of block comment forms defined in the grammar file. numberOfBlockCommentForms :: CF -> Int numberOfBlockCommentForms = length . fst . comments -- built-in categories (corresponds to lexer) -- | Whether the grammar uses the predefined Ident type. hasIdent :: CFG f -> Bool hasIdent cf = isUsedCat cf $ TokenCat catIdent -- these need new datatypes -- | Categories corresponding to tokens. These end up in the -- AST. (unlike tokens returned by 'cfTokens') specialCats :: CF -> [TokenCat] specialCats cf = (if hasIdent cf then (catIdent:) else id) (map fst (tokenPragmas cf)) -- * abstract syntax trees: data type definitions -- -- The abstract syntax, instantiated by the Data type, is the type signatures -- of all the constructors. -- | Return the abstract syntax of the grammar. -- All categories are normalized, so a rule like: -- EAdd . Exp2 ::= Exp2 "+" Exp3 ; -- Will give the following signature: EAdd : Exp -> Exp -> Exp getAbstractSyntax :: CF -> [Data] getAbstractSyntax cf = [ ( c, nub (constructors c) ) | c <- allCatsNorm cf ] where constructors cat = do rule <- cfgRules cf let f = funRule rule guard $ not (isDefinedRule f) guard $ not (isCoercion f) guard $ normCat (valCat rule) == cat let cs = [normCat c | Left c <- rhsRule rule ] return (wpThing f, cs) -- | All the functions below implement the idea of getting the -- abstract syntax of the grammar with some variation but they seem to do a -- poor job at handling corner cases involving coercions. -- Use 'getAbstractSyntax' instead if possible. cf2data' :: (Cat -> Bool) -> CF -> [Data] cf2data' predicate cf = [(cat, nub (map mkData [r | r <- cfgRules cf, let f = funRule r, not (isDefinedRule f), not (isCoercion f), sameCat cat (valCat r)])) | cat <- nub $ map normCat $ filter predicate $ reallyAllCats cf ] where mkData (Rule f _ its _) = (wpThing f, [normCat c | Left c <- its ]) cf2data :: CF -> [Data] cf2data = cf2data' $ isDataCat . normCat cf2dataLists :: CF -> [Data] cf2dataLists = cf2data' $ isDataOrListCat . normCat specialData :: CF -> [Data] specialData cf = [(TokenCat name, [(name, [TokenCat catString])]) | name <- specialCats cf] -- | Get the type of a rule label. sigLookup :: IsFun a => a -> CF -> Maybe (WithPosition Type) sigLookup f = Map.lookup (funName f) . cfgSignature -- | Checks if the rule is parsable. isParsable :: Rul f -> Bool isParsable = (Parsable ==) . internal -- | Checks if the list has a non-empty rule. hasOneFunc :: [Rule] -> Bool hasOneFunc = any (isOneFun . funRule) -- | Gets the separator for a list. getCons :: [Rule] -> String getCons rs = case find (isConsFun . funRule) rs of Just (Rule _ _ cats _) -> seper cats Nothing -> error $ "getCons: no construction function found in " ++ intercalate ", " (map (show . funRule) rs) where seper [] = [] seper (Right x:_) = x seper (Left _:xs) = seper xs -- | Helper function that gets the list separator by precedence level getSeparatorByPrecedence :: [Rule] -> [(Integer,String)] getSeparatorByPrecedence rules = [ (p, getCons (getRulesFor p)) | p <- precedences ] where precedences = Set.toDescList . Set.fromList $ map precRule rules getRulesFor p = [ r | r <- rules, precRule r == p ] isEmptyListCat :: CF -> Cat -> Bool isEmptyListCat cf c = elem "[]" $ map (wpThing . funRule) $ rulesForCat' cf c isNonterm :: Either Cat String -> Bool isNonterm (Left _) = True isNonterm (Right _) = False -- used in Happy to parse lists of form 'C t [C]' in reverse order -- applies only if the [] rule has no terminals revSepListRule :: Rul f -> Rul f revSepListRule (Rule f c ts internal) = Rule f c (xs : x : sep) internal where (x,sep,xs) = (head ts, init (tail ts), last ts) -- invariant: test in findAllReversibleCats have been performed findAllReversibleCats :: CF -> [Cat] findAllReversibleCats cf = [c | (c,r) <- ruleGroups cf, isRev c r] where isRev c rs = case rs of [r1,r2] | isList c -> if isConsFun (funRule r2) then tryRev r2 r1 else isConsFun (funRule r1) && tryRev r1 r2 _ -> False tryRev (Rule f _ ts@(x:_:_) _) r = isEmptyNilRule r && isConsFun f && isNonterm x && isNonterm (last ts) tryRev _ _ = False isEmptyNilRule :: IsFun a => Rul a -> Bool isEmptyNilRule (Rule f _ ts _) = isNilFun f && null ts -- | Returns the precedence of a category symbol. -- E.g. -- >>> precCat (CoercCat "Abc" 4) -- 4 precCat :: Cat -> Integer precCat (CoercCat _ i) = i precCat (ListCat c) = precCat c precCat _ = 0 precRule :: Rul f -> Integer precRule = precCat . valCat -- | Defines or uses the grammar token types like @Ident@? -- Excludes position tokens. hasIdentLikeTokens :: CFG g -> Bool hasIdentLikeTokens cf = hasIdent cf || or [ not b | TokenReg _ b _ <- cfgPragmas cf ] -- | Is there a @position token@ declaration in the grammar? hasPositionTokens :: CFG g -> Bool hasPositionTokens cf = or [ b | TokenReg _ b _ <- cfgPragmas cf ] -- | Does the category have a position stored in AST? isPositionCat :: CFG f -> TokenCat -> Bool isPositionCat cf cat = or [ b | TokenReg name b _ <- cfgPragmas cf, wpThing name == cat] -- | Categories that are entry points to the parser. -- -- These are either the declared @entrypoints@ (in the original order), -- or, if no @entrypoints@ were declared explicitly, -- all parsable categories (in the order of declaration in the grammar file). allEntryPoints :: CFG f -> List1 Cat allEntryPoints cf = case concat [ cats | EntryPoints cats <- cfgPragmas cf ] of [] -> List1.fromList $ allParserCats cf -- assumed to be non-empty c:cs -> fmap wpThing (c :| cs)