{-#Language PatternGuards#-} {- BNF Converter: Abstract syntax Copyright (C) 2004 Author: Markus Forberg, Michael Pellauer, Aarne Ranta This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} module Language.LBNF.CF ( -- Types. CF, RHS, Rule, funRule, isTokenRule, Pragma(..), Reg(..), Exp(..), Literal, Symbol, KeyWord, Cat, Fun, Tree(..), prTree, -- print an abstract syntax tree Data, -- describes the abstract syntax of a grammar cf2data, -- translates a grammar to a Data object. -- cf2dataLists, -- translates to a Data with List categories included. -- Literal categories, constants, firstCat, -- the first value category in the grammar. firstEntry, -- the first entry or the first value category specialCats, -- ident specialCatsP, -- all literals specialData, -- special data isCoercion, -- wildcards in grammar (avoid syntactic clutter) isDefinedRule, -- defined rules (allows syntactic sugar) isProperLabel, -- not coercion or defined rule allCats, -- all categories of a grammar allCatsIdNorm, allEntryPoints, -- those categories that are entry points to the parser reservedWords, -- get the keywords of a grammar. symbols, -- get all symbols literals, -- get all literals of a grammar. (e.g. String, Double) typed_literals, reversibleCats, -- categories that is left-recursive transformable. findAllReversibleCats, -- find all reversible categories identCat, -- transforms '[C]' to ListC (others, unchanged). valCat, -- The value category of a rule. isParsable, -- Checks if the rule is parsable. rulesOfCF, -- All rules of a grammar. rulesForCat, -- rules for a given category ruleGroups, -- Categories are grouped with their rules. ruleGroupsInternals, --As above, but includes internal cats. notUniqueFuns, -- Returns a list of function labels that are not unique. badInheritence, -- Returns a list of all function labels that can cause problems in languages with inheritence. isList, -- Checks if a category is a list category. -- Information functions for list functions. isNilFun, -- empty list function? ([]) isOneFun, -- one element list function? (:[]) isConsFun, -- constructor function? (:) isNilCons, -- either three of above? isEmptyListCat, -- checks if the list permits [] revSepListRule, -- reverse a rule, if it is of form C t [C]. rhsRule, -- The list of Terminals/NonTerminals of a rule. normCat, -- Removes precendence information. C1 => C, [C2] => [C] normCatOfList, -- Removes precendence information and enclosed List. C1 => C, C2 => C catOfList, -- Removes enclosed list: [C1] => C1 comments, -- translates the pragmas into two list containing the s./m. comments ruleTokens, tokenPragmas, -- user-defined regular expression tokens tokenNames, -- The names of all user-defined tokens precCat, -- get the precendence level of a Cat C1 => 1, C => 0 precLevels, -- get all precendence levels in the grammar, sorted in increasing order. precRule, -- get the precendence level of the value category of a rule. precCF, -- Check if the CF consists of precendence levels. isUsedCat, internalCat, -- the symbol # isPositionCat, -- category that has a position in AST isNormal, isAqFun, hasIdent, hasLayout, hasAq, rename, renameAq, renameAqt, unAq, unAqs, aqSyntax, -- resolveAq, layoutPragmas, derivations, checkRule, visibleNames, quoterName, quoters, {- CFP, -- CF with profiles RuleP, FunP, Prof, cf2cfpRule, cf2cfp, cfp2cf, trivialProf, rulesOfCFP, funRuleP, ruleGroupsP, allCatsP, allEntryPointsP -} ) where import Language.LBNF.Utils (prParenth,(+++)) import Data.List (nub, intersperse, partition, sort,sort,group) import Data.Char import Language.LBNF.Grammar (Reg()) -- A context free grammar consists of a set of rules and some extended -- information (e.g. pragmas, literals, symbols, keywords) -- data CF = MkCF { -- rulesOfCF :: CF -> [Rule] -- , infoOfCF :: CFG f -> Info -- , pragmasOfCF :: CFG f -> [Pragma] -- } type CF = (Exts,[Rule]) rulesOfCF :: CF -> [Rule] rulesOfCF = snd infoOfCF :: CFG f -> Info infoOfCF = snd . fst pragmasOfCF :: CFG f -> [Pragma] pragmasOfCF = fst . fst -- A rule consists of a function name, a main category and a sequence of -- terminals and non-terminals. -- function_name . Main_Cat ::= sequence isTokenRule :: Rule -> Bool isTokenRule = either (const False) (const True) . rhsRule funRule :: Rule -> Fun funRule = fst oldRHS = either id (const []) rhsRule :: Rule -> RHS rhsRule = snd . snd type RHS = Either [Either Cat String] (Reg,String) type Rule = (Fun, (Cat, RHS)) -- polymorphic types for common type signatures for CF and CFP type Rul f = Rule -- (f, (Cat, [Either Cat String])) type CFG f = CF -- (Exts,[Rul f]) type Exts = ([Pragma],Info) -- Info is information extracted from the CF, for easy access. -- Literals - Char, String, Ident, Integer, Double -- Strings are quoted strings, and Ident are unquoted. -- Symbols - symbols in the grammar, e.g. '*', '->'. -- KeyWord - reserved words, e.g. 'if' 'while' type Info = ([Literal],[Symbol],[KeyWord],[Cat]) -- Expressions for function definitions data Exp = App String [Exp] | LitInt Integer | LitDouble Double | LitChar Char | LitString String deriving (Eq) instance Show Exp where showsPrec p e = case listView e of Right es -> showString "[" . foldr (.) id (intersperse (showString ", ") $ map shows es) . showString "]" Left (App x []) -> showString x Left (App "(:)" [e1,e2]) -> showParen (p>0) $ showsPrec 1 e1 . showString " : " . shows e2 Left (App x es) -> showParen (p>1) $ foldr (.) id $ intersperse (showString " ") $ showString x : map (showsPrec 2) es Left (LitInt n) -> shows n Left (LitDouble x) -> shows x Left (LitChar c) -> shows c Left (LitString s) -> shows s where listView (App "[]" []) = Right [] listView (App "(:)" [e1,e2]) | Right es <- listView e2 = Right $ e1:es listView e = Left e -- pragmas for single line comments and for multiple-line comments. data Pragma = CommentS String | CommentM (String,String) | TokenReg String Bool Reg | EntryPoints [Cat] | Layout [String] | LayoutStop [String] | LayoutTop | Derive [String] | FunDef String [String] Exp | AntiQuote String String String -- ... deriving (Show, Eq) ruleTokens :: CF -> [(String,Reg)] ruleTokens cf = [(token,reg) | (fun,(c,Right (reg,token))) <- rulesOfCF cf] tokenPragmas :: CF -> [(String,Reg)] tokenPragmas cf = [(name,exp) | TokenReg name _ exp <- pragmasOfCF cf] tokenNames :: CF -> [String] tokenNames cf = fst (unzip (tokenPragmas cf)) layoutPragmas :: CF -> (Bool,[String],[String]) layoutPragmas cf = let ps = pragmasOfCF cf in ( not (null [() | LayoutTop <- ps]), -- if there's layout betw top-level concat [ss | Layout ss <- ps], -- layout-block starting words concat [ss | LayoutStop ss <- ps] -- layout-block ending words ) derivations :: CF -> [String] derivations cf = case concat [ss|Derive ss <- pragmasOfCF cf] of [] -> ["Show", "Eq", "Ord"] x -> x hasLayout :: CF -> Bool hasLayout cf = case layoutPragmas cf of (t,ws,_) -> t || not (null ws) -- (True,[],_) means: top-level layout only hasAq :: CF -> Bool hasAq cf = case [(b,a) | AntiQuote b i a <- pragmasOfCF cf] of [] -> False _ -> True aqSyntax :: CF -> Maybe (String,String,String) aqSyntax cf = case [(b,i,a) | AntiQuote b i a <- pragmasOfCF cf] of [] -> Nothing [t] -> Just t many -> error "aqSyntax: Multiple antiquote pragmas" {- resolveAq cf@((ps,(i,t,y,z)),rs0) = maybe cf addAqRules $ aqSyntax cf where addAqRules (b,a) = ((map renamePragma ps,(newi,nub $ "[":"]":t,y,(map rename z))),rs) where rs = map renameRule (rulesOfCF cf) ++ newRules ++ concat (map newType newi = nub $ "String":i newRules = map mkNewRule $ filter isNormal $ allCats cf mkNewRule s = (renameAq s,(rename s,map Right b ++ [Left $ "String"] ++ map Right a)) renameRule (fun,(cat,itms)) = (rename fun,(rename cat, map renameItem itms)) renameItem = either (Left . rename) (Right . id) renamePragma p = case p of EntryPoints cs -> EntryPoints $ map rename cs _ -> p newType s = [ (rename s,(rename s,[Left $ s])), (s++"__AQ",(rename s,map Right b ++ [Left $ "String"] ++ map Right a)) ] -} rename s = case s of "_" -> s "$" -> s "#" -> s "(:)" -> s "(:[])" -> s "[]" -> s ('$':s) -> "AQ___" ++ s ('[':l) -> '[' : rename (init l) ++ "]" _ -> "AQ_" ++ normCat s ++ number s renameAqt s = case s of ('[':l) -> '[' : renameAqt (init l) ++ "]" _ -> "AQ___" ++ normCat s ++ number s renameAq s = case s of ('[':l) -> '[' : renameAq (init l) ++ "]" _ -> "AQ__" ++ normCat s ++ number s number = reverse . takeWhile isDigit . reverse unAq s = case s of 'A':'Q':'_':r -> Just r _ -> Nothing unAqs s = case s of 'A':'Q':'_':'_':'_':r -> Just r 'A':'Q':'_':'_':r -> Just r _ -> Nothing -- Literal: Char, String, Ident, Integer, Double type Literal = Cat type Symbol = String type KeyWord = String -- Cat is the Non-terminals of the grammar. type Cat = String -- Fun is the function name of a rule. type Fun = String internalCat :: Cat internalCat = "#" -- Abstract syntax tree. newtype Tree = Tree (Fun,[Tree]) -- The abstract syntax of a grammar. type Data = (Cat, [(Fun,Either [Cat] String)]) -- firstCat returns the first Category appearing in the grammar. firstCat :: CF -> Cat firstCat = valCat . head . rulesOfCF firstEntry :: CF -> Cat firstEntry cf = case allEntryPoints cf of (x:_) -> x _ -> firstCat cf notUniqueFuns :: CF -> [Fun] notUniqueFuns cf = let xss = group $ sort [ f | (f,_) <- rulesOfCF cf, not (isNilCons f || isCoercion f)] in [ head xs | xs <- xss, length xs > 1] badInheritence :: CF -> [Cat] badInheritence cf = concatMap checkGroup (ruleGroups cf) where checkGroup (cat, rs) = if (length rs <= 1) then [] else case lookup cat rs of Nothing -> [] Just x -> [cat] -- extract the comment pragmas. commentPragmas :: [Pragma] -> [Pragma] commentPragmas = filter isComment where isComment (CommentS _) = True isComment (CommentM _) = True isComment _ = False -- returns all normal rules that constructs the given Cat. rulesForCat :: CF -> Cat -> [Rule] rulesForCat cf cat = [normRuleFun r | r <- rulesOfCF cf, isParsable r, valCat r == cat] --This version doesn't exclude internal rules. rulesForCat' :: CF -> Cat -> [Rule] rulesForCat' cf cat = [normRuleFun r | r <- rulesOfCF cf, valCat r == cat] valCat :: Rul f -> Cat valCat = fst . snd -- Get all categories of a grammar. allCats :: CF -> [Cat] allCats = nub . map valCat . rulesOfCF -- no cats w/o production -- Gets all normalized identified Categories allCatsIdNorm :: CF -> [Cat] allCatsIdNorm = nub . map identCat . map normCat . allCats -- category is used on an rhs isUsedCat :: CF -> Cat -> Bool isUsedCat cf cat = elem cat [c | r <- (rulesOfCF cf), Left c <- oldRHS (rhsRule r)] -- entry points to parser ---- allEntryPoints :: CF -> [Cat] allEntryPoints cf = case concat [cats | EntryPoints cats <- pragmasOfCF cf] of [] -> allCats cf cs -> cs -- group all categories with their rules. ruleGroups :: CF -> [(Cat,[Rule])] ruleGroups cf = [(c, rulesForCat cf c) | c <- allCats cf] -- group all categories with their rules including internal rules. ruleGroupsInternals :: CF -> [(Cat,[Rule])] ruleGroupsInternals cf = [(c, rulesForCat' cf c) | c <- allCats cf] typed_literals :: CF -> [(Fun,Cat)] typed_literals cf = map (\x -> (x,x)) lits ++ owns where (lits,_,_,_) = infoOfCF cf owns = map (\(x,_) -> (x,x)) (tokenPragmas cf) -- ++ rulets rulets = [(fun,c) | (fun,(c,Right reg)) <- rulesOfCF cf] literals :: CF -> [Cat] literals cf = lits ++ owns where (lits,_,_,_) = infoOfCF cf owns = map fst $ tokenPragmas cf ++ ruleTokens cf symbols :: CFG f -> [String] symbols cf = syms where (_,syms,_,_) = infoOfCF cf reservedWords :: CFG f -> [String] reservedWords cf = sort keywords where (_,_,keywords,_) = infoOfCF cf reversibleCats :: CFG f -> [Cat] reversibleCats cf = cats where (_,_,_,cats) = infoOfCF cf -- Comments can be defined by the 'comment' pragma comments :: CF -> ([(String,String)],[String]) comments cf = case commentPragmas (pragmasOfCF cf) of xs -> ([p | CommentM p <- xs], [s | CommentS s <- xs]) -- built-in categories (corresponds to lexer) -- if the gramamr uses the predefined Ident type hasIdent :: CF -> Bool hasIdent cf = isUsedCat cf "Ident" -- these need new datatypes specialCats :: CF -> [Cat] specialCats cf = (if hasIdent cf then ("Ident":) else id) (map fst (tokenPragmas cf)) -- the parser needs these specialCatsP :: [Cat] specialCatsP = words "Ident Integer String Char Double" -- to print parse trees prTree :: Tree -> String prTree (Tree (fun,[])) = fun prTree (Tree (fun,trees)) = fun +++ unwords (map pr2 trees) where pr2 t@(Tree (_,ts)) = (if (null ts) then id else prParenth) (prTree t) -- abstract syntax trees: data type definitions cf2data :: CF -> [Data] cf2data cf = [(cat, nub (map mkData [r | r@(f,_) <- rulesOfCF cf, not (isDefinedRule f), not (isCoercion f), eqCat cat (valCat r), not (isAqFun f)])) | cat <- allNormalCats cf] where mkData :: Rule -> (Fun,Either [Cat] (String)) mkData (f,(_,Left its)) = (normFun f,Left [normCat c | Left c <- its, c /= internalCat]) mkData (f,(_,Right (r,tok))) = (normFun f,Right tok) {- --This version includes lists in the returned data. --Michael 4/03 cf2dataLists :: CF -> [Data] cf2dataLists cf = [(cat, nub (map mkData [r | r@(f,_) <- rulesOfCF cf, not (isDefinedRule f), not (isCoercion f), eqCat cat (valCat r)])) | cat <- (filter (\x -> not $ isDigit $ last x) (allCats cf))] where mkData (f,(_,its)) = (normFun f,[normCat c | Left c <- its, c /= internalCat]) -} specialData :: CF -> [Data] specialData cf = [(c,[(c,Left [arg c])]) | c <- specialCats cf] where arg c = case c of _ -> "String" allNormalCats :: CF -> [Cat] allNormalCats = filter isNormal . allCats -- to deal with coercions -- the Haskell convention: the wildcard _ is not a constructor isCoercion :: Fun -> Bool isCoercion = (== "_") isDefinedRule :: Fun -> Bool isDefinedRule (x:_) = isLower x isProperLabel :: Fun -> Bool isProperLabel f = not (isCoercion f || isDefinedRule f) -- categories C1, C2,... (one digit in end) are variants of C eqCat :: Cat -> Cat -> Bool eqCat c c1 = catCat c == catCat c1 normCat :: Cat -> Cat normCat c = case c of '[':cs -> "[" ++ norm (init cs) ++ "]" _ -> unList $ norm c -- to be deprecated where norm = reverse . dropWhile isDigit . reverse normCatOfList :: Cat -> Cat normCatOfList = normCat . catOfList -- for Happy and Latex -- When given a list Cat, i.e. '[C]', it removes the square brackets, -- and adds the prefix List, i.e. 'ListC'. identCat :: Cat -> Cat identCat c = case c of '[':cs -> "List" ++ identCat (init cs) _ -> c normFun :: Fun -> Fun normFun = id -- takeWhile (not . isDigit) normRuleFun :: Rule -> Rule normRuleFun (f,p) = (normFun f, p) isNormal :: Cat -> Bool isNormal c = not (isList c || isDigit (last c) || isAqFun c) isParsable :: Rul f -> Bool isParsable (_,(_, Left (Left "#":_))) = False isParsable (_,(_, Left (Left "$":_))) = False isParsable _ = True isList :: Cat -> Bool isList c = head c == '[' unList :: Cat -> Cat unList c = c catOfList :: Cat -> Cat catOfList c = case c of '[':_:_ -> init (tail c) _ -> c isNilFun, isOneFun, isConsFun, isNilCons, isAqFun :: Fun -> Bool isNilCons f = isNilFun f || isOneFun f || isConsFun f isNilFun f = f == "[]" isOneFun f = f == "(:[])" isConsFun f = f == "(:)" isEmptyListCat :: CF -> Cat -> Bool isEmptyListCat cf c = elem "[]" $ map fst $ rulesForCat' cf c isNonterm = either (const True) (const False) isAqFun ('$':_) = True isAqFun _ = False -- used in Happy to parse lists of form 'C t [C]' in reverse order -- applies only if the [] rule has no terminals revSepListRule :: Rul f -> Rul f revSepListRule r@(f,(c, Left ts)) = (f, (c, Left $ xs : x : sep)) where (x,sep,xs) = (head ts, init (tail ts), last ts) revSepListRule x = x -- invariant: test in findAllReversibleCats have been performed findAllReversibleCats :: CF -> [Cat] findAllReversibleCats cf = [c | (c,r) <- ruleGroups cf, isRev c r] where isRev c rs = case rs of [r1,r2] | isList c -> if isConsFun (funRule r2) then tryRev r2 r1 else if isConsFun (funRule r1) then tryRev r1 r2 else False _ -> False tryRev :: Rule -> Rule -> Bool tryRev (f,(_,Left (ts@(x:_:xs)))) r = isEmptyNilRule r && isConsFun f && isNonterm x && isNonterm (last ts) tryRev _ _ = False isEmptyNilRule (f,(_,Left ts)) = isNilFun f && null ts isEmptyNilRule _ = False precCat :: Cat -> Int precCat = snd . analyseCat precRule :: Rule -> Int precRule = precCat . valCat precLevels :: CF -> [Int] precLevels cf = sort $ nub $ [ precCat c | c <- allCats cf] precCF :: CF -> Bool precCF cf = length (precLevels cf) > 1 catCat :: Cat -> Cat catCat = fst . analyseCat analyseCat :: Cat -> (Cat,Int) analyseCat c = if (isList c) then list c else noList c where list cat = let (rc,n) = noList (init (tail cat)) in ("[" ++ rc ++ "]",n) noList cat = case span isDigit (reverse cat) of ([],c') -> (reverse c', 0) (d,c') -> (reverse c', read (reverse d)) -- we should actually check that -- (1) coercions are always between variants -- (2) no other digits are used checkRule :: CF -> Rule -> Either Rule String checkRule cf r@(f,(cat,rhs)) | badCoercion = Right $ "Bad coercion in rule" +++ s | badNil = Right $ "Bad empty list rule" +++ s | badOne = Right $ "Bad one-element list rule" +++ s | badCons = Right $ "Bad list construction rule" +++ s | badList = Right $ "Bad list formation rule" +++ s | badSpecial = Right $ "Bad special category rule" +++ s | badTypeName = Right $ "Bad type name" +++ unwords badtypes +++ "in" +++ s | badFunName = Right $ "Bad constructor name" +++ f +++ "in" +++ s | badMissing = Right $ "No production for" +++ unwords missing ++ ", appearing in rule" +++ s | otherwise = Left r where s = f ++ "." +++ cat +++ "::=" +++ unwords (map (either id show) $ transRHS rhs) --- c = normCat cat cs = [normCat c | Left c <- transRHS rhs] badCoercion = isCoercion f && not ([c] == cs) badNil = isNilFun f && not (isList c && null cs) badOne = isOneFun f && not (isList c && cs == [catOfList c]) badCons = isConsFun f && not (isList c && cs == [catOfList c, c]) badList = isList c && not (isCoercion f || isNilFun f || isOneFun f || isConsFun f || isAqFun f) badSpecial = elem c specialCatsP && not (isCoercion f) badMissing = not (null missing) missing = filter nodef [c | Left c <- transRHS rhs] nodef t = notElem t defineds defineds = "#" : map fst (tokenPragmas cf) ++ specialCatsP ++ map valCat (rulesOfCF cf) badTypeName = not (null badtypes) badtypes = filter isBadType $ cat : [c | Left c <- transRHS rhs] isBadType c = not (isUpper (head c) || isList c || c == "#") badFunName = not (all (\c -> isAlphaNum c || c == '_') f {-isUpper (head f)-} || isCoercion f || isNilFun f || isOneFun f || isConsFun f || isAqFun f) transRHS :: RHS -> [Either Cat String] transRHS = either id (const []) isPositionCat :: CFG f -> Cat -> Bool isPositionCat cf cat = or [b | TokenReg name b _ <- pragmasOfCF cf, name == cat] visibleNames :: CF -> [String] visibleNames cf = "myLexer":"tokens":map ('p':) eps ++ map ('q':) eps ++ map initLower eps where eps = quoters cf quoterName :: String -> String quoterName = initLower -- FIXME: List cats quoters :: CF -> [String] quoters = map identCat . allEntryPoints -- FIXME: List cats initLower :: String -> String initLower [] = error "initLower : Empty list" initLower (c:cs) = toLower c : cs