{-# OPTIONS -fglasgow-exts #-} {- BNF Converter: Abstract syntax Copyright (C) 2004 Author: Markus Forberg, Michael Pellauer, Aarne Ranta This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} module CF ( -- Types. CF, Rule, Pragma(..), Exp(..), Literal, Symbol, KeyWord, Cat, Fun, Tree(..), prTree, -- print an abstract syntax tree Data, -- describes the abstract syntax of a grammar cf2data, -- translates a grammar to a Data object. cf2dataLists, -- translates to a Data with List categories included. -- Literal categories, constants, firstCat, -- the first value category in the grammar. firstEntry, -- the first entry or the first value category specialCats, -- ident specialCatsP, -- all literals specialData, -- special data isCoercion, -- wildcards in grammar (avoid syntactic clutter) isDefinedRule, -- defined rules (allows syntactic sugar) isProperLabel, -- not coercion or defined rule allCats, -- all categories of a grammar allCatsIdNorm, allEntryPoints, -- those categories that are entry points to the parser reservedWords, -- get the keywords of a grammar. symbols, -- get all symbols literals, -- get all literals of a grammar. (e.g. String, Double) reversibleCats, -- categories that is left-recursive transformable. findAllReversibleCats, -- find all reversible categories identCat, -- transforms '[C]' to ListC (others, unchanged). valCat, -- The value category of a rule. isParsable, -- Checks if the rule is parsable. rulesOfCF, -- All rules of a grammar. rulesForCat, -- rules for a given category ruleGroups, -- Categories are grouped with their rules. ruleGroupsInternals, --As above, but includes internal cats. funRule, -- The function name of a rule. notUniqueFuns, -- Returns a list of function labels that are not unique. badInheritence, -- Returns a list of all function labels that can cause problems in languages with inheritence. isList, -- Checks if a category is a list category. -- Information functions for list functions. isNilFun, -- empty list function? ([]) isOneFun, -- one element list function? (:[]) isConsFun, -- constructor function? (:) isNilCons, -- either three of above? isEmptyListCat, -- checks if the list permits [] revSepListRule, -- reverse a rule, if it is of form C t [C]. rhsRule, -- The list of Terminals/NonTerminals of a rule. normCat, -- Removes precendence information. C1 => C, [C2] => [C] normCatOfList, -- Removes precendence information and enclosed List. C1 => C, C2 => C catOfList, -- Removes enclosed list: [C1] => C1 comments, -- translates the pragmas into two list containing the s./m. comments tokenPragmas, -- user-defined regular expression tokens tokenNames, -- The names of all user-defined tokens precCat, -- get the precendence level of a Cat C1 => 1, C => 0 precLevels, -- get all precendence levels in the grammar, sorted in increasing order. precRule, -- get the precendence level of the value category of a rule. precCF, -- Check if the CF consists of precendence levels. isUsedCat, internalCat, -- the symbol # isPositionCat, -- category that has a position in AST hasIdent, hasLayout, layoutPragmas, checkRule, CFP, -- CF with profiles RuleP, FunP, Prof, cf2cfpRule, cf2cfp, cfp2cf, trivialProf, rulesOfCFP, funRuleP, ruleGroupsP, allCatsP, allEntryPointsP ) where import Utils (prParenth,(+++)) import List (nub, intersperse, partition, sort,sort,group) import Char import AbsBNF (Reg()) -- A context free grammar consists of a set of rules and some extended -- information (e.g. pragmas, literals, symbols, keywords) type CF = (Exts,[Rule]) -- A rule consists of a function name, a main category and a sequence of -- terminals and non-terminals. -- function_name . Main_Cat ::= sequence type Rule = (Fun, (Cat, [Either Cat String])) -- polymorphic types for common type signatures for CF and CFP type Rul f = (f, (Cat, [Either Cat String])) type CFG f = (Exts,[Rul f]) type Exts = ([Pragma],Info) -- Info is information extracted from the CF, for easy access. -- Literals - Char, String, Ident, Integer, Double -- Strings are quoted strings, and Ident are unquoted. -- Symbols - symbols in the grammar, e.g. ´*´, '->'. -- KeyWord - reserved words, e.g. 'if' 'while' type Info = ([Literal],[Symbol],[KeyWord],[Cat]) -- Expressions for function definitions data Exp = App String [Exp] | LitInt Integer | LitDouble Double | LitChar Char | LitString String instance Show Exp where showsPrec p e = case listView e of Right es -> showString "[" . foldr (.) id (intersperse (showString ", ") $ map shows es) . showString "]" Left (App x []) -> showString x Left (App "(:)" [e1,e2]) -> showParen (p>0) $ showsPrec 1 e1 . showString " : " . shows e2 Left (App x es) -> showParen (p>1) $ foldr (.) id $ intersperse (showString " ") $ showString x : map (showsPrec 2) es Left (LitInt n) -> shows n Left (LitDouble x) -> shows x Left (LitChar c) -> shows c Left (LitString s) -> shows s where listView (App "[]" []) = Right [] listView (App "(:)" [e1,e2]) | Right es <- listView e2 = Right $ e1:es listView e = Left e -- pragmas for single line comments and for multiple-line comments. data Pragma = CommentS String | CommentM (String,String) | TokenReg String Bool Reg | EntryPoints [Cat] | Layout [String] | LayoutStop [String] | LayoutTop | FunDef String [String] Exp -- ... deriving (Show) tokenPragmas :: CFG f -> [(String,Reg)] tokenPragmas cf = [(name,exp) | TokenReg name _ exp <- pragmasOfCF cf] tokenNames :: CF -> [String] tokenNames cf = fst (unzip (tokenPragmas cf)) layoutPragmas :: CF -> (Bool,[String],[String]) layoutPragmas cf = let ps = pragmasOfCF cf in ( not (null [() | LayoutTop <- ps]), -- if there's layout betw top-level concat [ss | Layout ss <- ps], -- layout-block starting words concat [ss | LayoutStop ss <- ps] -- layout-block ending words ) hasLayout :: CF -> Bool hasLayout cf = case layoutPragmas cf of (t,ws,_) -> t || not (null ws) -- (True,[],_) means: top-level layout only -- Literal: Char, String, Ident, Integer, Double type Literal = Cat type Symbol = String type KeyWord = String -- Cat is the Non-terminals of the grammar. type Cat = String -- Fun is the function name of a rule. type Fun = String internalCat :: Cat internalCat = "#" -- Abstract syntax tree. newtype Tree = Tree (Fun,[Tree]) -- The abstract syntax of a grammar. type Data = (Cat, [(Fun,[Cat])]) -- firstCat returns the first Category appearing in the grammar. firstCat :: CF -> Cat firstCat = valCat . head . rulesOfCF firstEntry :: CF -> Cat firstEntry cf = case allEntryPoints cf of (x:_) -> x _ -> firstCat cf rulesOfCF :: CF -> [Rule] rulesOfCF = snd notUniqueFuns :: CF -> [Fun] notUniqueFuns cf = let xss = group $ sort [ f | (f,_) <- rulesOfCF cf, not (isNilCons f || isCoercion f)] in [ head xs | xs <- xss, length xs > 1] badInheritence :: CF -> [Cat] badInheritence cf = concatMap checkGroup (ruleGroups cf) where checkGroup (cat, rs) = if (length rs <= 1) then [] else case lookup cat rs of Nothing -> [] Just x -> [cat] infoOfCF :: CFG f -> Info infoOfCF = snd . fst pragmasOfCF :: CFG f -> [Pragma] pragmasOfCF = fst . fst -- extract the comment pragmas. commentPragmas :: [Pragma] -> [Pragma] commentPragmas = filter isComment where isComment (CommentS _) = True isComment (CommentM _) = True isComment _ = False -- returns all normal rules that constructs the given Cat. rulesForCat :: CF -> Cat -> [Rule] rulesForCat cf cat = [normRuleFun r | r <- rulesOfCF cf, isParsable r, valCat r == cat] --This version doesn't exclude internal rules. rulesForCat' :: CF -> Cat -> [Rule] rulesForCat' cf cat = [normRuleFun r | r <- rulesOfCF cf, valCat r == cat] valCat :: Rul f -> Cat valCat = fst . snd -- Get all categories of a grammar. allCats :: CF -> [Cat] allCats = nub . map valCat . rulesOfCF -- no cats w/o production -- Gets all normalized identified Categories allCatsIdNorm :: CF -> [Cat] allCatsIdNorm = nub . map identCat . map normCat . allCats -- category is used on an rhs isUsedCat :: CF -> Cat -> Bool isUsedCat cf cat = elem cat [c | r <- (rulesOfCF cf), Left c <- rhsRule r] -- entry points to parser ---- allEntryPoints :: CF -> [Cat] allEntryPoints cf = case concat [cats | EntryPoints cats <- pragmasOfCF cf] of [] -> allCats cf cs -> cs -- group all categories with their rules. ruleGroups :: CF -> [(Cat,[Rule])] ruleGroups cf = [(c, rulesForCat cf c) | c <- allCats cf] -- group all categories with their rules including internal rules. ruleGroupsInternals :: CF -> [(Cat,[Rule])] ruleGroupsInternals cf = [(c, rulesForCat' cf c) | c <- allCats cf] literals :: CFG f -> [Cat] literals cf = lits ++ owns where (lits,_,_,_) = infoOfCF cf owns = map fst $ tokenPragmas cf symbols :: CFG f -> [String] symbols cf = syms where (_,syms,_,_) = infoOfCF cf reservedWords :: CFG f -> [String] reservedWords cf = sort keywords where (_,_,keywords,_) = infoOfCF cf reversibleCats :: CFG f -> [Cat] reversibleCats cf = cats where (_,_,_,cats) = infoOfCF cf -- Comments can be defined by the 'comment' pragma comments :: CF -> ([(String,String)],[String]) comments cf = case commentPragmas (pragmasOfCF cf) of xs -> ([p | CommentM p <- xs], [s | CommentS s <- xs]) funRule :: Rule -> Fun funRule = fst rhsRule :: Rul f -> [Either Cat String] rhsRule = snd . snd -- built-in categories (corresponds to lexer) -- if the gramamr uses the predefined Ident type hasIdent :: CF -> Bool hasIdent cf = isUsedCat cf "Ident" -- these need new datatypes specialCats :: CF -> [Cat] specialCats cf = (if hasIdent cf then ("Ident":) else id) (map fst (tokenPragmas cf)) -- the parser needs these specialCatsP :: [Cat] specialCatsP = words "Ident Integer String Char Double" -- to print parse trees prTree :: Tree -> String prTree (Tree (fun,[])) = fun prTree (Tree (fun,trees)) = fun +++ unwords (map pr2 trees) where pr2 t@(Tree (_,ts)) = (if (null ts) then id else prParenth) (prTree t) -- abstract syntax trees: data type definitions cf2data :: CF -> [Data] cf2data cf = [(cat, nub (map mkData [r | r@(f,_) <- rulesOfCF cf, not (isDefinedRule f), not (isCoercion f), eqCat cat (valCat r)])) | cat <- allNormalCats cf] where mkData (f,(_,its)) = (normFun f,[normCat c | Left c <- its, c /= internalCat]) --This version includes lists in the returned data. --Michael 4/03 cf2dataLists :: CF -> [Data] cf2dataLists cf = [(cat, nub (map mkData [r | r@(f,_) <- rulesOfCF cf, not (isDefinedRule f), not (isCoercion f), eqCat cat (valCat r)])) | cat <- (filter (\x -> not $ isDigit $ last x) (allCats cf))] where mkData (f,(_,its)) = (normFun f,[normCat c | Left c <- its, c /= internalCat]) specialData :: CF -> [Data] specialData cf = [(c,[(c,[arg c])]) | c <- specialCats cf] where arg c = case c of _ -> "String" allNormalCats :: CF -> [Cat] allNormalCats = filter isNormal . allCats -- to deal with coercions -- the Haskell convention: the wildcard _ is not a constructor isCoercion :: Fun -> Bool isCoercion = (== "_") isDefinedRule :: Fun -> Bool isDefinedRule (x:_) = isLower x isProperLabel :: Fun -> Bool isProperLabel f = not (isCoercion f || isDefinedRule f) -- categories C1, C2,... (one digit in end) are variants of C eqCat :: Cat -> Cat -> Bool eqCat c c1 = catCat c == catCat c1 normCat :: Cat -> Cat normCat c = case c of '[':cs -> "[" ++ norm (init cs) ++ "]" _ -> unList $ norm c -- to be deprecated where norm = reverse . dropWhile isDigit . reverse normCatOfList :: Cat -> Cat normCatOfList = normCat . catOfList -- for Happy and Latex -- When given a list Cat, i.e. '[C]', it removes the square brackets, -- and adds the prefix List, i.e. 'ListC'. identCat :: Cat -> Cat identCat c = case c of '[':cs -> "List" ++ identCat (init cs) _ -> c normFun :: Fun -> Fun normFun = id -- takeWhile (not . isDigit) normRuleFun :: Rule -> Rule normRuleFun (f,p) = (normFun f, p) isNormal :: Cat -> Bool isNormal c = not (isList c || isDigit (last c)) isParsable :: Rul f -> Bool isParsable (_,(_, Left "#":_)) = False isParsable _ = True isList :: Cat -> Bool isList c = head c == '[' unList :: Cat -> Cat unList c = c catOfList :: Cat -> Cat catOfList c = case c of '[':_:_ -> init (tail c) _ -> c isNilFun, isOneFun, isConsFun, isNilCons :: Fun -> Bool isNilCons f = isNilFun f || isOneFun f || isConsFun f isNilFun f = f == "[]" isOneFun f = f == "(:[])" isConsFun f = f == "(:)" isEmptyListCat :: CF -> Cat -> Bool isEmptyListCat cf c = elem "[]" $ map fst $ rulesForCat' cf c isNonterm = either (const True) (const False) -- used in Happy to parse lists of form 'C t [C]' in reverse order -- applies only if the [] rule has no terminals revSepListRule :: Rul f -> Rul f revSepListRule r@(f,(c, ts)) = (f, (c, xs : x : sep)) where (x,sep,xs) = (head ts, init (tail ts), last ts) -- invariant: test in findAllReversibleCats have been performed findAllReversibleCats :: CF -> [Cat] findAllReversibleCats cf = [c | (c,r) <- ruleGroups cf, isRev c r] where isRev c rs = case rs of [r1,r2] | isList c -> if isConsFun (funRule r2) then tryRev r2 r1 else if isConsFun (funRule r1) then tryRev r1 r2 else False _ -> False tryRev (f,(_,ts@(x:_:xs))) r = isEmptyNilRule r && isConsFun f && isNonterm x && isNonterm (last ts) tryRev _ _ = False isEmptyNilRule (f,(_,ts)) = isNilFun f && null ts precCat :: Cat -> Int precCat = snd . analyseCat precRule :: Rule -> Int precRule = precCat . valCat precLevels :: CF -> [Int] precLevels cf = sort $ nub $ [ precCat c | c <- allCats cf] precCF :: CF -> Bool precCF cf = length (precLevels cf) > 1 catCat :: Cat -> Cat catCat = fst . analyseCat analyseCat :: Cat -> (Cat,Int) analyseCat c = if (isList c) then list c else noList c where list cat = let (rc,n) = noList (init (tail cat)) in ("[" ++ rc ++ "]",n) noList cat = case span isDigit (reverse cat) of ([],c') -> (reverse c', 0) (d,c') -> (reverse c', read (reverse d)) -- we should actually check that -- (1) coercions are always between variants -- (2) no other digits are used checkRule :: CF -> RuleP -> Either RuleP String checkRule cf r@((f,_),(cat,rhs)) | badCoercion = Right $ "Bad coercion in rule" +++ s | badNil = Right $ "Bad empty list rule" +++ s | badOne = Right $ "Bad one-element list rule" +++ s | badCons = Right $ "Bad list construction rule" +++ s | badList = Right $ "Bad list formation rule" +++ s | badSpecial = Right $ "Bad special category rule" +++ s | badTypeName = Right $ "Bad type name" +++ unwords badtypes +++ "in" +++ s | badFunName = Right $ "Bad constructor name" +++ f +++ "in" +++ s | badMissing = Right $ "No production for" +++ unwords missing ++ ", appearing in rule" +++ s | otherwise = Left r where s = f ++ "." +++ cat +++ "::=" +++ unwords (map (either id show) rhs) --- c = normCat cat cs = [normCat c | Left c <- rhs] badCoercion = isCoercion f && not ([c] == cs) badNil = isNilFun f && not (isList c && null cs) badOne = isOneFun f && not (isList c && cs == [catOfList c]) badCons = isConsFun f && not (isList c && cs == [catOfList c, c]) badList = isList c && not (isCoercion f || isNilFun f || isOneFun f || isConsFun f) badSpecial = elem c specialCatsP && not (isCoercion f) badMissing = not (null missing) missing = filter nodef [c | Left c <- rhs] nodef t = notElem t defineds defineds = "#" : map fst (tokenPragmas cf) ++ specialCatsP ++ map valCat (rulesOfCF cf) badTypeName = not (null badtypes) badtypes = filter isBadType $ cat : [c | Left c <- rhs] isBadType c = not (isUpper (head c) || isList c || c == "#") badFunName = not (all (\c -> isAlphaNum c || c == '_') f {-isUpper (head f)-} || isCoercion f || isNilFun f || isOneFun f || isConsFun f) isPositionCat :: CFG f -> Cat -> Bool isPositionCat cf cat = or [b | TokenReg name b _ <- pragmasOfCF cf, name == cat] -- grammar with permutation profile à la GF. AR 22/9/2004 type CFP = (Exts,[RuleP]) type FunP = (Fun,Prof) type RuleP = (FunP, (Cat, [Either Cat String])) type Prof = (Fun, [([[Int]],[Int])]) -- the original function name, profile cf2cfp :: CF -> CFP cf2cfp (es,rs) = (es, map cf2cfpRule rs) cf2cfpRule :: Rule -> RuleP cf2cfpRule (f,(c,its)) = ((f, (f, trivialProf its)),(c,its)) cfp2cf :: CFP -> CF cfp2cf (es,rs) = (es,[(f,(c,its)) | ((f,_),(c,its)) <- rs]) trivialProf :: [Either Cat String] -> [([[Int]],[Int])] trivialProf its = [([],[i]) | (i,_) <- zip [0..] [c | Left c <- its]] rulesOfCFP :: CFP -> [RuleP] rulesOfCFP = snd funRuleP :: RuleP -> Fun funRuleP = fst . snd . fst ruleGroupsP :: CFP -> [(Cat,[RuleP])] ruleGroupsP cf = [(c, rulesForCatP cf c) | c <- allCatsP cf] rulesForCatP :: CFP -> Cat -> [RuleP] rulesForCatP cf cat = [r | r <- rulesOfCFP cf, isParsable r, valCat r == cat] allCatsP :: CFP -> [Cat] allCatsP = nub . map valCat . rulesOfCFP -- no cats w/o production allEntryPointsP :: CFP -> [Cat] allEntryPointsP cf = case concat [cats | EntryPoints cats <- pragmasOfCF cf] of [] -> allCatsP cf cs -> cs