{-| Module : PP.Rule Description : Canonical rule representation Copyright : (c) 2017 Patrick Champion License : see LICENSE file Maintainer : chlablak@gmail.com Stability : provisional Portability : portable -} module PP.Rule ( -- * Canonical low-level rule Rule(..) , uniformize , extend , separate , regexfy -- * Canonical rules as Map , RuleSet , ruleSet , rule , check -- * Rules first set (Map) , FirstSet , firstSet , first ) where import Control.Monad import Data.Binary import Data.Either import Data.List import qualified Data.Map.Strict as Map import Data.Maybe import PP.Lexer (IToken) -- |Canonical rule type data Rule -- |A rule is defined by a non terminal and a list of Term and NonTerm -- The list should end with Empty = Rule String [Rule] -- |Non terminal string | NonTerm String -- |Terminal character | Term IToken -- |Terminal token | TermToken String -- |Empty | Empty -- |Concatenated rules, useful for PP.InputGrammar.rules | Concat [Rule] -- |Regular expression, useful for lexical rules | RegEx String | RegExString String -- ^No parse-string deriving (Eq, Ord) instance Show Rule where show (Rule a xs) = a ++ " -> " ++ right xs where right [] = "" right [x] = show x right (x:xs) = show x ++ "," ++ right xs show (NonTerm a) = a show (Term c) = show c show (TermToken t) = '%' : t show Empty = "$" show (Concat xs) = "Concat " ++ show xs show (RegEx re) = '%' : show re show (RegExString s) = show s instance Binary Rule where put (Rule a xs) = putWord8 0 >> put a >> put xs put (NonTerm a) = putWord8 1 >> put a put (Term c) = putWord8 2 >> put c put (TermToken t) = putWord8 3 >> put t put Empty = putWord8 4 put (Concat xs) = putWord8 5 >> put xs put (RegEx re) = putWord8 6 >> put re put (RegExString s) = putWord8 7 >> put s get = do tag <- getWord8 case tag of 0 -> liftM2 Rule get get 1 -> fmap NonTerm get 2 -> fmap Term get 3 -> fmap TermToken get 4 -> return Empty 5 -> fmap Concat get 6 -> fmap RegEx get 7 -> fmap RegExString get -- |Uniformize a list of rules -- `uniformize = sort . nub . concatMap (flatten . clean)` uniformize :: [Rule] -> [Rule] uniformize = sort . nub . concatMap (flatten . clean) -- |Clean a rule (remove Concat and useless Empty) clean :: Rule -> Rule clean (Rule s xs) = Rule s (cleaning xs) where cleaning [] = [] cleaning a@[Empty] = a cleaning (Empty : xs) = cleaning xs -- useless Empty cleaning (Concat [] : xs) = cleaning xs cleaning (Concat xs : ys) = cleaning (xs ++ ys) -- remove Concat cleaning (Rule s xs : ys) = Rule s (cleaning xs) : cleaning ys -- inner Rule cleaning (x : xs) = x : cleaning xs -- |Replace and extract inner rules flatten :: Rule -> [Rule] flatten (Rule s xs) = Rule s (replace xs) : extract xs where replace [] = [] replace (Rule s _ : xs) = NonTerm s : replace xs -- replacement replace (x : xs) = x : replace xs extract [] = [] extract (r@(Rule _ _) : xs) = flatten r ++ extract xs -- extract inner Rule extract (x : xs) = extract xs -- |Generate an augmented grammar extend :: [Rule] -> Either String [Rule] extend xs = case start xs of Left s -> Left $ "cannot extend, " ++ s Right s -> Right $ Rule "__start" [NonTerm s, Empty] : xs -- |Find start rule start :: [Rule] -> Either String String start xs = let c = candidates xs in case length c of 1 -> Right $ head c _ -> Left $ "no start rule found (candidates: " ++ show c ++ ")" -- |Find start rule candidates candidates :: [Rule] -> [String] candidates = map (fst . head) . filter (all snd) . grp . sortOn fst . evaluate where grp = groupBy (\(a, _) (b, _) -> a == b) evaluate [] = [] evaluate (Rule s xs : ys) = (s, True) : evaluate xs ++ evaluate ys evaluate (NonTerm s : xs) = (s, False) : evaluate xs evaluate (_ : xs) = evaluate xs -- |Rules as a map type RuleSet = Map.Map String [[Rule]] -- |Compute the rule set ruleSet :: [Rule] -> RuleSet ruleSet xs = Map.fromList [(n, collect n xs) | n <- names xs] where names = nub . map (\(Rule s _) -> s) collect n = map (\(Rule _ r) -> r) . filter (\(Rule s _) -> s == n) -- |Get rule from a RuleSet rule :: String -> RuleSet -> [Rule] rule name rs = case Map.lookup name rs of Nothing -> [] Just xs -> map (Rule name) xs -- |Check a rule set, return: (errors, warnings) check :: RuleSet -> ([String], [String]) check rs = (missing ++ leftRec, unused) where missing = ["missing non-terminal: " ++ n | n <- right, n `notElem` left] leftRec = ["direct left-recusion: " ++ n | n <- left, hasLeftRec n] unused = ["unused non-terminal: " ++ n | n <- left , n /= "__start" , n `notElem` right] hasLeftRec n = hasLeftRec' n /= [] hasLeftRec' n = [0 | (Rule _ (x:_)) <- rule n rs, hasLeftRec'' n x] hasLeftRec'' n (NonTerm s) = n == s hasLeftRec'' _ _ = False left = Map.keys rs right = nub $ concat [nonTerm xs | n <- left, (Rule _ xs) <- rule n rs] nonTerm [] = [] nonTerm (NonTerm s : xs) = s : nonTerm xs nonTerm (_:xs) = nonTerm xs -- |First set type type FirstSet = Map.Map String [Rule] -- |Compute the complete first set firstSet :: RuleSet -> FirstSet firstSet rs = Map.mapWithKey (\k _ -> find k rs) rs where find name rs = nub . sort $ concatMap compute $ noLeftRec $ rule name rs noLeftRec = filter (\(Rule a (x:_)) -> case x of NonTerm b -> a /= b _ -> True) compute (Rule _ [Empty]) = [Empty] compute (Rule name (x:xs)) = case compute x of [Empty] -> compute $ Rule name xs a -> a compute a@(Term _) = [a] compute a@(TermToken _) = [a] compute (NonTerm s) = find s rs compute Empty = [Empty] -- |Compute first set of a given rule first :: Rule -> FirstSet -> [Rule] first Empty _ = [Empty] first a@(Term _) _ = [a] first a@(TermToken _) _ = [a] first (NonTerm s) fs = fromMaybe [Empty] (Map.lookup s fs) first (Rule _ (x:_)) fs = first x fs -- |Separate rules into (parsing rules, lexing rules) separate :: [Rule] -> ([Rule], [Rule]) separate rs = nonTermToToken (filter (not . hasRegex) rs, filter hasRegex rs) where hasRegex (Rule _ []) = False hasRegex (Rule r (x:xs)) = hasRegex x || hasRegex (Rule r xs) hasRegex (NonTerm _) = False hasRegex (Term _) = False hasRegex (TermToken _) = False hasRegex Empty = False hasRegex (Concat []) = False hasRegex (Concat (x:xs)) = hasRegex x || hasRegex (Concat xs) hasRegex (RegEx _) = True hasRegex (RegExString _) = True -- |Transform NonTerm into TermToken, when needed nonTermToToken :: ([Rule], [Rule]) -> ([Rule], [Rule]) nonTermToToken (rs, lrs) = (mappers rs, mappers lrs) where mappers = map (\(Rule r xs) -> Rule r $ map (replaceNonTerm tok) xs) tok = map (\(Rule r _) -> r) lrs replaceNonTerm [] r = r replaceNonTerm (t:ts) r@(NonTerm nt) = if nt == t then TermToken t else replaceNonTerm ts r replaceNonTerm (_:ts) r = replaceNonTerm ts r -- |Transform lexing rules to have only one RegEx on right regexfy :: [Rule] -> [Rule] regexfy lrs = concatMap replace lrs where replace (Rule r xs) = [Rule r $ bind [RegEx ""] $ concatMap replace xs] replace (TermToken nt) = concatMap replace $ find nt replace x = [x] bind acc [Empty] = acc ++ [Empty] bind (RegEx a:acc) (RegEx b:xs) = bind [RegEx $ a ++ b] xs bind (RegEx a:acc) (RegExString b:xs) = bind [RegEx $ a ++ toRegex b] xs find r = let (Rule _ xs:_) = rule r rs in init xs toRegex s = '(' : concat [['[',c,']'] | c <- s] ++ ")" rs = ruleSet lrs