/----------------------------------------------------------------------------- The Grammar data type. (c) 1993-2001 Andy Gill, Simon Marlow ----------------------------------------------------------------------------- Mangler converts AbsSyn to Grammar > module Happy.Frontend.Mangler (mangler) where > import Happy.CodeGen.Common.Options > import Happy.Grammar > import Happy.Frontend.AbsSyn > import Happy.Frontend.Mangler.Monad This is only supported in the bootstrapped version #ifdef HAPPY_BOOTSTRAP > import Happy.Frontend.AttrGrammar.Mangler #endif > import Happy.Frontend.ParamRules > import Data.Array ( Array, (!), accumArray, array, listArray ) > import Data.Char ( isAlphaNum, isDigit, isLower ) > import Data.List ( zip4, sortBy ) > import Data.Maybe ( fromMaybe ) > import Data.Ord > import Control.Monad.Writer ( Writer, mapWriter, runWriter ) ----------------------------------------------------------------------------- -- The Mangler This bit is a real mess, mainly because of the error message support. > mangler :: FilePath -> AbsSyn -> Either [ErrMsg] (Grammar, CommonOptions) > mangler file abssyn > | null errs = Right gd > | otherwise = Left errs > where (gd, errs) = runWriter (manglerM file abssyn) > manglerM :: FilePath -> AbsSyn -> M (Grammar, CommonOptions) > manglerM file (AbsSyn dirs rules') = > -- add filename to all error messages > mapWriter (\(a,e) -> (a, map (\s -> file ++ ": " ++ s) e)) $ do > rules <- case expand_rules rules' of > Left err -> addErr err >> return [] > Right as -> return as > nonterm_strs <- checkRules [n | Rule1 n _ _ <- rules] "" [] > let > terminal_strs = concat (map getTerm dirs) ++ [eofName] > n_starts = length starts' > n_nts = length nonterm_strs > n_ts = length terminal_strs > first_nt = firstStartTok + n_starts > first_t = first_nt + n_nts > last_start = first_nt - 1 > last_nt = first_t - 1 > last_t = first_t + n_ts - 1 > start_names = [ firstStartTok .. last_start ] > nonterm_names = [ first_nt .. last_nt ] > terminal_names = [ first_t .. last_t ] > starts' = case getParserNames dirs of > [] -> [TokenName "happyParse" Nothing False] > ns -> ns > > start_strs = [ startName++'_':p | (TokenName p _ _) <- starts' ] Build up a mapping from name values to strings. > name_env = (errorTok, errorName) : > (dummyTok, dummyName) : > zip start_names start_strs ++ > zip nonterm_names nonterm_strs ++ > zip terminal_names terminal_strs > lookupName :: String -> [Name] > lookupName n = [ t | (t,r) <- name_env, r == n ] > mapToName str' = > case lookupName str' of > [a] -> return a > [] -> do addErr ("unknown identifier '" ++ str' ++ "'") > return errorTok > (a:_) -> do addErr ("multiple use of '" ++ str' ++ "'") > return a Start symbols... > -- default start token is the first non-terminal in the grammar > lookupStart (TokenName _ Nothing _) = return first_nt > lookupStart (TokenName _ (Just n) _) = mapToName n > lookupStart _ = error "lookupStart: Not a TokenName" > -- in > start_toks <- mapM lookupStart starts' > let > parser_names = [ s | TokenName s _ _ <- starts' ] > start_partials = [ b | TokenName _ _ b <- starts' ] > start_prods = zipWith (\nm tok -> Production nm [tok] ("no code",[]) No) > start_names start_toks Deal with priorities... > priodir = zip [1..] (getPrios dirs) > > mkPrio :: Int -> Directive a -> Priority > mkPrio i (TokenNonassoc _) = Prio None i > mkPrio i (TokenRight _) = Prio RightAssoc i > mkPrio i (TokenLeft _) = Prio LeftAssoc i > mkPrio _ _ = error "Panic: impossible case in mkPrio" > prios = [ (name,mkPrio i dir) > | (i,dir) <- priodir > , nm <- getPrioNames dir > , name <- lookupName nm > ] > prioByString = [ (name, mkPrio i dir) > | (i,dir) <- priodir > , name <- getPrioNames dir > ] Translate the rules from string to name-based. > convNT (Rule1 nt prods ty) > = do nt' <- mapToName nt > return (nt', prods, ty) > > attrs = getAttributes dirs > attrType = fromMaybe "HappyAttrs" (getAttributetype dirs) > > transRule (nt, prods, _ty) > = mapM (finishRule nt) prods > > finishRule :: Name -> Prod1 -> Writer [ErrMsg] Production > finishRule nt (Prod1 lhs code line prec) > = mapWriter (\(a,e) -> (a, map (addLine line) e)) $ do > lhs' <- mapM mapToName lhs > code' <- checkCode (length lhs) lhs' nonterm_names code attrs > case mkPrec lhs' prec of > Left s -> do addErr ("Undeclared precedence token: " ++ s) > return (Production nt lhs' code' No) > Right p -> return (Production nt lhs' code' p) > > mkPrec :: [Name] -> Prec -> Either String Priority > mkPrec lhs PrecNone = > case filter (flip elem terminal_names) lhs of > [] -> Right No > xs -> case lookup (last xs) prios of > Nothing -> Right No > Just p -> Right p > mkPrec _ (PrecId s) = > case lookup s prioByString of > Nothing -> Left s > Just p -> Right p > > mkPrec _ PrecShift = Right PrioLowest > > -- in > rules1 <- mapM convNT rules > rules2 <- mapM transRule rules1 > let > type_env = [(nt, t) | Rule1 nt _ (Just (t,[])) <- rules] ++ > [(nt, getTokenType dirs) | nt <- terminal_strs] -- XXX: Doesn't handle $$ type! > > fixType (ty,s) = go "" ty > where go acc [] = return (reverse acc) > go acc (c:r) | isLower c = -- look for a run of alphanumerics starting with a lower case letter > let (cs,r1) = span isAlphaNum r > go1 x = go (reverse x ++ acc) r1 > in case lookup (c:cs) s of > Nothing -> go1 (c:cs) -- no binding found > Just a -> case lookup a type_env of > Nothing -> do > addErr ("Parameterized rule argument '" ++ a ++ "' does not have type") > go1 (c:cs) > Just t -> go1 $ "(" ++ t ++ ")" > | otherwise = go (c:acc) r > > convType (nm, t) > = do t' <- fixType t > return (nm, t') > > -- in > tys <- mapM convType [ (nm, t) | (nm, _, Just t) <- rules1 ] > > let > type_array :: Array Int (Maybe String) > type_array = accumArray (\_ x -> x) Nothing (first_nt, last_nt) > [ (nm, Just t) | (nm, t) <- tys ] > env_array :: Array Int String > env_array = array (errorTok, last_t) name_env > -- in Get the token specs in terms of Names. > let > fixTokenSpec (a,b) = do n <- mapToName a; return (n,b) > -- in > tokspec <- mapM fixTokenSpec (getTokenSpec dirs) > let > ass = combinePairs [ (a,no) > | (Production a _ _ _,no) <- zip productions' [0..] ] > arr = array (firstStartTok, length ass - 1 + firstStartTok) ass > lookup_prods :: Name -> [Int] > lookup_prods x | x >= firstStartTok && x < first_t = arr ! x > lookup_prods _ = error "lookup_prods" > > productions' = start_prods ++ concat rules2 > prod_array = listArray (0,length productions' - 1) productions' > -- in > return (Grammar { > productions = productions', > lookupProdNo = (prod_array !), > lookupProdsOfName = lookup_prods, > token_specs = tokspec, > terminals = errorTok : terminal_names, > non_terminals = start_names ++ nonterm_names, > -- INCLUDES the %start tokens > starts = zip4 parser_names start_names start_toks > start_partials, > types = type_array, > token_names = env_array, > first_nonterm = first_nt, > first_term = first_t, > eof_term = last terminal_names, > priorities = prios, > attributes = attrs, > attributetype = attrType > }, > CommonOptions { > imported_identity = getImportedIdentity dirs, > monad = getMonad dirs, > lexer = getLexer dirs, > error_handler = getError dirs, > error_sig = getErrorHandlerType dirs, > token_type = getTokenType dirs, > expect = getExpect dirs > }) Gofer-like stuff: > combinePairs :: (Ord a) => [(a,b)] -> [(a,[b])] > combinePairs xs = > combine [ (a,[b]) | (a,b) <- sortBy (comparing fst) xs] > where > combine [] = [] > combine ((a,b):(c,d):r) | a == c = combine ((a,b++d) : r) > combine (a:r) = a : combine r > For combining actions with possible error messages. > addLine :: Int -> String -> String > addLine l s = show l ++ ": " ++ s > getTerm :: Directive a -> [a] > getTerm (TokenSpec stuff) = map fst stuff > getTerm _ = [] So is this. > checkRules :: [String] -> String -> [String] -> Writer [ErrMsg] [String] > checkRules (name:rest) above nonterms > | name == above = checkRules rest name nonterms > | name `elem` nonterms > = do addErr ("Multiple rules for '" ++ name ++ "'") > checkRules rest name nonterms > | otherwise = checkRules rest name (name : nonterms) > checkRules [] _ nonterms = return (reverse nonterms) ----------------------------------------------------------------------------- -- If any attribute directives were used, we are in an attribute grammar, so -- go do special processing. If not, pass on to the regular processing routine > checkCode :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int]) > checkCode arity _ _ code [] = doCheckCode arity code #ifdef HAPPY_BOOTSTRAP > checkCode arity lhs nonterm_names code attrs = rewriteAttributeGrammar arity lhs nonterm_names code attrs #else > checkCode arity _ _ code (_:_) = do > addErr "Attribute grammars are not supported in non-bootstrapped build" > doCheckCode arity code #endif ----------------------------------------------------------------------------- -- Check for every $i that i is <= the arity of the rule. -- At the same time, we collect a list of the variables actually used in this -- code, which is used by the backend. > doCheckCode :: Int -> String -> M (String, [Int]) > doCheckCode arity code0 = go code0 "" [] > where go code acc used = > case code of > [] -> return (reverse acc, used) > > '"' :r -> case reads code :: [(String,String)] of > [] -> go r ('"':acc) used > (s,r'):_ -> go r' (reverse (show s) ++ acc) used > a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc) used > '\'' :r -> case reads code :: [(Char,String)] of > [] -> go r ('\'':acc) used > (c,r'):_ -> go r' (reverse (show c) ++ acc) used > '\\':'$':r -> go r ('$':acc) used > > '$':'>':r -- the "rightmost token" > | arity == 0 -> do addErr "$> in empty rule" > go r acc used > | otherwise -> go r (reverse (mkHappyVar arity) ++ acc) > (arity : used) > > '$':r@(i:_) | isDigit i -> > case reads r :: [(Int,String)] of > (j,r'):_ -> > if j > arity > then do addErr ('$': show j ++ " out of range") > go r' acc used > else go r' (reverse (mkHappyVar j) ++ acc) > (j : used) > [] -> error "doCheckCode []" > c:r -> go r (c:acc) used > mkHappyVar :: Int -> String > mkHappyVar n = "happy_var_" ++ show n