module ReadGrammar where import CFG import Data.List(intersperse) import System.IO import System.Environment (getArgs) data LitGrm = LitGrm { start :: Maybe String, rules :: [(String, [[String]])], rhss :: [[String]] } readGrammar :: Monad m => [String] -> m (Maybe String, [ProductionRule]) readGrammar lines = do startLhsRhssPairList <- rep NoState lines let startsymbol = start startLhsRhssPairList let lhsRhssPairList = rules startLhsRhssPairList let nonterminals = map fst lhsRhssPairList return (startsymbol, concat (map (convert nonterminals) lhsRhssPairList)) -- Checking convert :: [String] -> (String, [[String]]) -> [ProductionRule] convert nonterminals (lhs, rhss) = map (\rhs -> ProductionRule lhs (map (\s -> if s `elem` nonterminals then Nonterminal s else Terminal s) rhs)) rhss -- Parsing data State = NoState | StartSymbol String | Lhs String | Rhs [[String]] deriving Eq -- Note -- * take the first word. After that, it may be regarded as a comment. begin :: Monad m => [Char] -> m State begin [] = return NoState begin ('@':'s':'t':'a':'r':'t':' ':cs) = return (StartSymbol (takeWord cs)) begin (';':cs) = return NoState begin (' ':' ':'=':[]) = return (Rhs [[]]) begin (' ':' ':'=':' ':cs) = return (Rhs [words cs]) begin (' ':' ':'|':' ':cs) = return (Rhs [words cs]) begin cs = let w = takeWord cs in case w of [] -> return NoState _ -> return (Lhs w) takeWord :: String -> String takeWord [] = [] takeWord (' ':cs) = [] takeWord ('\t':cs) = [] takeWord (c:cs) = c : takeWord cs rep :: Monad m => State -> [String] -> m LitGrm rep (Lhs lhs) [] = error "rep: Can't end with Lhs" rep (_) [] = return $ LitGrm {start=Nothing, rules=[], rhss=[]} rep prestate (s:ss) = do state <- begin s startLhsRhsPairList <- rep state ss case (prestate, state) of (NoState, NoState) -> return startLhsRhsPairList (NoState, StartSymbol s) -> case start startLhsRhsPairList of Just s' -> error $ "rep: StartSymbol duplicated: " ++ s ++ ", " ++ s' Nothing -> return startLhsRhsPairList {start = Just s} (NoState, Lhs lhs) -> let rules_ = rules startLhsRhsPairList rhss_ = rhss startLhsRhsPairList in return startLhsRhsPairList { rules=(lhs,rhss_):rules_, rhss=[] } (NoState, Rhs rhss) -> error "rep: Nostate can't change to Rule lhs rhss." (Lhs lhs, NoState) -> error $ "rep: Lhs " ++ lhs ++ " can't change to Nostate." (Lhs lhs, StartSymbol s) -> error $ "rep: Lhs " ++ lhs ++ " can't change to StartSymbol " ++ s (Lhs lhs, Lhs lhs') -> error $ "rep: Lhs " ++ lhs ++ " can't change to " ++ lhs' (Lhs lhs, Rhs rhss_) -> let rhss__ = rhss startLhsRhsPairList in return startLhsRhsPairList {rhss = rhss_ ++ rhss__} (Rhs rhss, NoState) -> return startLhsRhsPairList (Rhs rhss, StartSymbol s) -> error $ "rep: Rhs can't change to StartSymbol " ++ s (Rhs _, Lhs _) -> error "rep: Rhs can't change to Lhs lhs." (Rhs _, Rhs rhss_) -> let rhss__ = rhss startLhsRhsPairList in return startLhsRhsPairList {rhss = rhss_ ++ rhss__} (StartSymbol s, NoState) -> return startLhsRhsPairList (StartSymbol s, StartSymbol s') -> error $ "rep: StartSymbol duplicated(4): " ++ s ++ ", " ++ s' (StartSymbol s, Lhs lhs) -> let rules_ = rules startLhsRhsPairList rhss_ = rhss startLhsRhsPairList in return startLhsRhsPairList { rules=(lhs,rhss_):rules_, rhss=[] } (StartSymbol s, Rhs rhss) -> error $ "rep: StartSymbol " ++ s ++ " can't change to Rule" ---- -- For testing with grm/polyrpc.lgrm -- test fun = do args <- getArgs repTest fun args repTest fun [] = return () repTest fun (arg:args) = do text <- readFile arg fun text repTest fun args parsing text = do startLhsRhssPairList <- rep NoState (lines text) let startsymbol = start startLhsRhssPairList let lhsRhssPairList = rules startLhsRhssPairList mapM_ (\(lhs,rhss) -> prLhsRhss lhs rhss) lhsRhssPairList prLhsRhss :: String -> [[String]] -> IO () prLhsRhss lhs rhss = do putStrLn lhs mapM_ (\rhs -> do { putStr "\t" ; mapM_ (\s -> do {putStr s; putStr " "}) rhs ; putStrLn ""} ) rhss conversion text = do (startsymbol_, prodrules_) <- readGrammar (lines text) case startsymbol_ of Nothing -> error "conversion: No start symbol" Just startsymbol -> do let startsymbol' = startsymbol ++ "'" let startprod = ProductionRule startsymbol' [ Nonterminal startsymbol ] let prodrules = startprod : prodrules_ putStr $ "CFG " ++ show startsymbol' ++ " [\n " -- May replace prodRuleToStr with show putStrLn $ concat (intersperse ",\n " (map prodRuleToStr prodrules)) putStrLn $ "]"