module ReadGrammar where
import CFG
import Data.List(intersperse)
import System.IO
import System.Environment (getArgs)
data LitGrm = LitGrm { start :: Maybe String, rules :: [(String, [[String]])], rhss :: [[String]] }
readGrammar :: Monad m => [String] -> m (Maybe String, [ProductionRule])
readGrammar lines = do
startLhsRhssPairList <- rep NoState lines
let startsymbol = start startLhsRhssPairList
let lhsRhssPairList = rules startLhsRhssPairList
let nonterminals = map fst lhsRhssPairList
return (startsymbol, concat (map (convert nonterminals) lhsRhssPairList))
convert :: [String] -> (String, [[String]]) -> [ProductionRule]
convert nonterminals (lhs, rhss) =
map (\rhs -> ProductionRule lhs
(map (\s -> if s `elem` nonterminals
then Nonterminal s
else Terminal s) rhs)) rhss
data State =
NoState
| StartSymbol String
| Lhs String
| Rhs [[String]]
deriving Eq
begin :: Monad m => [Char] -> m State
begin [] = return NoState
begin ('@':'s':'t':'a':'r':'t':' ':cs) = return (StartSymbol (takeWord cs))
begin (';':cs) = return NoState
begin (' ':' ':'=':[]) = return (Rhs [[]])
begin (' ':' ':'=':' ':cs) = return (Rhs [words cs])
begin (' ':' ':'|':' ':cs) = return (Rhs [words cs])
begin cs =
let w = takeWord cs in
case w of
[] -> return NoState
_ -> return (Lhs w)
takeWord :: String -> String
takeWord [] = []
takeWord (' ':cs) = []
takeWord ('\t':cs) = []
takeWord (c:cs) = c : takeWord cs
rep :: Monad m => State -> [String] -> m LitGrm
rep (Lhs lhs) [] = error "rep: Can't end with Lhs"
rep (_) [] = return $ LitGrm {start=Nothing, rules=[], rhss=[]}
rep prestate (s:ss) = do
state <- begin s
startLhsRhsPairList <- rep state ss
case (prestate, state) of
(NoState, NoState) -> return startLhsRhsPairList
(NoState, StartSymbol s) ->
case start startLhsRhsPairList of
Just s' -> error $ "rep: StartSymbol duplicated: " ++ s ++ ", " ++ s'
Nothing -> return startLhsRhsPairList {start = Just s}
(NoState, Lhs lhs) ->
let rules_ = rules startLhsRhsPairList
rhss_ = rhss startLhsRhsPairList
in return startLhsRhsPairList { rules=(lhs,rhss_):rules_, rhss=[] }
(NoState, Rhs rhss) -> error "rep: Nostate can't change to Rule lhs rhss."
(Lhs lhs, NoState) -> error $ "rep: Lhs " ++ lhs ++ " can't change to Nostate."
(Lhs lhs, StartSymbol s) -> error $ "rep: Lhs " ++ lhs ++ " can't change to StartSymbol " ++ s
(Lhs lhs, Lhs lhs') -> error $ "rep: Lhs " ++ lhs ++ " can't change to " ++ lhs'
(Lhs lhs, Rhs rhss_) ->
let rhss__ = rhss startLhsRhsPairList
in return startLhsRhsPairList {rhss = rhss_ ++ rhss__}
(Rhs rhss, NoState) -> return startLhsRhsPairList
(Rhs rhss, StartSymbol s) -> error $ "rep: Rhs can't change to StartSymbol " ++ s
(Rhs _, Lhs _) -> error "rep: Rhs can't change to Lhs lhs."
(Rhs _, Rhs rhss_) ->
let rhss__ = rhss startLhsRhsPairList
in return startLhsRhsPairList {rhss = rhss_ ++ rhss__}
(StartSymbol s, NoState) -> return startLhsRhsPairList
(StartSymbol s, StartSymbol s') -> error $ "rep: StartSymbol duplicated(4): " ++ s ++ ", " ++ s'
(StartSymbol s, Lhs lhs) ->
let rules_ = rules startLhsRhsPairList
rhss_ = rhss startLhsRhsPairList
in return startLhsRhsPairList { rules=(lhs,rhss_):rules_, rhss=[] }
(StartSymbol s, Rhs rhss) -> error $ "rep: StartSymbol " ++ s ++ " can't change to Rule"
test fun = do
args <- getArgs
repTest fun args
repTest fun [] = return ()
repTest fun (arg:args) = do
text <- readFile arg
fun text
repTest fun args
parsing text = do
startLhsRhssPairList <- rep NoState (lines text)
let startsymbol = start startLhsRhssPairList
let lhsRhssPairList = rules startLhsRhssPairList
mapM_ (\(lhs,rhss) -> prLhsRhss lhs rhss) lhsRhssPairList
prLhsRhss :: String -> [[String]] -> IO ()
prLhsRhss lhs rhss = do
putStrLn lhs
mapM_ (\rhs ->
do { putStr "\t"
; mapM_ (\s -> do {putStr s; putStr " "}) rhs
; putStrLn ""} ) rhss
conversion text = do
(startsymbol_, prodrules_) <- readGrammar (lines text)
case startsymbol_ of
Nothing -> error "conversion: No start symbol"
Just startsymbol ->
do
let startsymbol' = startsymbol ++ "'"
let startprod = ProductionRule startsymbol' [ Nonterminal startsymbol ]
let prodrules = startprod : prodrules_
putStr $ "CFG " ++ show startsymbol' ++ " [\n "
putStrLn $ concat (intersperse ",\n " (map prodRuleToStr prodrules))
putStrLn $ "]"