module Parse where import Prelude hiding (lookup) import Text.ParserCombinators.Parsec import Control.Monad import Data.Map import Data lexeme p = do x <- p spaces return x braces p = between (lexeme (char '{')) (lexeme (char '}')) p parseHeader = lexeme (do lexeme (string "%{") css <- many parseHeaderAux string "}" return $ concat css) parseHeaderAux :: Parser String parseHeaderAux = parseString <|> parseHChar '\'' <|> parseBraces <|> parseOther parseHChar cEnd = do char '\'' cs <- parseChar cEnd char '\'' return $ "'" ++ cs ++ "'" parseOther = many1 $ noneOf ['{', '"', '\'', '}'] parseString :: Parser String parseString = do char '"' css <- many (parseChar '"') char '"' return $ "\"" ++ concat css ++ "\"" parseBraces :: Parser String parseBraces = do char '{' css <- many parseHeaderAux char '}' return $ "{" ++ concat css ++ "}" data TreeAction = TreeAction [TreeAction] | LeafAction Action deriving Show flattenAction :: TreeAction -> [Action] flattenAction (LeafAction action) = [action] flattenAction (TreeAction actions) = concatMap flattenAction actions parseActions = between (char '{') (char '}') (liftM (concatMap flattenAction) (many parseActionAux)) parseActionAux = liftM LeafAction parseFString <|> liftM LeafAction parseFChar <|> parseFBraces <|> liftM LeafAction parseMatch <|> liftM (LeafAction . Fill) parseFills parseFString :: Parser Action parseFString = do char '"' css <- many (parseChar '"') char '"' return $ Fill $ "\"" ++ concat css ++ "\"" parseFChar :: Parser Action parseFChar = do char '\'' cs <- parseChar '\'' char '\'' return $ Fill $ "'" ++ cs ++ "'" parseFBraces :: Parser TreeAction parseFBraces = do char '{' actions <- many parseActionAux char '}' return $ TreeAction $ [LeafAction $ Fill "{"] ++ actions ++ [LeafAction $ Fill "}"] parseChar cEnd = do c <- satisfy (/= cEnd) case c of '\\' -> do c' <- anyChar if c' == cEnd then return [c, cEnd] else return [c] _ -> return [c] parseMatch :: Parser Action parseMatch = (do char '$' i <- parseInt return $ Match i) "matcher" parseFills = many1 parseFill parseFill :: Parser Char parseFill = noneOf ['{', '"', '\'', '$', '}'] parseInt = do cs <- many1 digit return $ read cs parseTable = do is <- many parseTableItem return $ fromList is parseTableItem = lexeme (do try (lexeme (string "%token")) name <- lexeme identifier t <- lexeme identifier return (name, t)) parseEntry table = lexeme (do try (lexeme (string "%entry")) name <- lexeme identifier case lookup name table of Just _ -> return name Nothing -> fail $ "Not a token: " ++ name) identifier = (do c <- letter <|> satisfy (== '_') cs <- many (letter <|> digit) return (c : cs)) "identifier" parseGenerators table = many (parseGenerator table) parseGenerator table = do name <- lexeme identifier case lookup name table of Just _ -> do lexeme (char '=') body <- sepBy parseBranch (lexeme (char '|')) lexeme (char ';') return $ Generator name body Nothing -> fail $ "Not a token: " ++ name parseBranch = do matches <- parseMatches actions <- lexeme parseActions return (matches, actions) parseMatches = parseChainL <|> parseNormal parseChainL = do try (lexeme (string "<|")) op <- lexeme identifier opToken <- parseToken token <- parseToken return $ ChainL op opToken token parseNormal = do tokens <- many parseToken return $ Normal tokens parseToken = (parseTerminal <|> parseNonTerminal) "token" parseTerminal = liftM Terminal $ (lexeme (between (char '"') (char '"') identifier) "terminal") parseNonTerminal = liftM NonTerminal $ (lexeme identifier "non-terminal") parseAurochs = do header <- parseHeader <|> return "" table <- parseTable name <- parseEntry table g <- parseGenerators table extra <- parseHeader <|> return "" return (header, table, name, g, extra)