{- BNF Converter: Java JLex generator Copyright (C) 2004 Author: Michael Pellauer This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- ************************************************************** BNF Converter Module Description : This module generates the JLex input file. This file is quite different than Alex or Flex. Author : Michael Pellauer (pellauer@cs.chalmers.se), Bjorn Bringert (bringert@cs.chalmers.se) License : GPL (GNU General Public License) Created : 25 April, 2003 Modified : 4 Nov, 2004 ************************************************************** -} module BNFC.Backend.Java.CFtoJLex15 ( cf2jlex ) where import BNFC.CF import BNFC.Backend.Java.RegToJLex import BNFC.Utils ( (+++) ) import BNFC.Backend.Common.NamedVariables import Data.List --The environment must be returned for the parser to use. 
-- | Generate the JLex specification for a grammar.
--
-- Returns the text of the JLex input file together with the symbol
-- environment that maps every symbol / reserved word of the grammar to
-- the terminal name (@_SYMB_n@) used for it in the generated rules.
-- The environment must be returned for the parser to use.
--
-- Arguments: the base package name (emitted in the @package@ line), the
-- abstract-syntax package name (passed on to 'prelude', which currently
-- ignores it) and the grammar itself.
cf2jlex :: String -> String -> CF -> (String, SymEnv)
cf2jlex packageBase packageAbsyn cf = (jlexText, env)
  where
    jlexText = unlines $ concat
      [ prelude packageBase packageAbsyn
      , cMacros
      , lexSymbols env
      , restOfJLex cf
      ]
    -- Symbols first, then reserved words, numbered consecutively from 0.
    env = zip tokens (map (("_SYMB_" ++) . show) [0 :: Int ..])
    tokens = symbols cf ++ reservedWords cf

-- | Header of the JLex file: package declaration, the CUP import, the
-- JLex directives and the user-code section that declares the string
-- accumulator and two helper methods of the generated lexer.
-- The second argument (abstract-syntax package) is not used here.
prelude :: String -> String -> [String]
prelude packageBase packageAbsyn =
  [ "// This JLex file was machine-generated by the BNF converter"
  , "package" +++ packageBase ++ ";"
  , ""
  , "import java_cup.runtime.*;"
  , "%%"
  , "%cup"
  , "%unicode"
  , "%line"
  , "%public"
  , "%{"
  , " String pstring = new String();"
  , " public int line_num() { return (yyline+1); }"
  , " public String buff() { return new String(yy_buffer,yy_buffer_index,10).trim(); }"
  , "%}"
  ]

-- | Macro definitions and lexical state declarations, terminated by the
-- @%%@ separator that opens the rules section.
--
-- For now all categories are included.
-- Optimally only the ones that are used should be generated.
cMacros :: [String]
cMacros =
  [ "LETTER = ({CAPITAL}|{SMALL})"
  , "CAPITAL = [A-Z\\xC0-\\xD6\\xD8-\\xDE]"
  , "SMALL = [a-z\\xDF-\\xF6\\xF8-\\xFF]"
  , "DIGIT = [0-9]"
  , "IDENT = ({LETTER}|{DIGIT}|['_])"
  , "%state COMMENT"
  , "%state CHAR"
  , "%state CHARESC"
  , "%state CHAREND"
  , "%state STRING"
  , "%state ESCAPED"
  , "%%"
  ]

-- | One rule per entry of the symbol environment: the escaped symbol
-- text, recognised in the initial state only, returning the matching
-- @sym@ constant.
lexSymbols :: SymEnv -> [String]
lexSymbols = map transSym
  where
    transSym (s, r) =
      "<YYINITIAL>" ++ escapeChars s
        ++ " { return new Symbol(sym." ++ r ++ "); }"

-- | Remaining rules: comments, user-defined tokens, the built-in token
-- categories (each emitted only when the grammar uses it; otherwise an
-- empty line is emitted in its place) and white space.
--
-- Every rule is qualified with the lexical state it belongs to.  The
-- states are declared in 'cMacros' and entered through @yybegin@ in the
-- actions below; without the qualifiers the rules for string and
-- character bodies would compete with the ordinary token rules.
restOfJLex :: CF -> [String]
restOfJLex cf =
  [ lexComments (comments cf)
  , userDefTokens
  , ifC catString strStates
  , ifC catChar chStates
  , ifC catDouble "<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? { return new Symbol(sym._DOUBLE_, new Double(yytext())); }"
  , ifC catInteger "<YYINITIAL>{DIGIT}+ { return new Symbol(sym._INTEGER_, new Integer(yytext())); }"
  , ifC catIdent "<YYINITIAL>{LETTER}{IDENT}* { return new Symbol(sym._IDENT_, yytext().intern()); }"
  , "<YYINITIAL>[ \\t\\r\\n\\f] { /* ignore white space. */ }"
  ]
  where
    ifC cat s = if isUsedCat cf cat then s else ""
    userDefTokens = unlines
      [ "<YYINITIAL>" ++ printRegJLex regex +++ "{ return new Symbol(sym."
          ++ show name ++ ", yytext().intern()); }"
      | (name, regex) <- tokenPragmas cf
      ]
    -- These handle escaped characters in Strings.
    -- YYINITIAL --(")--> STRING --(\)--> ESCAPED --(any)--> STRING --(")--> YYINITIAL
    strStates = unlines
      [ "<YYINITIAL>\"\\\"\" { yybegin(STRING); }"
      , "<STRING>\\\\ { yybegin(ESCAPED); }"
      , "<STRING>\\\" { String foo = pstring; pstring = new String(); yybegin(YYINITIAL); return new Symbol(sym._STRING_, foo.intern()); }"
      , "<STRING>. { pstring += yytext(); }"
      , "<ESCAPED>n { pstring += \"\\n\"; yybegin(STRING); }"
      , "<ESCAPED>\\\" { pstring += \"\\\"\"; yybegin(STRING); }"
      , "<ESCAPED>\\\\ { pstring += \"\\\\\"; yybegin(STRING); }"
      , "<ESCAPED>t { pstring += \"\\t\"; yybegin(STRING); }"
      , "<ESCAPED>. { pstring += yytext(); yybegin(STRING); }"
      ]
    -- These handle escaped characters in Chars.
    -- YYINITIAL --(')--> CHAR [--(\)--> CHARESC] --(char)--> CHAREND --(')--> YYINITIAL
    chStates = unlines
      [ "<YYINITIAL>\"'\" { yybegin(CHAR); }"
      , "<CHAR>\\\\ { yybegin(CHARESC); }"
      , "<CHAR>[^'] { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character(yytext().charAt(0))); }"
      , "<CHARESC>n { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character('\\n')); }"
      , "<CHARESC>t { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character('\\t')); }"
      , "<CHARESC>. { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character(yytext().charAt(0))); }"
      , "<CHAREND>\"'\" {yybegin(YYINITIAL);}"
      ]

-- | Rules for all comment forms of the grammar.  The first component of
-- the pair holds the (begin, end) delimiters of multi-line comments, the
-- second the prefixes of single-line comments; single-line rules are
-- emitted first.
lexComments :: ([(String, String)], [String]) -> String
lexComments (multi, single) =
  unlines (map lexSingleComment single) ++ unlines (map lexMultiComment multi)

-- | Rule that skips a single-line comment: the given prefix followed by
-- everything up to and including the next newline.
-- The prefix is placed between double quotes without any escaping.
lexSingleComment :: String -> String
lexSingleComment c =
  "<YYINITIAL>\"" ++ c ++ "\"[^\\n]*\\n { /* BNFC single-line comment */ }"

-- | Rules that skip a multi-line comment delimited by @b@ and @e@:
-- the opening delimiter switches to the COMMENT state, in which every
-- character and newline is discarded until the closing delimiter
-- switches back to the initial state.
--
-- There might be a possible bug here if a language includes 2 multi-line
-- comments: all of them share the single COMMENT state, so a comment
-- could be started with one delimiter and ended with the closing
-- delimiter of another.  However this seems rare.
lexMultiComment :: (String, String) -> String
lexMultiComment (b, e) = unlines
  [ "<YYINITIAL>\"" ++ b ++ "\" { yybegin(COMMENT); }"
  , "<COMMENT>\"" ++ e ++ "\" { yybegin(YYINITIAL); }"
  , "<COMMENT>. { }"
  , "<COMMENT>[\\n] { }"
  ]

-- lexReserved :: String -> String
-- lexReserved s = "\"" ++ s ++ "\" { return new Symbol(sym.TS, yytext()); }"

-- | Helper function that escapes characters in strings by applying
-- 'escapeChar' to every character and concatenating the results.
escapeChars :: String -> String
escapeChars = concatMap escapeChar