{- BNF Converter: ocamllex Generator Copyright (C) 2005 Author: Kristofer Johannisson This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -- based on BNFC Haskell backend module CFtoOCamlLex (cf2ocamllex) where import Data.List import Data.Char import CF import AbsBNF import CFtoOCamlYacc (terminal) import Utils ((+++)) cf2ocamllex :: String -> String -> CF -> String cf2ocamllex name parserMod cf = unlines $ concat $ intersperse [""] [ header parserMod cf, definitions cf, let r = rules cf in case r of [] -> [] x:xs -> ("rule" +++ x) : map ("and" +++) xs ] header :: String -> CF -> [String] header parserMod cf = [ "(* This ocamllex file was machine-generated by the BNF converter *)", "{", "open " ++ parserMod, "open Lexing", "", hashtables cf, "", "let unescapeInitTail (s:string) : string =", " let rec unesc s = match s with", " '\\\\'::c::cs when List.mem c ['\\\"'; '\\\\'; '\\\''] -> c :: unesc cs", " | '\\\\'::'n'::cs -> '\\n' :: unesc cs", " | '\\\\'::'t'::cs -> '\\t' :: unesc cs", " | '\\\"'::[] -> []", " | c::cs -> c :: unesc cs", " | _ -> []", " (* explode/implode from caml FAQ *)", " in let explode (s : string) : char list =", " let rec exp i l =", " if i < 0 then l else exp (i - 1) (s.[i] :: l) in", " exp (String.length s - 1) []", " in let implode (l : char list) : string =", " let res = String.create (List.length l) in", " let rec imp i = function", " | [] -> res", " | c :: l -> res.[i] <- c; imp (i + 1) l in", " imp 0 l", " in implode (unesc (List.tl (explode s)))", "", "let incr_lineno (lexbuf:Lexing.lexbuf) : unit =", " let pos = lexbuf.lex_curr_p in", " lexbuf.lex_curr_p <- { pos with", " pos_lnum = pos.pos_lnum + 1;", " pos_bol = pos.pos_cnum;", " }", "}" ] -- | set up hashtables for reserved symbols and words hashtables :: CF -> String hashtables cf = ht "symbol_table" (symbols cf ) ++ "\n" ++ ht "resword_table" (reservedWords cf) where ht table syms | length syms == 0 = "" ht table syms = unlines [ "let" +++ table +++ "= Hashtbl.create " ++ show (length syms), "let _ = List.iter (fun (kwd, tok) -> Hashtbl.add" +++ table +++ "kwd tok)", " [" ++ concat (intersperse ";" keyvals) ++ "]" ] where keyvals = map (\(x,y) -> "(" ++ x ++ ", " ++ y ++ ")") (zip (map show syms) (map (terminal cf) syms)) definitions :: CF -> [String] definitions cf = concat [ cMacros, rMacros cf ] cMacros :: [String] cMacros = [ "let l = ['a'-'z' 'A'-'Z' '\\192' - '\\255'] # ['\\215' '\\247'] (* isolatin1 letter FIXME *)", "let c = ['A'-'Z' '\\192'-'\\221'] # ['\\215'] (* capital isolatin1 letter FIXME *)", "let s = ['a'-'z' '\\222'-'\\255'] # ['\\247'] (* small isolatin1 letter FIXME *)", "let d = ['0'-'9'] (* digit *)", "let i = l | d | ['_' '\\''] (* identifier character *)", "let u = ['\\000'-'\\255'] (* universal: any character *)" ] rMacros :: CF -> [String] rMacros cf = let symbs = symbols cf in (if null symbs then [] else [ "let rsyms = (* reserved words consisting of special symbols *)", " " ++ unwords (intersperse "|" (map mkEsc symbs)) ]) where mkEsc s = "\"" ++ concat (map f s) ++ "\"" f x = if x `elem` ['"','\\'] then "\\" ++ [x] else [x] rules :: CF -> [String] rules cf = oneRule $ concat [ lexComments (comments cf), ["l i* " ++ case reservedWords cf of [] -> "{let id = lexeme lexbuf in TOK_Ident id}" _ -> "{let id = lexeme lexbuf in try Hashtbl.find resword_table id with Not_found -> TOK_Ident id}" ], if null (symbols cf) then [] else ["rsyms {let id = lexeme lexbuf in try Hashtbl.find symbol_table id with Not_found -> failwith (\"internal lexer error: reserved symbol \" ^ id ^ \" not found in hashtable\")}"], ["d+ {let i = lexeme lexbuf in TOK_Integer (int_of_string i)}"], ["d+ '.' d+ ('e' ('-')? d+)? {let f = lexeme lexbuf in TOK_Double (float_of_string f)}"], ["'\\\"' ((u # ['\\\"' '\\\\' '\\n']) | ('\\\\' ('\\\"' | '\\\\' | '\\\'' | 'n' | 't')))* '\\\"' {let s = lexeme lexbuf in TOK_String (unescapeInitTail s)}"], ["[' ' '\\t'] {token lexbuf}"], ["'\\n' {incr_lineno lexbuf; token lexbuf}"], ["eof { TOK_EOF }"] ] where oneRule xs = ["token = \n parse " ++ concat (intersperse "\n | " xs)] lexComments ([],[]) = [] lexComments (xs,s1:ys) = ('\"' : s1 ++ "\"" ++ " (_ # '\\n')* { token lexbuf } (* Toss single line comments *)") : lexComments (xs, ys) lexComments (([l1,l2],[r1,r2]):xs,[]) = (concat $ [ ('\"':l1:l2:"\" ((u # ['"), -- FIXME quotes or escape? (l2:"']) | '"), (r1:"' (u # ['"), (r2:"']))* ('"), (r1:"')+ '"), (r2:"' { token lexbuf } \n") ]) : lexComments (xs, []) lexComments ((_:xs),[]) = lexComments (xs,[]) ------------------------------------------------------------------- -- Modified from the inlined version of @RegToAlex@. ------------------------------------------------------------------- -- modified from pretty-printer generated by the BNF converter -- the top-level printing method printRegOCaml :: Reg -> String printRegOCaml = render . prt 0 -- you may want to change render and parenth render :: [String] -> String render = rend 0 where rend :: Int -> [String] -> String rend i ss = case ss of "[" :ts -> cons "[" $ rend i ts "(" :ts -> cons "(" $ rend i ts t : "," :ts -> cons t $ space "," $ rend i ts t : ")" :ts -> cons t $ cons ")" $ rend i ts t : "]" :ts -> cons t $ cons "]" $ rend i ts t :ts -> space t $ rend i ts _ -> "" cons s t = s ++ t new i s = s space t s = if null s then t else t ++ " " ++ s parenth :: [String] -> [String] parenth ss = ["("] ++ ss ++ [")"] -- the printer class does the job class Print a where prt :: Int -> a -> [String] prtList :: [a] -> [String] prtList = concat . map (prt 0) instance Print a => Print [a] where prt _ = prtList instance Print Char where prt _ c = [show c] -- if isAlphaNum c then [[c]] else ['\\':[c]] prtList s = [show s] -- map (concat . prt 0) s prPrec :: Int -> Int -> [String] -> [String] prPrec i j = if j prPrec i 2 (concat [prt 2 reg0 , prt 3 reg]) RAlt reg0 reg -> prPrec i 1 (concat [prt 1 reg0 , ["|"] , prt 2 reg]) RMinus reg0 reg -> prPrec i 1 (concat [prt 2 reg0 , ["#"] , prt 2 reg]) RStar reg -> prPrec i 3 (concat [prt 3 reg , ["*"]]) RPlus reg -> prPrec i 3 (concat [prt 3 reg , ["+"]]) ROpt reg -> prPrec i 3 (concat [prt 3 reg , ["?"]]) REps -> prPrec i 3 (["\"\""]) -- special construct for eps in ocamllex? RChar c -> prPrec i 3 (concat [prt 0 c]) RAlts str -> prPrec i 3 (concat [["["],prt 0 (concatMap show str),["]"]]) RSeqs str -> prPrec i 2 (concat (map (prt 0) str)) RDigit -> prPrec i 3 (concat [["digit"]]) RLetter -> prPrec i 3 (concat [["letter"]]) RUpper -> prPrec i 3 (concat [["upper"]]) RLower -> prPrec i 3 (concat [["lower"]]) RAny -> prPrec i 3 (concat [["univ"]])