{-# LANGUAGE OverloadedStrings #-}
module BNFC.Backend.OCaml.CFtoOCamlLex (cf2ocamllex) where
import Prelude hiding ((<>))
import Data.Char (ord)
import qualified Data.List as List
import Text.PrettyPrint hiding (render)
import qualified Text.PrettyPrint as PP
import BNFC.Abs
import BNFC.CF
import BNFC.Backend.Common (asciiKeywords, unicodeAndSymbols)
import BNFC.Backend.OCaml.CFtoOCamlYacc (terminal)
import BNFC.Backend.OCaml.OCamlUtil (mkEsc, ocamlTokenName)
import BNFC.Lexing (mkRegMultilineComment)
import BNFC.Utils (cstring, unless)
cf2ocamllex :: String -> String -> CF -> String
cf2ocamllex :: String -> String -> CF -> String
cf2ocamllex String
_ String
parserMod CF
cf = [String] -> String
unlines forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [[a]] -> [a]
List.intercalate [String
""]
[ String -> CF -> [String]
header String
parserMod CF
cf
, [String]
cMacros
, CF -> [String]
rMacros CF
cf
, CF -> [String]
uMacros CF
cf
, [ Doc -> String
PP.render forall a b. (a -> b) -> a -> b
$ CF -> Doc
rules CF
cf ]
]
header :: String -> CF -> [String]
String
parserMod CF
cf = forall a. [a] -> [[a]] -> [a]
List.intercalate [String
""] forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Bool
null) forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
[ [ [ String
"(* Lexer definition for ocamllex. *)"
, String
""
, String
"(* preamble *)"
, String
"{"
, String
"open " forall a. [a] -> [a] -> [a]
++ String
parserMod
, String
"open Lexing"
]
]
, CF -> [[String]]
hashtables CF
cf
, [ [ String
"let unescapeInitTail (s:string) : string ="
, String
" let rec unesc s = match s with"
, String
" '\\\\'::c::cs when List.mem c ['\\\"'; '\\\\'; '\\\''] -> c :: unesc cs"
, String
" | '\\\\'::'n'::cs -> '\\n' :: unesc cs"
, String
" | '\\\\'::'t'::cs -> '\\t' :: unesc cs"
, String
" | '\\\\'::'r'::cs -> '\\r' :: unesc cs"
, String
" | '\\\"'::[] -> []"
, String
" | c::cs -> c :: unesc cs"
, String
" | _ -> []"
, String
" (* explode/implode from caml FAQ *)"
, String
" in let explode (s : string) : char list ="
, String
" let rec exp i l ="
, String
" if i < 0 then l else exp (i - 1) (s.[i] :: l) in"
, String
" exp (String.length s - 1) []"
, String
" in let implode (l : char list) : string ="
, String
" let res = Buffer.create (List.length l) in"
, String
" List.iter (Buffer.add_char res) l;"
, String
" Buffer.contents res"
, String
" in implode (unesc (List.tl (explode s)))"
, String
""
, String
"let incr_lineno (lexbuf:Lexing.lexbuf) : unit ="
, String
" let pos = lexbuf.lex_curr_p in"
, String
" lexbuf.lex_curr_p <- { pos with"
, String
" pos_lnum = pos.pos_lnum + 1;"
, String
" pos_bol = pos.pos_cnum;"
, String
" }"
, String
"}"
]
]
]
hashtables :: CF -> [[String]]
hashtables :: CF -> [[String]]
hashtables CF
cf =
[ String -> [String] -> [String]
ht String
"symbol_table" forall a b. (a -> b) -> a -> b
$ CF -> [String]
unicodeAndSymbols CF
cf
, String -> [String] -> [String]
ht String
"resword_table" forall a b. (a -> b) -> a -> b
$ CF -> [String]
asciiKeywords CF
cf
]
where
ht :: String -> [String] -> [String]
ht :: String -> [String] -> [String]
ht String
table [String]
syms = forall m. Monoid m => Bool -> m -> m
unless (forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
syms) forall a b. (a -> b) -> a -> b
$
[ [String] -> String
unwords [ String
"let", String
table, String
"= Hashtbl.create", forall a. Show a => a -> String
show (forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
syms) ]
, [String] -> String
unwords [ String
"let _ = List.iter (fun (kwd, tok) -> Hashtbl.add", String
table, String
"kwd tok)" ]
, forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [ String
" [", forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (forall a. a -> [a] -> [a]
List.intersperse String
";" [String]
keyvals), String
"]" ]
]
where
keyvals :: [String]
keyvals = forall a b. (a -> b) -> [a] -> [b]
map (\ String
s -> forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [ String
"(", String -> String
mkEsc String
s, String
", ", CF -> String -> String
terminal CF
cf String
s, String
")" ]) [String]
syms
cMacros :: [String]
cMacros :: [String]
cMacros =
[ String
"(* BNFC character classes *)"
, String
"let _letter = ['a'-'z' 'A'-'Z' '\\192' - '\\255'] # ['\\215' '\\247'] (* isolatin1 letter FIXME *)"
, String
"let _upper = ['A'-'Z' '\\192'-'\\221'] # '\\215' (* capital isolatin1 letter FIXME *)"
, String
"let _lower = ['a'-'z' '\\222'-'\\255'] # '\\247' (* small isolatin1 letter FIXME *)"
, String
"let _digit = ['0'-'9'] (* _digit *)"
, String
"let _idchar = _letter | _digit | ['_' '\\''] (* identifier character *)"
, String
"let _universal = _ (* universal: any character *)"
]
rMacros :: CF -> [String]
rMacros :: CF -> [String]
rMacros CF
cf
| forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
symbs = []
| Bool
otherwise =
[ String
"(* reserved words consisting of special symbols *)"
, [String] -> String
unwords forall a b. (a -> b) -> a -> b
$ String
"let rsyms =" forall a. a -> [a] -> [a]
: forall a. a -> [a] -> [a]
List.intersperse String
"|" (forall a b. (a -> b) -> [a] -> [b]
map String -> String
mkEsc [String]
symbs)
]
where symbs :: [String]
symbs = CF -> [String]
unicodeAndSymbols CF
cf
uMacros :: CF -> [String]
uMacros :: CF -> [String]
uMacros CF
cf = if forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
res then [] else String
"(* user-defined token types *)" forall a. a -> [a] -> [a]
: [String]
res
where res :: [String]
res = [String
"let " forall a. [a] -> [a] -> [a]
++ String
name forall a. [a] -> [a] -> [a]
++ String
" = " forall a. [a] -> [a] -> [a]
++ String
rep | (String
name, String
rep, String
_, Bool
_) <- CF -> [(String, String, String, Bool)]
userTokens CF
cf]
userTokens :: CF -> [(String, String, String, Bool)]
userTokens :: CF -> [(String, String, String, Bool)]
userTokens CF
cf =
[ (String -> String
ocamlTokenName String
name, Reg -> String
printRegOCaml Reg
reg, String
name, Bool
pos)
| TokenReg RString
n Bool
pos Reg
reg <- forall function. CFG function -> [Pragma]
cfgPragmas CF
cf
, let name :: String
name = forall a. WithPosition a -> a
wpThing RString
n
]
mkRule :: Doc -> [(Doc,Doc)] -> Doc
mkRule :: Doc -> [(Doc, Doc)] -> Doc
mkRule Doc
_ [] = Doc
empty
mkRule Doc
entrypoint ((Doc, Doc)
r:[(Doc, Doc)]
rs) = [Doc] -> Doc
vcat
[ Doc
"(* lexing rules *)"
, Doc
"rule" Doc -> Doc -> Doc
<+> Doc
entrypoint Doc -> Doc -> Doc
<+> Doc
"="
, Int -> Doc -> Doc
nest Int
2 forall a b. (a -> b) -> a -> b
$ Doc -> Int -> Doc -> Doc
hang Doc
"parse" Int
4 forall a b. (a -> b) -> a -> b
$ [Doc] -> Doc
vcat forall a b. (a -> b) -> a -> b
$
Int -> Doc -> Doc
nest Int
2 ((Doc, Doc) -> Doc
mkOne (Doc, Doc)
r) forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map ((Doc
"|" Doc -> Doc -> Doc
<+>) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Doc, Doc) -> Doc
mkOne) [(Doc, Doc)]
rs
]
where
mkOne :: (Doc, Doc) -> Doc
mkOne (Doc
regex, Doc
action) = Doc
regex Doc -> Doc -> Doc
$$ Int -> Doc -> Doc
nest Int
8 ([Doc] -> Doc
hsep [Doc
"{", Doc
action, Doc
"}"])
mkRegexSingleLineComment :: String -> Doc
String
s = String -> Doc
cstring String
s Doc -> Doc -> Doc
<+> Doc
"(_ # '\\n')*"
mkRegexMultilineComment :: String -> String -> Doc
String
b String
e = String -> Doc
text forall a b. (a -> b) -> a -> b
$ Reg -> String
printRegOCaml forall a b. (a -> b) -> a -> b
$ String -> String -> Reg
mkRegMultilineComment String
b String
e
rules :: CF -> Doc
rules :: CF -> Doc
rules CF
cf = Doc -> [(Doc, Doc)] -> Doc
mkRule Doc
"token" forall a b. (a -> b) -> a -> b
$
[ (String -> Doc
mkRegexSingleLineComment String
s, Doc
"token lexbuf") | String
s <- [String]
singleLineC ]
forall a. [a] -> [a] -> [a]
++
[ (String -> String -> Doc
mkRegexMultilineComment String
b String
e, Doc
"token lexbuf") | (String
b,String
e) <- [(String, String)]
multilineC]
forall a. [a] -> [a] -> [a]
++
[ ( Doc
"rsyms"
, Doc
"let x = lexeme lexbuf in try Hashtbl.find symbol_table x with Not_found -> failwith (\"internal lexer error: reserved symbol \" ^ x ^ \" not found in hashtable\")" )
| Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => t a -> Bool
null (forall function. CFG function -> [String]
cfgSymbols CF
cf))]
forall a. [a] -> [a] -> [a]
++
[ (String -> Doc
text String
n , Bool -> Doc -> Doc
tokenAction Bool
pos (String -> Doc
text String
t)) | (String
n,String
_,String
t,Bool
pos) <- CF -> [(String, String, String, Bool)]
userTokens CF
cf]
forall a. [a] -> [a] -> [a]
++
[ ( Doc
"_letter _idchar*", Bool -> Doc -> Doc
tokenAction Bool
False Doc
"Ident" ) ]
forall a. [a] -> [a] -> [a]
++
[ ( Doc
"_digit+", Doc
"TOK_Integer (int_of_string (lexeme lexbuf))" )
, ( Doc
"_digit+ '.' _digit+ ('e' ('-')? _digit+)?"
, Doc
"TOK_Double (float_of_string (lexeme lexbuf))" )
, ( Doc
"'\\\"' (([^ '\\\"' '\\\\' '\\n']) | ('\\\\' ('\\\"' | '\\\\' | '\\\'' | 'n' | 't' | 'r')))* '\\\"'"
, Doc
"TOK_String (unescapeInitTail (lexeme lexbuf))" )
, ( Doc
"'\\'' (([^ '\\\'' '\\\\']) | ('\\\\' ('\\\\' | '\\\'' | 'n' | 't' | 'r'))) '\\\''"
, Doc
"TOK_Char (lexeme lexbuf).[1]")
, ( Doc
"[' ' '\\t' '\\r']", Doc
"token lexbuf")
, ( Doc
"'\\n'", Doc
"incr_lineno lexbuf; token lexbuf" )
, ( Doc
"eof", Doc
"TOK_EOF" )
]
where
([(String, String)]
multilineC, [String]
singleLineC) = CF -> ([(String, String)], [String])
comments CF
cf
tokenAction :: Bool -> Doc -> Doc
tokenAction Bool
pos Doc
t = case CF -> [String]
asciiKeywords CF
cf of
[] -> Doc
"TOK_" Doc -> Doc -> Doc
<> Doc
t Doc -> Doc -> Doc
<+> String -> Doc
arg String
"(lexeme lexbuf)"
[String]
_ -> Doc
"let l = lexeme lexbuf in try Hashtbl.find resword_table l with Not_found -> TOK_" Doc -> Doc -> Doc
<> Doc
t Doc -> Doc -> Doc
<+> String -> Doc
arg String
"l"
where
arg :: String -> Doc
arg String
l | Bool
pos = String -> Doc
text forall a b. (a -> b) -> a -> b
$ String
"((lexeme_start lexbuf, lexeme_end lexbuf), " forall a. [a] -> [a] -> [a]
++ String
l forall a. [a] -> [a] -> [a]
++ String
")"
| Bool
otherwise = String -> Doc
text String
l
printRegOCaml :: Reg -> String
printRegOCaml :: Reg -> String
printRegOCaml = [String] -> String
render forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Print a => Int -> a -> [String]
prt Int
0
render :: [String] -> String
render :: [String] -> String
render = Int -> [String] -> String
rend Int
0
where rend :: Int -> [String] -> String
rend :: Int -> [String] -> String
rend Int
i [String]
ss = case [String]
ss of
String
"[" :[String]
ts -> forall a. [a] -> [a] -> [a]
cons String
"[" forall a b. (a -> b) -> a -> b
$ Int -> [String] -> String
rend Int
i [String]
ts
String
"(" :[String]
ts -> forall a. [a] -> [a] -> [a]
cons String
"(" forall a b. (a -> b) -> a -> b
$ Int -> [String] -> String
rend Int
i [String]
ts
String
t : String
"," :[String]
ts -> forall a. [a] -> [a] -> [a]
cons String
t forall a b. (a -> b) -> a -> b
$ String -> String -> String
space String
"," forall a b. (a -> b) -> a -> b
$ Int -> [String] -> String
rend Int
i [String]
ts
String
t : String
")" :[String]
ts -> forall a. [a] -> [a] -> [a]
cons String
t forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a] -> [a]
cons String
")" forall a b. (a -> b) -> a -> b
$ Int -> [String] -> String
rend Int
i [String]
ts
String
t : String
"]" :[String]
ts -> forall a. [a] -> [a] -> [a]
cons String
t forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a] -> [a]
cons String
"]" forall a b. (a -> b) -> a -> b
$ Int -> [String] -> String
rend Int
i [String]
ts
String
t :[String]
ts -> String -> String -> String
space String
t forall a b. (a -> b) -> a -> b
$ Int -> [String] -> String
rend Int
i [String]
ts
[String]
_ -> String
""
cons :: [a] -> [a] -> [a]
cons [a]
s [a]
t = [a]
s forall a. [a] -> [a] -> [a]
++ [a]
t
space :: String -> String -> String
space String
t String
s = if forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
s then String
t else String
t forall a. [a] -> [a] -> [a]
++ String
" " forall a. [a] -> [a] -> [a]
++ String
s
parenth :: [String] -> [String]
parenth :: [String] -> [String]
parenth [String]
ss = [String
"("] forall a. [a] -> [a] -> [a]
++ [String]
ss forall a. [a] -> [a] -> [a]
++ [String
")"]
class Print a where
prt :: Int -> a -> [String]
prtList :: [a] -> [String]
prtList = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (forall a. Print a => Int -> a -> [String]
prt Int
0)
instance Print a => Print [a] where
prt :: Int -> [a] -> [String]
prt Int
_ = forall a. Print a => [a] -> [String]
prtList
instance Print Char where
prt :: Int -> Char -> [String]
prt Int
_ Char
c = [Char -> String
charLiteral Char
c]
prtList :: String -> [String]
prtList String
s = [forall a. Show a => a -> String
show String
s]
charLiteral :: Char -> String
charLiteral :: Char -> String
charLiteral Char
c
| Char -> Int
ord Char
c forall a. Ord a => a -> a -> Bool
<= Int
256 = forall a. Show a => a -> String
show Char
c
| Bool
otherwise = [Char
'"', Char
c, Char
'"']
prPrec :: Int -> Int -> [String] -> [String]
prPrec :: Int -> Int -> [String] -> [String]
prPrec Int
i Int
j = if Int
jforall a. Ord a => a -> a -> Bool
<Int
i then [String] -> [String]
parenth else forall a. a -> a
id
instance Print Identifier where
prt :: Int -> Identifier -> [String]
prt Int
_ (Identifier ((Int, Int)
_, String
i)) = [String
i]
instance Print Reg where
prt :: Int -> Reg -> [String]
prt Int
i Reg
e = case Reg
e of
RSeq Reg
reg0 Reg
reg -> Int -> Int -> [String] -> [String]
prPrec Int
i Int
2 (forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [forall a. Print a => Int -> a -> [String]
prt Int
2 Reg
reg0 , forall a. Print a => Int -> a -> [String]
prt Int
3 Reg
reg])
RAlt Reg
reg0 Reg
reg -> Int -> Int -> [String] -> [String]
prPrec Int
i Int
1 (forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [forall a. Print a => Int -> a -> [String]
prt Int
1 Reg
reg0 , [String
"|"] , forall a. Print a => Int -> a -> [String]
prt Int
2 Reg
reg])
RMinus Reg
RAny (RChar Char
c) -> [String
"[^", Char -> String
charLiteral Char
c, String
"]"]
RMinus Reg
RAny (RAlts String
str) -> forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [ [String
"[^"], forall a b. (a -> b) -> [a] -> [b]
map Char -> String
charLiteral String
str, [String
"]"] ]
RMinus Reg
reg0 Reg
reg -> Int -> Int -> [String] -> [String]
prPrec Int
i Int
1 (forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [forall a. Print a => Int -> a -> [String]
prt Int
2 Reg
reg0 , [String
"#"] , forall a. Print a => Int -> a -> [String]
prt Int
2 Reg
reg])
RStar Reg
reg -> forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [forall a. Print a => Int -> a -> [String]
prt Int
3 Reg
reg , [String
"*"]]
RPlus Reg
reg -> forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [forall a. Print a => Int -> a -> [String]
prt Int
3 Reg
reg , [String
"+"]]
ROpt Reg
reg -> forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [forall a. Print a => Int -> a -> [String]
prt Int
3 Reg
reg , [String
"?"]]
Reg
REps -> [String
"\"\""]
RChar Char
c -> [ Char -> String
charLiteral Char
c ]
RAlts String
str -> Int -> Int -> [String] -> [String]
prPrec Int
i Int
1 forall a b. (a -> b) -> a -> b
$ forall a. a -> [a] -> [a]
List.intersperse String
"|" forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Char -> String
charLiteral String
str
RSeqs String
str -> [ forall a. Show a => a -> String
show String
str ]
Reg
RDigit -> [String
"_digit"]
Reg
RLetter -> [String
"_letter"]
Reg
RUpper -> [String
"_upper"]
Reg
RLower -> [String
"_lower"]
Reg
RAny -> [String
"_universal"]