module LexerGen2(lexerGen,OutputFun(..)) where import RegExp(Trans(..),Transducer) import DFA(DFA(..),renumberEdges,tokenClasses,showDFA) import Minimalize import CompileRegExp(compile) import DetMachineToHaskell2(dfaToHaskell,OutputFun(..)) import PPrint(pprint) import qualified OrdMap as OM(fromList) import List(sort) import HaskellChars(HaskellChar) {-+ The lexer generator takes the name of the module to generate, the name of the lexer function to export from that module and the regular expression that defines the lexical syntax. It returns the generated Haskell module as a string.
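lexerGen
also consults the command line arguments, which are passed in as a list
of words. If the word nocc is present, it does not use character classes
to reduce the size of the code. If the word dfa is present, it returns
the output of showDFA for the minimalized DFA instead of the generated
Haskell module.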
-}
-- Compile the regular expression, convert the result with dfa2old,
-- minimalize it, and render it either as a DFA dump or as Haskell code.
-- The argument words "dfa" and "nocc" select the rendering (see below).
lexerGen :: (Ord o,Show o,OutputFun o) =>
            String -> String -> Transducer HaskellChar o -> [String] -> String
lexerGen moduleName functionName program args =
    render (dfa2old (compile program))
  where
    -- "dfa" among the arguments: show the DFA instead of generating Haskell.
    dumpOnly = "dfa" `elem` args
    -- Character classes are used unless "nocc" is among the arguments.
    withClasses = "nocc" `notElem` args

    -- Pick the rendering pipeline once, based on the arguments.
    render
      | withClasses = emitWithClasses
      | otherwise   = emit Nothing

    -- Number the sorted token classes from 1, map every member of a class
    -- to the number of its class, and renumber the DFA edges accordingly
    -- before rendering.
    emitWithClasses (start, machine) =
        emit (Just classOf) (start, renumberEdges classOf machine)
      where
        classOf =
          [ (c, ix)
          | (ix, (cs, _)) <- zip [(1 :: Int) ..] (sort (tokenClasses machine))
          , c <- cs
          ]

    -- Minimalize the machine, then either show it or pretty-print the
    -- Haskell code produced by dfaToHaskell, preceded by a comment that
    -- lists the equal states reported by minimalize.
    emit optccs dfa0
      | dumpOnly  = showDFA minimal
      | otherwise =
          concat
            [ "\n-- Automatically generated code for a DFA follows:\n"
            , "--Equal states: ", show eqs, "\n"
            , pprint code
            ]
      where
        (eqs, minimal) = minimalize dfa0
        code =
          dfaToHaskell optccs moduleName ["Char","HsLexUtils"] functionName minimal
{-+
A function to convert from the new to the old DFA representation...
-}
-- Convert a list of (state, (flag, edges)) entries into the old DFA form:
-- a pair of (1, states whose flag is True) and a DFA built from an ordered
-- map of per-state input and output edges.
-- NOTE(review): the constant 1 is presumably the start state and the flag
-- presumably marks final states -- confirm against the DFA module.
dfa2old dfa =
    ((1 :: Int, accepting), DFA (OM.fromList (map convert dfa)))
  where
    -- States whose flag is True.
    accepting = [ s | (s, (True, _)) <- dfa ]

    -- Split one state's edges by label constructor: I edges become the
    -- input list and O edges the output list; any other edge is dropped by
    -- the pattern match.
    convert (s, (_, edges)) = (s, (onInput, onOutput))
      where
        onInput  = [ (i, target) | (I i, target) <- edges ]
        onOutput = [ (o, target) | (O o, target) <- edges ]