{ -- At present Alex generates code with too many warnings. {-# LANGUAGE Trustworthy #-} {-# OPTIONS_GHC -w #-} module Cryptol.Parser.Lexer ( primLexer, lexer, Layout(..) , Token(..), TokenT(..) , TokenV(..), TokenKW(..), TokenErr(..), TokenOp(..), TokenSym(..), TokenW(..) , Located(..) , Config(..) , defaultConfig ) where import Cryptol.Parser.Position import Cryptol.Parser.LexerUtils import Cryptol.Parser.Unlit(unLit) import qualified Data.Text.Lazy as Text } $id_first = [a-zA-Z_α-ωΑ-Ω] $id_next = [a-zA-Z0-9_'α-ωΑ-Ω] @num2 = "0b" [0-1]+ @num8 = "0o" [0-7]+ @num10 = [0-9]+ @num16 = "0x" [0-9A-Fa-f]+ @strPart = [^\\\"]+ @chrPart = [^\\\']+ :- <0,comment> "/*" { startComment } { "*/" { endComent } . { addToComment } \n { addToComment } } { @strPart { addToString } \" { endString } \\. { addToString } \n { endString } } { @chrPart { addToChar } \' { endChar } \\. { addToChar } \n { endChar } } <0> { $white+ { emit $ White Space } "//" .* { emit $ White LineComment } -- Please update the docs, if you add new entries. "Arith" { emit $ KW KW_Arith } "Bit" { emit $ KW KW_Bit } "Cmp" { emit $ KW KW_Cmp } "False" { emit $ KW KW_False } "Inf" { emit $ KW KW_inf } "True" { emit $ KW KW_True } "else" { emit $ KW KW_else } "Eq" { emit $ KW KW_Eq } "error" { emit $ KW KW_error } "extern" { emit $ KW KW_extern } "fin" { emit $ KW KW_fin } "if" { emit $ KW KW_if } "private" { emit $ KW KW_private } "join" { emit $ KW KW_join } "include" { emit $ KW KW_include } "inf" { emit $ KW KW_inf } "lg2" { emit $ KW KW_lg2 } "lengthFromThen" { emit $ KW KW_lengthFromThen } "lengthFromThenTo" { emit $ KW KW_lengthFromThenTo } "max" { emit $ KW KW_max } "min" { emit $ KW KW_min } "module" { emit $ KW KW_module } "newtype" { emit $ KW KW_newtype } "pragma" { emit $ KW KW_pragma } "property" { emit $ KW KW_property } "pmult" { emit $ KW KW_pmult } "pdiv" { emit $ KW KW_pdiv } "pmod" { emit $ KW KW_pmod } "random" { emit $ KW KW_random } "reverse" { emit $ KW KW_reverse } "split" { emit $ KW KW_split } "splitAt" { emit $ KW KW_splitAt } "then" { emit $ KW KW_then } "transpose" { emit $ KW KW_transpose } "type" { emit $ KW KW_type } "where" { emit $ KW KW_where } "let" { emit $ KW KW_let } "x" { emit $ KW KW_x } "zero" { emit $ KW KW_zero } "import" { emit $ KW KW_import } "as" { emit $ KW KW_as } "hiding" { emit $ KW KW_hiding } "newtype" { emit $ KW KW_newtype } @num2 { emitS (numToken 2 . drop 2) } @num8 { emitS (numToken 8 . drop 2) } @num10 { emitS (numToken 10 . drop 0) } @num16 { emitS (numToken 16 . drop 2) } "_" { emit $ Sym Underscore } $id_first $id_next* { mkIdent } "+" { emit $ Op Plus } "-" { emit $ Op Minus } "*" { emit $ Op Mul } "/" { emit $ Op Div } "%" { emit $ Op Mod } "^^" { emit $ Op Exp } "!=" { emit $ Op NotEqual } "==" { emit $ Op Equal } "===" { emit $ Op EqualFun } "!==" { emit $ Op NotEqualFun } ">" { emit $ Op GreaterThan } "<" { emit $ Op LessThan } "<=" { emit $ Op LEQ } ">=" { emit $ Op GEQ } ">>" { emit $ Op ShiftR } "<<" { emit $ Op ShiftL } ">>>" { emit $ Op RotR } "<<<" { emit $ Op RotL } "~" { emit $ Op Complement } "^" { emit $ Op Xor } "||" { emit $ Op Disj } "&&" { emit $ Op Conj } "!" { emit $ Op Bang } "!!" { emit $ Op BangBang } "@" { emit $ Op At } "@@" { emit $ Op AtAt } "#" { emit $ Op Hash } "\" { emit $ Sym Lambda } "->" { emit $ Sym ArrR } "<-" { emit $ Sym ArrL } "=>" { emit $ Sym FatArrR } "=" { emit $ Sym EqDef } "," { emit $ Sym Comma } ";" { emit $ Sym Semi } "." { emit $ Sym Dot } ":" { emit $ Sym Colon } "::" { emit $ Sym ColonColon } "`" { emit $ Sym BackTick } ".." { emit $ Sym DotDot } "..." { emit $ Sym DotDotDot } "|" { emit $ Sym Bar } "(" { emit $ Sym ParenL } ")" { emit $ Sym ParenR } "[" { emit $ Sym BracketL } "]" { emit $ Sym BracketR } "{" { emit $ Sym CurlyL } "}" { emit $ Sym CurlyR } "<|" { emit $ Sym TriL } "|>" { emit $ Sym TriR } \" { startString } \' { startChar } } { -- This code is here because it depends on `comment`, which is defined -- in this file. stateToInt :: LexS -> Int stateToInt Normal = 0 stateToInt (InComment {}) = comment stateToInt (InString {}) = string stateToInt (InChar {}) = char -- | Returns the tokens in the last position of the input that we processed. -- White space is removed, and layout processing is done as requested. -- This stream is fed to the parser. lexer :: Config -> String -> ([Located Token], Position) lexer cfg cs = ( case cfgLayout cfg of Layout -> layout cfg lexemes NoLayout -> lexemes , finalPos ) where (lex0, finalPos) = primLexer cfg cs lexemes = dropWhite lex0 -- | Returns the tokens and the last position of the input that we processed. -- The tokens include whte space tokens. primLexer :: Config -> String -> ([Located Token], Position) primLexer cfg cs = run inp Normal where inp = Inp { alexPos = start , alexInputPrevChar = '\n' , input = Text.unpack -- XXX: Use Text $ unLit (cfgPreProc cfg) $ Text.pack cs , moreBytes = [] } singleR p = Range p p (cfgSource cfg) eofR p = Range p' p' (cfgSource cfg) where p' = Position { line = line p + 1, col = 0 } run i s = case alexScan i (stateToInt s) of AlexEOF -> case s of Normal -> ([ Located (eofR $ alexPos i) (Token EOF "end of file") ] , alexPos i ) InComment p _ _ -> ( [ Located (singleR p) $ Token (Err UnterminatedComment) "unterminated comment" ] , alexPos i) InString p _ -> ( [ Located (singleR p) $ Token (Err UnterminatedString) "unterminated string" ] , alexPos i) InChar p _ -> ( [ Located (singleR p) $ Token (Err UnterminatedChar) "unterminated character" ] , alexPos i) AlexError i' -> let p1 = alexPos i p2 = alexPos i' inp = input i bad = if line p1 == line p2 then take (col p2 - col p1) inp else takeWhile (/= '\n') inp in ( [ Located (Range (alexPos i) (alexPos i') (cfgSource cfg)) $ Token (Err LexicalError) bad ] , alexPos i') AlexSkip i' _ -> run i' s AlexToken i' l act -> let txt = take l (input i) (mtok,s') = act cfg (alexPos i) txt s (rest,pos) = run i' $! s' in case mtok of Nothing -> (rest, pos) Just t -> (t : rest, pos) -- vim: ft=haskell }