{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.Lua.LexerUtils where

import Data.Text (Text)
import qualified Data.Text as Text

#if !MIN_VERSION_base(4,8,0)
import Control.Applicative (Applicative(..))
#endif

import Language.Lua.Token
import AlexTools

-- | A Lua token together with its position information.
data LexToken = LexToken
  { ltokToken  :: Token        -- ^ the token
  , ltokRange  :: SourceRange  -- ^ source range associated with the token
  , ltokLexeme :: Text         -- ^ lexeme text associated with the token
  } deriving (Show)

-- | The mode the lexer is currently in.
data Mode
  = NormalMode
    -- ^ The mode the lexer returns to whenever a sub-mode ends.

  | StringMode StringMode [SourceRange] Input
    -- ^ Quoted string: string type, ranges of the errors recorded so far,
    -- and the input at the start of the string.

  | CommentMode Input
    -- ^ Single-line comment: the input at the beginning of the comment.

  | QuoteMode Input Int Bool
    -- ^ Long-quoted string or comment: the input at the beginning of the
    -- long-quote, the delimiter length, and whether this is a comment.

-- | The string type stored in 'StringMode'.
data StringMode = SingleQuote | DoubleQuote

-- | Called when the end of a line is reached before the closing character
-- of a string has been seen.
--
-- The lexer goes back to 'NormalMode' and a 'TokUntermString' lexeme is
-- built with 'longToken' from the input saved when the string was entered.
-- The state pattern is lazy: a state other than 'StringMode' only fails
-- once the saved input is actually demanded.
unterminatedString :: Action Mode [Lexeme Token]
unterminatedString =
  getLexerState >>= \ ~(StringMode _ _ strStart) ->
    setLexerState NormalMode >> longToken strStart TokUntermString

-- | An unknown character encountered in "normal mode".
--
-- Sets the mode to 'NormalMode' and emits the current match as
-- 'TokUnexpected'.
invalidChar :: Action Mode [Lexeme Token]
invalidChar = setLexerState NormalMode >> lexeme TokUnexpected

-- | A bad escape sequence within a string.
--
-- No lexeme is produced here. The range of the current match is prepended
-- to the error list kept in the 'StringMode' state; the string type and
-- the saved start input are left untouched.
invalidEsc :: Action Mode [Lexeme Token]
invalidEsc = do
  escStart <- startInput
  escEnd   <- endInput
  ~(StringMode strTy errsSoFar strStart) <- getLexerState
  let badRange =
        SourceRange
          { sourceFrom = inputPos escStart
          , sourceTo   = inputPrev escEnd
          }
  setLexerState (StringMode strTy (badRange : errsSoFar) strStart)
  return []

-- | Lexemes to emit when the input ends while the lexer is in the given
-- mode.
--
-- Nothing is emitted for 'NormalMode' or 'CommentMode'. A string or
-- long-quote that is still open yields a single "unterminated" lexeme that
-- runs from the saved start position to the previous position of the final
-- input and carries all of the text from the saved start onwards.
checkEOF :: Mode -> Input -> [Lexeme Token]
checkEOF mode Input { inputPrev = eofPos } =
  case mode of
    NormalMode                -> []
    CommentMode _             -> []
    StringMode _ _ opened     -> unterminated TokUntermString opened
    QuoteMode opened _ isComment
      | isComment             -> unterminated TokUntermComment opened
      | otherwise             -> unterminated TokUntermString opened
  where
    -- One lexeme covering everything from the saved start up to 'eofPos'.
    unterminated tok Input { inputPos = from, inputText = remaining } =
      [ Lexeme
          { lexemeToken = tok
          , lexemeRange = SourceRange { sourceFrom = from, sourceTo = eofPos }
          , lexemeText  = remaining
          }
      ]

-- | Begin lexing a long-quoted string literal.
--
-- The length of the current match is stored as the delimiter length.
enterLongString :: Action Mode [Lexeme Token]
enterLongString = do
  quoteStart <- startInput
  delimLen   <- matchLength
  setLexerState (QuoteMode quoteStart delimLen False)
  return []

-- | Begin lexing a string literal of the given string type, starting with
-- an empty error list.
enterString :: StringMode -> Action Mode [Lexeme Token]
enterString sm =
  startInput >>= \strStart ->
    setLexerState (StringMode sm [] strStart) >> return []

-- | Begin lexing a long-quoted comment.
--
-- The stored delimiter length is the match length minus two.
enterLongComment :: Action Mode [Lexeme Token]
enterLongComment = do
  commentStart <- startInput
  matched      <- matchLength
  setLexerState (QuoteMode commentStart (matched - 2) True)
  return []

-- | Begin lexing a single-line comment.
enterComment :: Action Mode [Lexeme Token]
enterComment =
  startInput >>= \commentStart ->
    setLexerState (CommentMode commentStart) >> return []

-- | Construct a lexeme spanning multiple matches.
--
-- The lexeme runs from the position of the saved input to the previous
-- position of the input at the end of the current match. Its text is the
-- corresponding prefix of the saved input text.
longToken ::
  Input  {- ^ input from the mode -} ->
  Token  {- ^ token for lexeme    -} ->
  Action Mode [Lexeme Token]
longToken Input { inputPos = from, inputText = fullText } tok = do
  Input { inputPrev = to } <- endInput
  -- Both endpoints are inclusive, hence the extra one.
  let spanLen = sourceIndex to - sourceIndex from + 1
      spanned = SourceRange { sourceFrom = from, sourceTo = to }
  return
    [ Lexeme
        { lexemeToken = tok
        , lexemeRange = spanned
        , lexemeText  = Text.take spanLen fullText
        }
    ]

-- | The closing delimiter of a long-quoted lexeme must be the same length
-- as its opening delimiter. This predicate checks whether the currently
-- matched delimiter has the right length. It is always 'False' outside of
-- 'QuoteMode'.
endStringPredicate ::
  Mode   {- ^ lexer mode                      -} ->
  Input  {- ^ input stream before the token   -} ->
  Int    {- ^ length of the token             -} ->
  Input  {- ^ input stream after the token    -} ->
  Bool   {- ^ is expected ending long-quote   -}
endStringPredicate (QuoteMode _ openLen _) _ closeLen _ = closeLen == openLen
endStringPredicate _                       _ _        _ = False

-- | Action called at the end of a lexer-sub mode.
-- Leaving a sub-mode switches the lexer back to 'NormalMode' and emits one
-- lexeme, built with 'longToken', from the input saved when the sub-mode
-- was entered: a string is 'TokSLit' unless errors were recorded (then
-- 'TokUnexpected'), and comments of either kind are 'TokComment'.
-- Being called while already in 'NormalMode' is an internal error.
endMode :: Action Mode [Lexeme Token]
endMode = do
  finished <- getLexerState
  setLexerState NormalMode
  case finished of
    NormalMode                -> error "endMode: internal lexer error"
    CommentMode begun         -> longToken begun TokComment
    QuoteMode begun _ isCmt   -> longToken begun (quoteToken isCmt)
    StringMode _ errs begun   -> longToken begun (stringToken errs)
  where
    -- A long-quote is either a comment or a string literal.
    quoteToken isCmt
      | isCmt     = TokComment
      | otherwise = TokSLit

    -- A string with recorded errors is reported as unexpected.
    stringToken errs
      | null errs = TokSLit
      | otherwise = TokUnexpected

-- | Drop the first line of a Lua file when it starts with a '#'.
--
-- Only the characters before the first newline are removed. The newline
-- itself is preserved in order to ensure that line numbers stay correct.
dropSpecialComment :: Text -> Text
dropSpecialComment text =
  if Text.isPrefixOf "#" text
    then Text.dropWhile (/= '\n') text
    else text

-- | Remove whitespace and comment lexemes from a list of lexemes in order
-- to make it suitable for parsing.
dropWhiteSpace :: [Lexeme Token] -> [Lexeme Token]
dropWhiteSpace lexemes = [ l | l <- lexemes, significant (lexemeToken l) ]
  where
    significant tok =
      case tok of
        TokWhiteSpace -> False
        TokComment    -> False
        _             -> True