{ -- | -- Module : Language.LLVM.Parser.Lexer -- Copyright : (c) Harvard University 2006-2011 -- (c) Geoffrey Mainland 2011-2013 -- (c) Drexel University 2013 -- (c) Timo von Holtz 2014 -- License : BSD-style -- Maintainer : tvh@tvholtz.de {-# OPTIONS_GHC -w #-} module LLVM.General.Quote.Parser.Lexer ( lexToken ) where import Control.Applicative import Control.Monad.Error import Control.Monad.State import qualified Data.ByteString.Char8 as B import qualified Data.Map as Map import Data.Char (isDigit, isLower, isAlphaNum, isOctDigit, isHexDigit, chr, toLower) import Data.Loc import Data.Ratio ((%)) import Text.PrettyPrint.Mainland import LLVM.General.Quote.Parser.Tokens import LLVM.General.Quote.Parser.Monad } $nondigit = [a-z A-Z \_ \.] $digit = [0-9] $nonzerodigit = [1-9] $octalDigit = [0-7] $hexadecimalDigit = [0-9A-Fa-f] $whitechar = [\ \t\n\r\f\v] @fractionalConstant = $digit* "." $digit+ | $digit+ "." @exponentPart = [eE] [\+\-]? $digit+ @floatingConstant = @fractionalConstant @exponentPart? | $digit+ @exponentPart @decimalConstant = $nonzerodigit $digit* | "0" @octalConstant = "0" $octalDigit* @hexadecimalConstant = "0" [xX] $hexadecimalDigit+ @idText = [a-z A-Z \_ \. \-] [a-z A-Z \_ \. \- 0-9]* @identifier = [@\%\!] ( @decimalConstant | @idText) @jumpLabel = @idText ":" @integerType = "i" $nonzerodigit $digit* @keyword = [a-z \_]+ ($nonzerodigit $digit*)? tokens :- <0> { "$dl:" / { allowAnti } { lexAnti Tanti_dl } "$dlM:" / { allowAnti } { lexAntiM Tanti_dl } "$tt:" / { allowAnti } { lexAnti Tanti_tt } "$ttM:" / { allowAnti } { lexAntiM Tanti_tt } "$def:" / { allowAnti } { lexAnti Tanti_def } "$defM:" / { allowAnti } { lexAntiM Tanti_def } "$defs:" / { allowAnti } { lexAnti Tanti_defs } "$defsM:" / { allowAnti } { lexAntiM Tanti_defs } "$bb:" / { allowAnti } { lexAnti Tanti_bb } "$bbM:" / { allowAnti } { lexAntiM Tanti_bb } "$bbs:" / { allowAnti } { lexAnti Tanti_bbs } "$bbsM:" / { allowAnti } { lexAntiM Tanti_bbs } "$instr:" / { allowAnti } { lexAnti Tanti_instr } "$instrM:" / { allowAnti } { lexAntiM Tanti_instr } "$instrs:" / { allowAnti } { lexAnti Tanti_instrs } "$instrsM:" / { allowAnti } { lexAntiM Tanti_instrs } "$type:" / { allowAnti } { lexAnti Tanti_type } "$typeM:" / { allowAnti } { lexAntiM Tanti_type } "$opr:" / { allowAnti } { lexAnti Tanti_opr } "$oprM:" / { allowAnti } { lexAntiM Tanti_opr } "$const:" / { allowAnti } { lexAnti Tanti_const } "$constM:" / { allowAnti } { lexAntiM Tanti_const } "$id:" / { allowAnti } { lexAnti Tanti_id } "$idM:" / { allowAnti } { lexAntiM Tanti_id } "$gid:" / { allowAnti } { lexAnti Tanti_gid } "$gidM:" / { allowAnti } { lexAntiM Tanti_gid } "$param:" / { allowAnti } { lexAnti Tanti_param } "$paramM:" / { allowAnti } { lexAntiM Tanti_param } "$params:" / { allowAnti } { lexAnti Tanti_params } "$paramsM:" / { allowAnti } { lexAntiM Tanti_params } } <0> { ";" .* ; $whitechar+ ; @identifier { identifier } @jumpLabel { jumpLabel } @integerType { numberedToken TintegerType } @keyword { keyword } @floatingConstant { lexFloat } @decimalConstant { lexInteger 0 decimal } @octalConstant { lexInteger 1 octal } @hexadecimalConstant { lexInteger 2 hexadecimal } \" { lexStringTok } "(" { token Tlparen } ")" { token Trparen } "[" { token Tlbrack } "]" { token Trbrack } "{" { token Tlbrace } "}" { token Trbrace } "<" { token Tlt } ">" { token Tgt } "," { token Tcomma } "*" { token Tstar } "=" { token Tassign } "-" { token Tminus } "!" { token Tbang } "..." { token Tpoints } } { type Action = AlexInput -> AlexInput -> P (L Token) inputString :: AlexInput -> AlexInput -> String inputString beg end = (B.unpack . B.take (alexOff end - alexOff beg)) (alexInput beg) locateTok :: AlexInput -> AlexInput -> Token -> L Token locateTok beg end tok = L (Loc (alexPos beg) (alexPos end)) tok token :: Token -> Action token tok beg end = return $ locateTok beg end tok identifier :: Action identifier beg end = do v <- case head ident of '%' -> return Local '@' -> return Global '!' -> return Meta case isDigit $ head $ tail ident of False -> return $ locateTok beg end $ Tnamed v (tail ident) True -> return $ locateTok beg end $ Tunnamed v (read $ tail ident) where ident :: String ident = inputString beg end jumpLabel :: Action jumpLabel beg end = do token (TjumpLabel $ init ident) beg end where ident :: String ident = inputString beg end numberedToken :: (Num a, Read a) => (a -> Token) -> Action numberedToken f beg end = do return $ locateTok beg end $ f (read $ tail ident) where ident :: String ident = inputString beg end keyword :: Action keyword beg end = do case Map.lookup ident keywordMap of Nothing -> identError Just (tok, Nothing) -> token tok beg end Just (tok, Just i) -> do isKw <- useExts i if isKw then token tok beg end else identError where ident :: String ident = inputString beg end identError = fail $ "not a valid keyword: " ++ show ident lexStringTok :: Action lexStringTok beg _ = do s <- lexString "" end <- getInput return $ locateTok beg end (TstringConst s) where lexString :: String -> P String lexString s = do c <- nextChar case c of '"' -> return (reverse s) '\\' -> do c' <- lexCharEscape lexString (c' : s) _ -> lexString (c : s) lexAnti :: (String -> Token) -> Action lexAnti antiTok = lexAntiM (antiTok . \s -> "return (" ++ s ++ ")") lexAntiM :: (String -> Token) -> Action lexAntiM antiTok beg end = do c <- nextChar s <- case c of '(' -> lexExpression 0 "" _ | isIdStartChar c -> lexIdChars [c] | otherwise -> lexerError beg (text "illegal anitquotation") return $ locateTok beg end (antiTok s) where lexIdChars :: String -> P String lexIdChars s = do maybe_c <- maybePeekChar case maybe_c of Just c | isIdChar c -> skipChar >> lexIdChars (c : s) _ -> return (reverse s) lexExpression :: Int -> String -> P String lexExpression depth s = do maybe_c <- maybePeekChar case maybe_c of Nothing -> do end' <- getInput parserError (Loc (alexPos beg) (alexPos end')) (text "unterminated antiquotation") Just '(' -> skipChar >> lexExpression (depth+1) ('(' : s) Just ')' | depth == 0 -> skipChar >> return (unescape (reverse s)) | otherwise -> skipChar >> lexExpression (depth-1) (')' : s) Just c -> skipChar >> lexExpression depth (c : s) where unescape :: String -> String unescape ('\\':'|':'\\':']':s') = '|' : ']' : unescape s' unescape (c:s') = c : unescape s' unescape [] = [] isIdStartChar :: Char -> Bool isIdStartChar '_' = True isIdStartChar c = isLower c isIdChar :: Char -> Bool isIdChar '_' = True isIdChar '\'' = True isIdChar c = isAlphaNum c lexCharEscape :: P Char lexCharEscape = do cur <- getInput c <- nextChar case c of 'a' -> return '\a' 'b' -> return '\b' 'f' -> return '\f' 'n' -> return '\n' 'r' -> return '\r' 't' -> return '\t' 'v' -> return '\v' '\\' -> return '\\' '\'' -> return '\'' '"' -> return '"' '?' -> return '?' 'x' -> chr <$> checkedReadNum isHexDigit 16 hexDigit n | isOctDigit n -> setInput cur >> chr <$> checkedReadNum isOctDigit 8 octDigit _c -> return c lexInteger :: Int -> Radix -> Action lexInteger ndrop radix@(_, isRadixDigit, _) beg end = case i of [n] -> return $ locateTok beg end (toToken n) _ -> fail "bad parse for integer" where num :: String num = (takeWhile isRadixDigit . drop ndrop) s s :: String s = inputString beg end i :: [Integer] i = do (n, _) <- readInteger radix num return n toToken :: Integer -> Token toToken n = TintConst n lexFloat :: Action lexFloat beg end = case i of [n] -> token (toToken n) beg end _ -> fail "bad parse for integer" where s :: String s = inputString beg end i :: [Rational] i = do (n, _) <- readRational s return n toToken :: Rational -> Token toToken n = TfloatConst n type Radix = (Integer, Char -> Bool, Char -> Int) decDigit :: Char -> Int decDigit c | c >= '0' && c <= '9' = ord c - ord '0' | otherwise = error "error in decimal constant" octDigit :: Char -> Int octDigit c | c >= '0' && c <= '7' = ord c - ord '0' | otherwise = error "error in octal constant" hexDigit :: Char -> Int hexDigit c | c >= 'a' && c <= 'f' = 10 + ord c - ord 'a' | c >= 'A' && c <= 'F' = 10 + ord c - ord 'A' | c >= '0' && c <= '9' = ord c - ord '0' | otherwise = error "error in hexadecimal constant" decimal :: Radix decimal = (10, isDigit, decDigit) octal :: Radix octal = (8, isOctDigit, octDigit) hexadecimal :: Radix hexadecimal = (16, isHexDigit, hexDigit) readInteger :: Radix -> ReadS Integer readInteger (radix, isRadixDigit, charToInt) = go 0 where go :: Integer -> ReadS Integer go x [] = return (x, "") go x (c : cs) | isRadixDigit c = go (x * radix + toInteger (charToInt c)) cs | otherwise = return (x, c : cs) readDecimal :: ReadS Integer readDecimal = readInteger decimal readRational :: ReadS Rational readRational s = do (n, d, t) <- readFix (x, t') <- readExponent t return ((n % 1) * 10^^(x - toInteger d), t') where readFix :: [(Integer, Int, String)] readFix = return (read (i ++ f), length f, u) where (i, t) = span isDigit s (f, u) = case t of '.' : u' -> span isDigit u' _ -> ("", t) readExponent :: ReadS Integer readExponent "" = return (0, "") readExponent (e : s') | e `elem` "eE" = go s' | otherwise = return (0, s') where go :: ReadS Integer go ('+' : s'') = readDecimal s'' go ('-' : s'') = do (x, t) <- readDecimal s'' return (-x, t) go s'' = readDecimal s'' checkedReadNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Int checkedReadNum isDigit' base conv = do cur <- getInput c <- peekChar when (not $ isDigit' c) $ illegalNumericalLiteral cur readNum isDigit base conv readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Int readNum isDigit' base conv = readI 0 where readI :: Int -> P Int readI n = do c <- peekChar if isDigit' c then do let n' = n*base + conv c n' `seq` skipChar >> readI n' else return n lexToken :: P (L Token) lexToken = do beg <- getInput sc <- getLexState st <- get case alexScanUser st beg sc of AlexEOF -> token Teof beg beg AlexError end -> lexerError end (text rest) where rest :: String rest = B.unpack $ B.take 80 (alexInput beg) AlexSkip end _ -> setInput end >> lexToken AlexToken end _len t -> setInput end >> t beg end }