module Language.Why3.LexerUtils where import Data.Char(toLower) import Data.Word(Word8) import qualified Data.ByteString.Lazy as BS import Data.Bits((.&.)) import Data.Text.Lazy (Text) import qualified Data.Text.Lazy as Text data AlexInput = Inp { alexPos :: !Position , input :: !BS.ByteString } deriving Show alexInputPrevChar :: AlexInput -> Char alexInputPrevChar _ = error "We don't use left contexts" alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) alexGetByte i = do (b,bs) <- BS.uncons (input i) return (b, i { input = bs, alexPos = move (alexPos i) b }) ------------------------------------------------------------------------------- type Action = Position -> Text -> LexS -> (Maybe Token, LexS) data LexS = Normal | InComment Position ![Position] [Text] | InString Position Text data Position = Position { line :: !Int, col :: !Int } deriving (Eq,Ord,Show) move :: Position -> Word8 -> Position move p c | c == toEnum (fromEnum '\t') = p { col = ((col p + 7) `div` 8) * 8 + 1 } | c == toEnum (fromEnum '\n') = p { col = 0, line = 1 + line p } -- The top 2 bits are 10... | c .&. 0xC0 == 0x80 {- partial byte in a char -} = p | otherwise = p { col = 1 + col p } startComment :: Action startComment p txt s = (Nothing, InComment p stack chunks) where (stack,chunks) = case s of Normal -> ([], [txt]) InComment q qs cs -> (q : qs, txt : cs) _ -> error "[Lexer] startComment" ["in a string"] endComent :: Action endComent _ txt s = case s of InComment f [] cs -> (Just (mkToken f cs), Normal) InComment _ (q:qs) cs -> (Nothing, InComment q qs (txt : cs)) _ -> error "[Lexer] endComment" ["outside commend"] where mkToken f cs = Token { tokenType = White BlockComment , tokenPos = f , tokenText = Text.concat $ reverse $ txt : cs } addToComment :: Action addToComment _ txt s = (Nothing, InComment p stack (txt : chunks)) where (p, stack, chunks) = case s of InComment q qs cs -> (q,qs,cs) _ -> error "[Lexer] addToComment" ["outside comment"] startString :: Action startString p txt _ = (Nothing, InString p txt) endString :: Action endString _ txt s = case s of InString ps str -> (Just (mkToken ps str), Normal) _ -> error "[Lexer] endString" ["outside string"] where parseStr s1 = case reads s1 of [(cs, "")] -> StrLit cs _ -> Err InvalidString mkToken ps str = Token { tokenPos = ps -- XXX , tokenType = parseStr (Text.unpack (Text.append str txt)) , tokenText = Text.append str txt } addToString :: Action addToString _ txt s = case s of InString p str -> (Nothing,InString p (Text.append str txt)) _ -> error "[Lexer] addToString" ["outside string"] emit :: TokenT -> Action emit t p s z = (Just tok, z) where tok = Token { tokenPos = p, tokenType = t, tokenText = s } emitS :: (Text -> TokenT) -> Action emitS t p s z = emit (t s) p s z -------------------------------------------------------------------------------- -- XXX: this can be better... numToken :: Text -> TokenT numToken ds0 = Num (op (toVal ds)) (fromInteger rad) where (rad,ds,op) = case filter (/= '_') (Text.unpack ds0) of '0' : 'x' : more -> (16,more,id) '0' : 'X' : more -> (16,more,id) '0' : 'O' : more -> (8,more,id) '0' : 'o' : more -> (8,more,id) '0' : 'B' : more -> (2,more,id) '0' : 'b' : more -> (2,more,id) '-' : more -> (10,more,negate) more -> (10,more,id) toVal = sum . zipWith (\n x -> rad^n * x) [0 :: Integer ..] . map toDig . reverse toDig = if rad == 16 then fromHexDigit else fromDecDigit fromDecDigit :: Char -> Integer fromDecDigit x = read [x] fromHexDigit :: Char -> Integer fromHexDigit x' | 'a' <= x && x <= 'f' = fromIntegral (10 + fromEnum x - fromEnum 'a') | otherwise = fromDecDigit x where x = toLower x' -------------------------------------------------------------------------------- data Token = Token { tokenType :: TokenT , tokenText :: Text , tokenPos :: Position } deriving Show data IdQual = Unqual | Qual deriving (Eq,Show) data IdCase = Lower | Upper deriving (Eq,Show) data TokenT = Num Integer Int -- ^ value, base | RealTok -- ^ We don't process reals for now | Ident IdQual IdCase -- ^ qualified?, classification | TIdent -- ^ Theory identifier | StrLit Text -- ^ string literal | KW TokenKW -- ^ keyword | Sym TokenSym -- ^ symbols | Op TokenOp -- ^ operators | White TokenW -- ^ white space token | Err TokenErr -- ^ error token | EOF -- ^ End of file deriving (Eq,Show) data TokenW = BlockComment | Space deriving (Eq,Show) data TokenKW = KW_if | KW_then | KW_else | KW_let | KW_in | KW_match | KW_with | KW_end | KW_as | KW_forall | KW_exists | KW_not | KW_true | KW_false | KW_theory | KW_type | KW_constant | KW_function | KW_predicate | KW_inductive | KW_coinductive | KW_axiom | KW_lemma | KW_goal | KW_use | KW_clone | KW_namespace | KW_import | KW_export deriving (Eq,Show) data TokenOp = ArrowL | ArrowR | ArrowLR | AsymDisj | Disj | AsymConj | Conj | BangOp | OtherOp Int -- operator level deriving (Eq,Show) data TokenSym = Comma | Dot | Semi | Colon | Quote | Eq | Underscore | Bar | ParenL | ParenR | BracketL | BracketR | CurlyL | CurlyR deriving (Eq,Show) data TokenErr = UnterminatedComment | UnterminatedString | UnterminatedChar | InvalidString | InvalidChar | LexicalError deriving (Eq,Show)