module Language.Why3.LexerUtils where
import Data.Char(toLower)
import Data.Word(Word8)
import qualified Data.ByteString.Lazy as BS
import Data.Bits((.&.))
import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as Text
data AlexInput = Inp { alexPos :: !Position
, input :: !BS.ByteString
} deriving Show
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = error "We don't use left contexts"
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte i =
do (b,bs) <- BS.uncons (input i)
return (b, i { input = bs, alexPos = move (alexPos i) b })
type Action = Position -> Text -> LexS -> (Maybe Token, LexS)
data LexS = Normal
| InComment Position ![Position] [Text]
| InString Position Text
data Position = Position { line :: !Int, col :: !Int }
deriving (Eq,Ord,Show)
move :: Position -> Word8 -> Position
move p c
| c == toEnum (fromEnum '\t') = p { col = ((col p + 7) `div` 8) * 8 + 1 }
| c == toEnum (fromEnum '\n') = p { col = 0, line = 1 + line p }
| c .&. 0xC0 == 0x80 = p
| otherwise = p { col = 1 + col p }
startComment :: Action
startComment p txt s = (Nothing, InComment p stack chunks)
where
(stack,chunks) =
case s of
Normal -> ([], [txt])
InComment q qs cs -> (q : qs, txt : cs)
_ -> error "[Lexer] startComment" ["in a string"]
endComent :: Action
endComent _ txt s =
case s of
InComment f [] cs -> (Just (mkToken f cs), Normal)
InComment _ (q:qs) cs -> (Nothing, InComment q qs (txt : cs))
_ -> error "[Lexer] endComment" ["outside commend"]
where
mkToken f cs = Token
{ tokenType = White BlockComment
, tokenPos = f
, tokenText = Text.concat $ reverse $ txt : cs
}
addToComment :: Action
addToComment _ txt s = (Nothing, InComment p stack (txt : chunks))
where
(p, stack, chunks) =
case s of
InComment q qs cs -> (q,qs,cs)
_ -> error "[Lexer] addToComment" ["outside comment"]
startString :: Action
startString p txt _ = (Nothing, InString p txt)
endString :: Action
endString _ txt s = case s of
InString ps str -> (Just (mkToken ps str), Normal)
_ -> error "[Lexer] endString" ["outside string"]
where
parseStr s1 = case reads s1 of
[(cs, "")] -> StrLit cs
_ -> Err InvalidString
mkToken ps str = Token
{ tokenPos = ps
, tokenType = parseStr (Text.unpack (Text.append str txt))
, tokenText = Text.append str txt
}
addToString :: Action
addToString _ txt s = case s of
InString p str -> (Nothing,InString p (Text.append str txt))
_ -> error "[Lexer] addToString" ["outside string"]
emit :: TokenT -> Action
emit t p s z = (Just tok, z)
where tok = Token { tokenPos = p, tokenType = t, tokenText = s }
emitS :: (Text -> TokenT) -> Action
emitS t p s z = emit (t s) p s z
numToken :: Text -> TokenT
numToken ds0 = Num (op (toVal ds)) (fromInteger rad)
where
(rad,ds,op) =
case filter (/= '_') (Text.unpack ds0) of
'0' : 'x' : more -> (16,more,id)
'0' : 'X' : more -> (16,more,id)
'0' : 'O' : more -> (8,more,id)
'0' : 'o' : more -> (8,more,id)
'0' : 'B' : more -> (2,more,id)
'0' : 'b' : more -> (2,more,id)
'-' : more -> (10,more,negate)
more -> (10,more,id)
toVal = sum . zipWith (\n x -> rad^n * x) [0 :: Integer ..]
. map toDig . reverse
toDig = if rad == 16 then fromHexDigit else fromDecDigit
fromDecDigit :: Char -> Integer
fromDecDigit x = read [x]
fromHexDigit :: Char -> Integer
fromHexDigit x'
| 'a' <= x && x <= 'f' = fromIntegral (10 + fromEnum x fromEnum 'a')
| otherwise = fromDecDigit x
where x = toLower x'
data Token = Token { tokenType :: TokenT
, tokenText :: Text
, tokenPos :: Position
}
deriving Show
data IdQual = Unqual | Qual deriving (Eq,Show)
data IdCase = Lower | Upper deriving (Eq,Show)
data TokenT = Num Integer Int
| RealTok
| Ident IdQual IdCase
| TIdent
| StrLit Text
| KW TokenKW
| Sym TokenSym
| Op TokenOp
| White TokenW
| Err TokenErr
| EOF
deriving (Eq,Show)
data TokenW = BlockComment | Space
deriving (Eq,Show)
data TokenKW = KW_if | KW_then | KW_else
| KW_let | KW_in
| KW_match | KW_with | KW_end
| KW_as
| KW_forall | KW_exists | KW_not | KW_true | KW_false
| KW_theory
| KW_type
| KW_constant
| KW_function
| KW_predicate
| KW_inductive
| KW_coinductive
| KW_axiom
| KW_lemma
| KW_goal
| KW_use
| KW_clone
| KW_namespace
| KW_import
| KW_export
deriving (Eq,Show)
data TokenOp = ArrowL | ArrowR | ArrowLR
| AsymDisj | Disj | AsymConj | Conj
| BangOp
| OtherOp Int
deriving (Eq,Show)
data TokenSym = Comma | Dot | Semi | Colon | Quote
| Eq | Underscore | Bar
| ParenL | ParenR
| BracketL | BracketR
| CurlyL | CurlyR
deriving (Eq,Show)
data TokenErr = UnterminatedComment
| UnterminatedString
| UnterminatedChar
| InvalidString
| InvalidChar
| LexicalError
deriving (Eq,Show)