module LText.Parser.Lexer where
import Control.Monad.State
import Control.Monad.Except
data ExprTokens = TLamb
| TArrow
| TIdent String
| TGroup [ExprTokens]
| TLParen
| TRParen
deriving (Eq)
instance Show ExprTokens where
show TLamb = "λ"
show TArrow = "→"
show TLParen = "("
show TRParen = ")"
show (TIdent s) = s
show (TGroup ts) = "{" ++ show ts ++ "}"
data TokenState = TokenState
{ between :: String
} deriving (Eq, Show)
initTokenState :: TokenState
initTokenState = TokenState ""
tokenize :: String -> [ExprTokens]
tokenize s = go $ words s
where
go [] = []
go ("->":xs) = TArrow:go xs
go ("\\":xs) = TLamb:go xs
go ("(":xs) = TLParen:go xs
go (")":xs) = TRParen:go xs
go (x:xs) = let (r, TokenState lastIdent) = runState (foldM go' [] x) initTokenState in
if lastIdent == ""
then r ++ go xs
else r ++ [TIdent lastIdent] ++ go xs
where
go' :: MonadState TokenState m => [ExprTokens] -> Char -> m [ExprTokens]
go' acc '\\' = do
lexState <- get
put $ lexState { between = "" }
if between lexState == ""
then return $ acc ++ [TLamb]
else return $ acc ++ [TIdent $ between lexState, TLamb]
go' acc '(' = do
lexState <- get
put $ lexState { between = ""}
if between lexState == ""
then return $ acc ++ [TLParen]
else return $ acc ++ [TIdent $ between lexState, TLParen]
go' acc ')' = do
lexState <- get
put $ lexState { between = ""}
if between lexState == ""
then return $ acc ++ [TRParen]
else return $ acc ++ [TIdent $ between lexState, TRParen]
go' acc x = do
lexState <- get
put lexState {between = between lexState ++ [x]}
return acc
runGroup :: ( Monad m
, MonadError String m
) => StateT Integer m a -> m a
runGroup m = evalStateT m 0
group :: ( MonadState Integer m
) => ([ExprTokens], [ExprTokens])
-> m ([ExprTokens], [ExprTokens])
group (acc, []) =
return (acc, [])
group (acc, TLParen:xs) = do
(layer, rest) <- group ([], xs)
get >>= \x -> put $ x+1
group (acc ++ [TGroup layer], rest)
group (acc, TRParen:xs) = do
get >>= \x -> put $ x1
return (acc, xs)
group (acc, x:xs) =
group (acc ++ [x], xs)
lexer :: ( Monad m
, MonadError String m
) => String -> m [ExprTokens]
lexer s = do
ts' <- runGroup $ group ([], tokenize s)
return $ fst ts'