{-# OPTIONS_GHC -Wall #-}
{-# LANGUAGE StandaloneDeriving #-}
module Parser.Tokeniser (
Tokeniser',
Tokens',
add_token,
current_line_and_char,
delete_char,
gather_token,
get_char,
get_token,
take_token,
tokenisation_error,
tokenise,
tokens_ended) where
import Control.Monad.Except (MonadError (..))
import Control.Monad.RWS.Strict (RWST, execRWST)
import Control.Monad.Reader (MonadReader (..))
import Control.Monad.State.Strict (MonadState (..))
import Control.Monad.Writer.Strict (MonadWriter (..))
import Parser.Errors (Error (..))
import Parser.Line_and_char (L (..), Line_and_char, init_line_and_char)
data State char_class = State {State char_class -> Line_and_char
state_line_and_char :: Line_and_char, State char_class -> [char_class]
state_text :: [char_class]}
data Tokeniser' char_class token err t =
Tokeniser {Tokeniser' char_class token err t
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
run_tokeniser :: RWST (char_class -> Line_and_char -> Line_and_char) [L token] (State char_class) (Either err) t}
data Tokens' token = Tokens [L token] Line_and_char
instance Applicative (Tokeniser' char_class token err) where
Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(a -> b)
tokenise_0 <*> :: Tokeniser' char_class token err (a -> b)
-> Tokeniser' char_class token err a
-> Tokeniser' char_class token err b
<*> Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
tokenise_1 = RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b
-> Tokeniser' char_class token err b
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser (RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(a -> b)
tokenise_0 RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(a -> b)
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
tokenise_1)
pure :: a -> Tokeniser' char_class token err a
pure a
x = RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
-> Tokeniser' char_class token err a
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser (a
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
forall (m :: * -> *) a. Monad m => a -> m a
return a
x)
instance Functor (Tokeniser' char_class token err) where
fmap :: (a -> b)
-> Tokeniser' char_class token err a
-> Tokeniser' char_class token err b
fmap a -> b
f (Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
tokenise') = RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b
-> Tokeniser' char_class token err b
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser (a -> b
f (a -> b)
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
tokenise')
instance Monad (Tokeniser' char_class token err) where
Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
tokenise' >>= :: Tokeniser' char_class token err a
-> (a -> Tokeniser' char_class token err b)
-> Tokeniser' char_class token err b
>>= a -> Tokeniser' char_class token err b
f = RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b
-> Tokeniser' char_class token err b
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser (RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
tokenise' RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
a
-> (a
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b)
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Tokeniser' char_class token err b
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b
forall char_class token err t.
Tokeniser' char_class token err t
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
run_tokeniser (Tokeniser' char_class token err b
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b)
-> (a -> Tokeniser' char_class token err b)
-> a
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> a -> Tokeniser' char_class token err b
f)
deriving instance Show char_class => Show (State char_class)
deriving instance Show token => Show (Tokens' token)
add_token :: token -> Tokeniser' char_class token err ()
add_token :: token -> Tokeniser' char_class token err ()
add_token token
token =
do
Line_and_char
line_and_char <- State char_class -> Line_and_char
forall char_class. State char_class -> Line_and_char
state_line_and_char (State char_class -> Line_and_char)
-> Tokeniser' char_class token err (State char_class)
-> Tokeniser' char_class token err Line_and_char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
-> Tokeniser' char_class token err (State char_class)
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
forall s (m :: * -> *). MonadState s m => m s
get
Line_and_char -> token -> Tokeniser' char_class token err ()
forall token char_class err.
Line_and_char -> token -> Tokeniser' char_class token err ()
add_token' Line_and_char
line_and_char token
token
add_token' :: Line_and_char -> token -> Tokeniser' char_class token err ()
add_token' :: Line_and_char -> token -> Tokeniser' char_class token err ()
add_token' Line_and_char
line_and_char token
token = RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
()
-> Tokeniser' char_class token err ()
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser ([L token]
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
()
forall w (m :: * -> *). MonadWriter w m => w -> m ()
tell [Line_and_char -> token -> L token
forall t. Line_and_char -> t -> L t
L Line_and_char
line_and_char token
token])
current_line_and_char :: Tokens' token -> Line_and_char
current_line_and_char :: Tokens' token -> Line_and_char
current_line_and_char (Tokens [L token]
tokens Line_and_char
end_line_and_char) =
case [L token]
tokens of
[] -> Line_and_char
end_line_and_char
L Line_and_char
line_and_char token
_ : [L token]
_ -> Line_and_char
line_and_char
delete_char :: Tokeniser' char_class token err ()
delete_char :: Tokeniser' char_class token err ()
delete_char =
do
char_class -> Line_and_char -> Line_and_char
next_line_and_char <- RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(char_class -> Line_and_char -> Line_and_char)
-> Tokeniser'
char_class token err (char_class -> Line_and_char -> Line_and_char)
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(char_class -> Line_and_char -> Line_and_char)
forall r (m :: * -> *). MonadReader r m => m r
ask
State Line_and_char
line_and_char [char_class]
text <- RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
-> Tokeniser' char_class token err (State char_class)
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
forall s (m :: * -> *). MonadState s m => m s
get
case [char_class]
text of
[] -> () -> Tokeniser' char_class token err ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
char_class
char_class : [char_class]
text' -> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
()
-> Tokeniser' char_class token err ()
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser (State char_class
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
()
forall s (m :: * -> *). MonadState s m => s -> m ()
put (Line_and_char -> [char_class] -> State char_class
forall char_class.
Line_and_char -> [char_class] -> State char_class
State (char_class -> Line_and_char -> Line_and_char
next_line_and_char char_class
char_class Line_and_char
line_and_char) [char_class]
text'))
gather_token :: (char_class -> Maybe Char) -> (String -> token) -> Tokeniser' char_class token err ()
gather_token :: (char_class -> Maybe Char)
-> (String -> token) -> Tokeniser' char_class token err ()
gather_token char_class -> Maybe Char
recognise_char String -> token
string_to_token =
do
Line_and_char
line_and_char <- State char_class -> Line_and_char
forall char_class. State char_class -> Line_and_char
state_line_and_char (State char_class -> Line_and_char)
-> Tokeniser' char_class token err (State char_class)
-> Tokeniser' char_class token err Line_and_char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
-> Tokeniser' char_class token err (State char_class)
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
forall s (m :: * -> *). MonadState s m => m s
get
String
token <- (char_class -> Maybe Char)
-> Tokeniser' char_class token err String
forall char_class token err.
(char_class -> Maybe Char)
-> Tokeniser' char_class token err String
gather_token' char_class -> Maybe Char
recognise_char
Line_and_char -> token -> Tokeniser' char_class token err ()
forall token char_class err.
Line_and_char -> token -> Tokeniser' char_class token err ()
add_token' Line_and_char
line_and_char (String -> token
string_to_token String
token)
gather_token' :: (char_class -> Maybe Char) -> Tokeniser' char_class token err String
gather_token' :: (char_class -> Maybe Char)
-> Tokeniser' char_class token err String
gather_token' char_class -> Maybe Char
recognise_char =
let
f :: Tokeniser' char_class token err String
f = (char_class -> Maybe Char)
-> Tokeniser' char_class token err String
forall char_class token err.
(char_class -> Maybe Char)
-> Tokeniser' char_class token err String
gather_token' char_class -> Maybe Char
recognise_char
in
do
Maybe char_class
maybe_char_class <- Int -> Tokeniser' char_class token err (Maybe char_class)
forall char_class token err.
Int -> Tokeniser' char_class token err (Maybe char_class)
get_char Int
0
case Maybe char_class
maybe_char_class Maybe char_class -> (char_class -> Maybe Char) -> Maybe Char
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= char_class -> Maybe Char
recognise_char of
Maybe Char
Nothing -> String -> Tokeniser' char_class token err String
forall (m :: * -> *) a. Monad m => a -> m a
return String
""
Just Char
char ->
do
Tokeniser' char_class token err ()
forall char_class token err. Tokeniser' char_class token err ()
delete_char
String
token <- Tokeniser' char_class token err String
forall token err. Tokeniser' char_class token err String
f
String -> Tokeniser' char_class token err String
forall (m :: * -> *) a. Monad m => a -> m a
return (Char
char Char -> ShowS
forall a. a -> [a] -> [a]
: String
token)
get_char :: Int -> Tokeniser' char_class token err (Maybe char_class)
get_char :: Int -> Tokeniser' char_class token err (Maybe char_class)
get_char Int
i =
do
[char_class]
text <- State char_class -> [char_class]
forall char_class. State char_class -> [char_class]
state_text (State char_class -> [char_class])
-> Tokeniser' char_class token err (State char_class)
-> Tokeniser' char_class token err [char_class]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
-> Tokeniser' char_class token err (State char_class)
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
forall s (m :: * -> *). MonadState s m => m s
get
Maybe char_class
-> Tokeniser' char_class token err (Maybe char_class)
forall (m :: * -> *) a. Monad m => a -> m a
return
(case Int
0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
i Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< [char_class] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [char_class]
text of
Bool
False -> Maybe char_class
forall a. Maybe a
Nothing
Bool
True -> char_class -> Maybe char_class
forall a. a -> Maybe a
Just ([char_class]
text [char_class] -> Int -> char_class
forall a. [a] -> Int -> a
!! Int
i))
get_token :: Tokens' token -> Maybe token
get_token :: Tokens' token -> Maybe token
get_token (Tokens [L token]
tokens Line_and_char
_) =
case [L token]
tokens of
[] -> Maybe token
forall a. Maybe a
Nothing
L Line_and_char
_ token
token : [L token]
_ -> token -> Maybe token
forall a. a -> Maybe a
Just token
token
take_token :: (token -> Maybe t) -> Tokens' token -> Maybe (t, Tokens' token)
take_token :: (token -> Maybe t) -> Tokens' token -> Maybe (t, Tokens' token)
take_token token -> Maybe t
f (Tokens [L token]
tokens Line_and_char
end_line_and_char) =
case [L token]
tokens of
[] -> Maybe (t, Tokens' token)
forall a. Maybe a
Nothing
L Line_and_char
_ token
token : [L token]
tokens' ->
do
t
x <- token -> Maybe t
f token
token
(t, Tokens' token) -> Maybe (t, Tokens' token)
forall (m :: * -> *) a. Monad m => a -> m a
return (t
x, [L token] -> Line_and_char -> Tokens' token
forall token. [L token] -> Line_and_char -> Tokens' token
Tokens [L token]
tokens' Line_and_char
end_line_and_char)
tokenisation_error :: (Line_and_char -> err) -> Tokeniser' char_class token err t
tokenisation_error :: (Line_and_char -> err) -> Tokeniser' char_class token err t
tokenisation_error Line_and_char -> err
err =
do
Line_and_char
line_and_char <- State char_class -> Line_and_char
forall char_class. State char_class -> Line_and_char
state_line_and_char (State char_class -> Line_and_char)
-> Tokeniser' char_class token err (State char_class)
-> Tokeniser' char_class token err Line_and_char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
-> Tokeniser' char_class token err (State char_class)
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
(State char_class)
forall s (m :: * -> *). MonadState s m => m s
get
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
forall char_class token err t.
RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
-> Tokeniser' char_class token err t
Tokeniser (err
-> RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
t
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (Line_and_char -> err
err Line_and_char
line_and_char))
tokenise ::
(
(Char -> char_class) ->
(char_class -> Line_and_char -> Line_and_char) ->
Tokeniser' char_class token err () ->
String ->
Either Error (Either err (Tokens' token)))
tokenise :: (Char -> char_class)
-> (char_class -> Line_and_char -> Line_and_char)
-> Tokeniser' char_class token err ()
-> String
-> Either Error (Either err (Tokens' token))
tokenise Char -> char_class
classify_char char_class -> Line_and_char -> Line_and_char
next_line_and_char (Tokeniser RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
()
tokenise') String
text =
case RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
()
-> (char_class -> Line_and_char -> Line_and_char)
-> State char_class
-> Either err (State char_class, [L token])
forall (m :: * -> *) r w s a.
Monad m =>
RWST r w s m a -> r -> s -> m (s, w)
execRWST RWST
(char_class -> Line_and_char -> Line_and_char)
[L token]
(State char_class)
(Either err)
()
tokenise' char_class -> Line_and_char -> Line_and_char
next_line_and_char (Line_and_char -> [char_class] -> State char_class
forall char_class.
Line_and_char -> [char_class] -> State char_class
State Line_and_char
init_line_and_char (Char -> char_class
classify_char (Char -> char_class) -> String -> [char_class]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String
text)) of
Left err
err -> Either err (Tokens' token)
-> Either Error (Either err (Tokens' token))
forall a b. b -> Either a b
Right (err -> Either err (Tokens' token)
forall a b. a -> Either a b
Left err
err)
Right (State Line_and_char
line_and_char [char_class]
text', [L token]
tokens) ->
case [char_class]
text' of
[] -> Either err (Tokens' token)
-> Either Error (Either err (Tokens' token))
forall a b. b -> Either a b
Right (Tokens' token -> Either err (Tokens' token)
forall a b. b -> Either a b
Right ([L token] -> Line_and_char -> Tokens' token
forall token. [L token] -> Line_and_char -> Tokens' token
Tokens [L token]
tokens Line_and_char
line_and_char))
[char_class]
_ -> Error -> Either Error (Either err (Tokens' token))
forall a b. a -> Either a b
Left Error
Incomplete_tokenisation
tokens_ended :: Tokens' token -> Bool
tokens_ended :: Tokens' token -> Bool
tokens_ended (Tokens [L token]
tokens Line_and_char
_) = [L token] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [L token]
tokens