{-
  Definitions

  * Lexer: `Text -> [Text]`
  * Token: The leaves in your AST, e.g. TextLiteral, Number, etc

  Here, we implement a structurally aware lexer that supports one token type
  (text literals) for convenience.
-}
{-# LANGUAGE TupleSections #-}

module Snail.Lexer (
    sExpression,
    snailAst,

    -- * Exported for testing
    nonQuoteCharacter,
    textLiteral,
) where

import Data.Text (Text)
import Data.Text qualified as Text
import Data.Void
import Snail.Ast
import Snail.Characters
import Text.Megaparsec hiding (token)
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer qualified as L

-- | TODO: 'Void' is the error type but we should use an explicit error type
type Parser = Parsec Void Text

{- | Megaparsec's 'skipLineComment' takes a prefix and skips lines that begin
 with that prefix
-}
skipLineComment :: Parser ()
skipLineComment :: Parser ()
skipLineComment = Tokens Text -> Parser ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> m ()
L.skipLineComment Tokens Text
"--"

{- | Megaparsec's 'skipBlockComment' takes prefix and suffix and skips anything
 in between
-}
skipBlockComment :: Parser ()
skipBlockComment :: Parser ()
skipBlockComment = Tokens Text -> Tokens Text -> Parser ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> Tokens s -> m ()
L.skipBlockCommentNested Tokens Text
"{-" Tokens Text
"-}"

{- | Generate a parser for whitespace in a language with 'skipLineComment' and
 'skipBlockComment'
-}
spaces :: Parser ()
spaces :: Parser ()
spaces = Parser () -> Parser () -> Parser () -> Parser ()
forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> m () -> m () -> m ()
L.space Parser ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space1 Parser ()
skipLineComment Parser ()
skipBlockComment

-- | Parse a 'Text' verbatim
symbol :: Text -> Parser Text
symbol :: Text -> Parser Text
symbol = Parser ()
-> Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> Tokens s -> m (Tokens s)
L.symbol Parser ()
spaces

roundP :: Parser a -> Parser (Bracket, a)
roundP :: forall a. Parser a -> Parser (Bracket, a)
roundP = (a -> (Bracket, a))
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity (Bracket, a)
forall a b.
(a -> b)
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Bracket
Round,) (ParsecT Void Text Identity a
 -> ParsecT Void Text Identity (Bracket, a))
-> (ParsecT Void Text Identity a -> ParsecT Void Text Identity a)
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity (Bracket, a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser Text
-> Parser Text
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity a
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Text -> Parser Text
symbol Text
"(") (Text -> Parser Text
symbol Text
")")

square :: Parser a -> Parser (Bracket, a)
square :: forall a. Parser a -> Parser (Bracket, a)
square = (a -> (Bracket, a))
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity (Bracket, a)
forall a b.
(a -> b)
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Bracket
Square,) (ParsecT Void Text Identity a
 -> ParsecT Void Text Identity (Bracket, a))
-> (ParsecT Void Text Identity a -> ParsecT Void Text Identity a)
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity (Bracket, a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser Text
-> Parser Text
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity a
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Text -> Parser Text
symbol Text
"[") (Text -> Parser Text
symbol Text
"]")

curly :: Parser a -> Parser (Bracket, a)
curly :: forall a. Parser a -> Parser (Bracket, a)
curly = (a -> (Bracket, a))
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity (Bracket, a)
forall a b.
(a -> b)
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Bracket
Curly,) (ParsecT Void Text Identity a
 -> ParsecT Void Text Identity (Bracket, a))
-> (ParsecT Void Text Identity a -> ParsecT Void Text Identity a)
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity (Bracket, a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser Text
-> Parser Text
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity a
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Text -> Parser Text
symbol Text
"{") (Text -> Parser Text
symbol Text
"}")

-- | Parse an S-Expression bracketed by 'Bracket'
bracket :: Parser a -> Parser (Bracket, a)
bracket :: forall a. Parser a -> Parser (Bracket, a)
bracket Parser a
inp = Parser a -> Parser (Bracket, a)
forall a. Parser a -> Parser (Bracket, a)
roundP Parser a
inp Parser (Bracket, a) -> Parser (Bracket, a) -> Parser (Bracket, a)
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser a -> Parser (Bracket, a)
forall a. Parser a -> Parser (Bracket, a)
square Parser a
inp Parser (Bracket, a) -> Parser (Bracket, a) -> Parser (Bracket, a)
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser a -> Parser (Bracket, a)
forall a. Parser a -> Parser (Bracket, a)
curly Parser a
inp

{- | Any 'Text' object that starts with an appropriately valid character. This
 could be an variable or function name. For example, `hello` is a valid
 lexeme in the s-expression below.

 @
 (hello)
 @
-}
lexeme :: Parser SnailAst
lexeme :: Parser SnailAst
lexeme = do
    SourcePos
sourcePosition <- ParsecT Void Text Identity SourcePos
forall s e (m :: * -> *).
(TraversableStream s, MonadParsec e s m) =>
m SourcePos
getSourcePos
    [Char]
txt <- ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Char]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some (ParsecT Void Text Identity Char
 -> ParsecT Void Text Identity [Char])
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Char]
forall a b. (a -> b) -> a -> b
$ [Token Text] -> ParsecT Void Text Identity (Token Text)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
oneOf [Char]
[Token Text]
validCharacter
    SnailAst -> Parser SnailAst
forall a. a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (SnailAst -> Parser SnailAst) -> SnailAst -> Parser SnailAst
forall a b. (a -> b) -> a -> b
$ (SourcePos, Text) -> SnailAst
Lexeme (SourcePos
sourcePosition, [Char] -> Text
Text.pack [Char]
txt)

-- | An escaped quote to support nesting `"` inside a 'textLiteral'
escapedQuote :: Parser Text
escapedQuote :: Parser Text
escapedQuote = Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
Tokens Text
"\\\""

-- | Matches any non-quote character
nonQuoteCharacter :: Parser Text
nonQuoteCharacter :: Parser Text
nonQuoteCharacter = do
    Char
character <- Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
anySingleBut Char
Token Text
'\"'
    Text -> Parser Text
forall a. a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Text
Text.singleton Char
character

quote :: Parser Char
quote :: ParsecT Void Text Identity Char
quote = Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\"'

quotes :: Parser a -> Parser a
quotes :: forall a. Parser a -> Parser a
quotes = ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity a
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between ParsecT Void Text Identity Char
quote ParsecT Void Text Identity Char
quote

{- | Matches a literal text and supports nested quotes, e.g.

 @
 ("hello\"")
 @
-}
textLiteral :: Parser SnailAst
textLiteral :: Parser SnailAst
textLiteral = do
    SourcePos
sourcePosition <- ParsecT Void Text Identity SourcePos
forall s e (m :: * -> *).
(TraversableStream s, MonadParsec e s m) =>
m SourcePos
getSourcePos
    Maybe [Text]
mText <- Parser (Maybe [Text]) -> Parser (Maybe [Text])
forall a. Parser a -> Parser a
quotes (Parser (Maybe [Text]) -> Parser (Maybe [Text]))
-> (ParsecT Void Text Identity [Text] -> Parser (Maybe [Text]))
-> ParsecT Void Text Identity [Text]
-> Parser (Maybe [Text])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParsecT Void Text Identity [Text] -> Parser (Maybe [Text])
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (ParsecT Void Text Identity [Text] -> Parser (Maybe [Text]))
-> ParsecT Void Text Identity [Text] -> Parser (Maybe [Text])
forall a b. (a -> b) -> a -> b
$ Parser Text -> ParsecT Void Text Identity [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some (Parser Text -> ParsecT Void Text Identity [Text])
-> Parser Text -> ParsecT Void Text Identity [Text]
forall a b. (a -> b) -> a -> b
$ Parser Text
escapedQuote Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
nonQuoteCharacter
    ParsecT Void Text Identity (Token Text) -> Parser ()
forall a. ParsecT Void Text Identity a -> Parser ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy ([Token Text] -> ParsecT Void Text Identity (Token Text)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
oneOf [Char]
[Token Text]
validCharacter ParsecT Void Text Identity (Token Text)
-> ParsecT Void Text Identity (Token Text)
-> ParsecT Void Text Identity (Token Text)
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\"')
    SnailAst -> Parser SnailAst
forall a. a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (SnailAst -> Parser SnailAst) -> SnailAst -> Parser SnailAst
forall a b. (a -> b) -> a -> b
$ case Maybe [Text]
mText of
        Maybe [Text]
Nothing -> (SourcePos, Text) -> SnailAst
TextLiteral (SourcePos
sourcePosition, Text
"")
        Just [Text]
text -> (SourcePos, Text) -> SnailAst
TextLiteral (SourcePos
sourcePosition, [Text] -> Text
Text.concat [Text]
text)

{- | Parse one of the possible structures in 'SnailAst'. These are parsed
 recursively separated by 'spaces' in 'sExpression'.
-}
leaves :: Parser SnailAst
leaves :: Parser SnailAst
leaves = Parser SnailAst
lexeme Parser SnailAst -> Parser SnailAst -> Parser SnailAst
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser SnailAst
textLiteral Parser SnailAst -> Parser SnailAst -> Parser SnailAst
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser SnailAst
sExpression

-- | Parse an 'SExpression'
sExpression :: Parser SnailAst
sExpression :: Parser SnailAst
sExpression = do
    Maybe Char
startingChar <- ParsecT Void Text Identity Char
-> ParsecT Void Text Identity (Maybe Char)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ([Token Text] -> ParsecT Void Text Identity (Token Text)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
oneOf [Char]
[Token Text]
parenthesisStartingCharacter)
    (Bracket
bracketType, [SnailAst]
expr) <- Parser [SnailAst] -> Parser (Bracket, [SnailAst])
forall a. Parser a -> Parser (Bracket, a)
bracket (Parser SnailAst
leaves Parser SnailAst -> Parser () -> Parser [SnailAst]
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`sepEndBy` Parser ()
spaces)
    SnailAst -> Parser SnailAst
forall a. a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (SnailAst -> Parser SnailAst) -> SnailAst -> Parser SnailAst
forall a b. (a -> b) -> a -> b
$ Maybe Char -> Bracket -> [SnailAst] -> SnailAst
SExpression Maybe Char
startingChar Bracket
bracketType [SnailAst]
expr

-- | Parse a valid snail file
snailAst :: Parser [SnailAst]
snailAst :: Parser [SnailAst]
snailAst = (Parser ()
spaces Parser () -> Parser [SnailAst] -> Parser [SnailAst]
forall a b.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity b -> ParsecT Void Text Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser SnailAst
sExpression Parser SnailAst -> Parser () -> Parser [SnailAst]
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`sepEndBy1` Parser ()
spaces) Parser [SnailAst] -> Parser () -> Parser [SnailAst]
forall a b.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity b -> ParsecT Void Text Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof