{-|
This module contains lexer and error message primitives for a simple lambda calculus parser. It
demonstrates a simple but decently informative implementation of error message propagation.
-}

{-# language StrictData #-}

module FlatParse.Examples.BasicLambda.Lexer where

import FlatParse.Basic hiding (Parser, runParser, string, char, cut)

import qualified FlatParse.Basic as FP
import qualified Data.ByteString as B
import Language.Haskell.TH

import Data.String
import qualified Data.Set as S

--------------------------------------------------------------------------------

-- | An expected item which is displayed in error messages.
--
--   The derived 'Ord' instance follows constructor order, so every 'Msg' sorts
--   before every 'Lit'.
data Expected
  = Msg String  -- ^ A free-form message describing what was expected.
  | Lit String  -- ^ A literal piece of text that was expected.
  deriving (Eq, Show, Ord)

-- | A string converted with 'fromString' is treated as a literal expectation ('Lit').
instance IsString Expected where
  fromString = Lit

-- | A parsing error. Both constructors carry the position the error refers to.
data Error
  = Precise Pos Expected     -- ^ A precisely known error, like leaving out "in" from "let".
  | Imprecise Pos [Expected] -- ^ An imprecise error, when we expect a number of different things,
                             --   but parse something else.
  deriving Show

-- | The position stored in an error, whichever constructor it was built with.
errorPos :: Error -> Pos
errorPos (Precise p _)   = p
errorPos (Imprecise p _) = p

-- | Merge two errors. Inner errors (which were thrown at points with more consumed inputs)
--   are preferred. If errors are thrown at identical input positions, we prefer precise errors
--   to imprecise ones.
--
--   The point of prioritizing inner and precise errors is to suppress the deluge of "expected"
--   items, and instead try to point to a concrete issue to fix.
--
--   When both errors are precise and at the same position, the first argument wins. When both
--   are imprecise and at the same position, their expectation lists are concatenated.
merge :: Error -> Error -> Error
merge e e' = case compare (errorPos e) (errorPos e') of
  LT -> e'   -- the second error has the greater position
  GT -> e    -- the first error has the greater position
  EQ -> case (e, e') of
    (Precise{}     , _               ) -> e
    (_             , Precise{}       ) -> e'
    -- Positions are equal here, so either position may be reused.
    (Imprecise p es, Imprecise _ es' ) -> Imprecise p (es ++ es')
{-# noinline merge #-} -- merge is "cold" code, so we shouldn't inline it.

-- | The parser type used in this module: @FP.Parser@ with the error type fixed to 'Error'.
type Parser = FP.Parser Error

-- | Pretty print an error. The `B.ByteString` input is the source file. The offending line from the
--   source is displayed in the output, with a caret placed under the error column.
--
--   The output consists of a @line:column:@ header, the source line framed by a @|@ gutter,
--   the caret line, and finally a @parse error: expected ...@ message. Literal expectations
--   ('Lit') are rendered with 'show' (i.e. quoted), messages ('Msg') are rendered verbatim.
--   For an imprecise error the expected items are deduplicated and sorted before being listed.
--   An imprecise error with no expected items at all is reported as a plain
--   @parse error: unexpected input@ instead of crashing.
prettyError :: B.ByteString -> Error -> String
prettyError b e =
     linum ++ ":" ++ show c ++ ":\n"
  ++ lpad  ++ "|\n"
  ++ linum ++ "| " ++ line ++ "\n"
  ++ lpad  ++ "| " ++ replicate c ' ' ++ "^\n"
  ++ message e
  where
    -- Line and column of the error. Exactly one position is queried, so one pair
    -- is expected back; the fallback branch only keeps this binding total.
    (l, c) = case posLineCols b [errorPos e] of
      lc : _ -> lc
      []     -> (0, 0)

    -- The source line the error is on, or "" if the input has no such line.
    line :: String
    line = case drop l (FP.lines b) of
      x : _ -> x
      []    -> ""

    linum, lpad :: String
    linum = show l
    lpad  = map (const ' ') linum  -- blank gutter, as wide as the line number

    expected :: Expected -> String
    expected (Lit s) = show s
    expected (Msg s) = s

    message :: Error -> String
    message (Precise _ ex)    = "parse error: expected " ++ expected ex
    message (Imprecise _ exs) =
      -- Going through a set removes duplicates and gives a stable order.
      case S.toList (S.fromList exs) of
        []  -> "parse error: unexpected input"
        xs  -> "parse error: expected " ++ oneOf (map expected xs)

    -- Render alternatives as "a", "a or b", "a, b or c", ...
    oneOf :: [String] -> String
    oneOf []       = ""
    oneOf [x]      = x
    oneOf [x, y]   = x ++ " or " ++ y
    oneOf (x : xs) = x ++ ", " ++ oneOf xs

-- | Imprecise cut: we slap a list of items on inner errors.
--
--   The position is read with 'getPos' before the parser runs, and
--   @'Imprecise' pos es@ is handed to @FP.cutting@ together with 'merge' as the
--   function for combining errors.
cut :: Parser a -> [Expected] -> Parser a
cut p es = do
  pos <- getPos
  FP.cutting p (Imprecise pos es) merge

-- | Precise cut: we propagate at most a single error.
--
--   The position is read with 'getPos' before the parser runs, and
--   @'Precise' pos e@ is handed to @FP.cutting@ together with 'merge' as the
--   function for combining errors.
cut' :: Parser a -> Expected -> Parser a
cut' p e = do
  pos <- getPos
  FP.cutting p (Precise pos e) merge

-- | Run a parser on a `B.ByteString` input. This is @FP.runParser@ with the
--   error type specialised to 'Error'.
runParser :: Parser a -> B.ByteString -> Result Error a
runParser = FP.runParser

-- | Run parser, print pretty error on failure.
--
--   The input string is converted with 'packUTF8' first. On success the parsed
--   value is printed with 'print'; on an 'Err' result the output of
--   'prettyError' is printed; on 'Fail' a fixed "uncaught parse error" line is
--   printed.
testParser :: Show a => Parser a -> String -> IO ()
testParser p str =
  case runParser p b of
    Err e  -> putStrLn (prettyError b e)
    OK a _ -> print a
    Fail   -> putStrLn "uncaught parse error"
  where
    -- The same bytes are used for parsing and for rendering the error.
    b = packUTF8 str

-- | Parse a line comment.
--
--   Reads one byte at a time with 'anyWord8'. On byte 10 (ASCII line feed) it
--   hands over to 'ws'; on any other byte it keeps going. If no byte can be
--   read the parser succeeds with @()@.
lineComment :: Parser ()
lineComment =
  withOption anyWord8
    (\case
        10 -> ws            -- line feed: the comment is over
        _  -> lineComment)  -- still inside the comment
    (pure ())

-- | Parse a potentially nested multiline comment.
--
--   The local helper tracks the current nesting depth, starting at 1. A @-}@
--   lowers the depth, a @{-@ raises it, and anything else is skipped one byte
--   at a time. Once the depth reaches 0, parsing continues with 'ws'. If no
--   further byte can be read, the parser succeeds with @()@, whatever the
--   current depth is.
multilineComment :: Parser ()
multilineComment = go (1 :: Int) where
  go :: Int -> Parser ()
  go 0 = ws
  go n = $(switch [| case _ of
    "-}" -> go (n - 1)
    "{-" -> go (n + 1)
    _    -> branch anyWord8 (go n) (pure ()) |])

-- | Consume whitespace.
--
--   Spaces, line feeds, tabs and carriage returns are skipped. A @--@ hands
--   over to 'lineComment' and a @{-@ hands over to 'multilineComment'. On any
--   other input the parser succeeds with @()@.
ws :: Parser ()
ws = $(switch [| case _ of
  " "  -> ws
  "\n" -> ws
  "\t" -> ws
  "\r" -> ws
  "--" -> lineComment
  "{-" -> multilineComment
  _    -> pure () |])

-- | Consume whitespace after running a parser. The result of the given parser
--   is returned; the result of 'ws' is discarded.
token :: Parser a -> Parser a
token p = p <* ws
{-# inline token #-}

-- | Read a starting character of an identifier: an ASCII character accepted by
--   'isLatinLetter'.
identStartChar :: Parser Char
identStartChar = satisfyASCII isLatinLetter
{-# inline identStartChar #-}

-- | Read a non-starting character of an identifier: an ASCII character accepted
--   by 'isLatinLetter' or by 'isDigit'.
identChar :: Parser Char
identChar = satisfyASCII (\c -> isLatinLetter c || isDigit c)
{-# inline identChar #-}

-- | Check whether a `Span` contains exactly a keyword. Does not change parsing state.
--
--   The check runs inside the span via 'inSpan': one of the listed keywords
--   must match, and 'eof' must succeed right after it, so a span that merely
--   starts with a keyword is not accepted.
isKeyword :: Span -> Parser ()
isKeyword span = inSpan span do
  $(FP.switch [| case _ of
      "lam"   -> pure ()
      "let"   -> pure ()
      "in"    -> pure ()
      "if"    -> pure ()
      "then"  -> pure ()
      "else"  -> pure ()
      "true"  -> pure ()
      "false" -> pure ()  |])
  eof

-- | Parse a non-keyword string.
--
--   Builds a Template Haskell expression: the given string is matched with
--   @FP.string@, and the match is wrapped in 'token' so that trailing
--   whitespace is consumed.
symbol :: String -> Q Exp
symbol str = [| token $(FP.string str) |]

-- | Parse a non-keyword string, throw precise error on failure.
--
--   Builds a Template Haskell expression that applies @cut'@ to the 'symbol'
--   parser, with the string itself (as a 'Lit') as the expected item.
symbol' :: String -> Q Exp
symbol' str = [| $(symbol str) `cut'` Lit str |]

-- | Parse a keyword string.
--
--   Builds a Template Haskell expression: the given string is matched with
--   @FP.string@ under the condition that it is not followed by an 'identChar'
--   (so a keyword is not recognised as the prefix of a longer identifier), and
--   the match is wrapped in 'token' so that trailing whitespace is consumed.
keyword :: String -> Q Exp
keyword str = [| token ($(FP.string str) `notFollowedBy` identChar) |]

-- | Parse a keyword string, throw precise error on failure.
--
--   Builds a Template Haskell expression that applies @cut'@ to the 'keyword'
--   parser, with the string itself (as a 'Lit') as the expected item.
keyword' :: String -> Q Exp
keyword' str = [| $(keyword str) `cut'` Lit str |]