{-# LANGUAGE RecordWildCards, BangPatterns #-}

-- Most of this module follows the Haskell report, https://www.haskell.org/onlinereport/lexemes.html
module Lexer(Lexeme(..), lexer, unlexerFile) where

import Data.Char
import Data.List.Extra
import Data.Tuple.Extra

-- | A lexeme of text, approx some letters followed by some space.
data Lexeme = Lexeme
    {Lexeme -> Int
line :: {-# UNPACK #-} !Int -- ^ 1-based line number (0 = generated)
    ,Lexeme -> Int
col :: {-# UNPACK #-} !Int -- ^ 1-based col number (0 = generated)
    ,Lexeme -> String
lexeme :: String -- ^ Actual text of the item
    ,Lexeme -> String
whitespace :: String -- ^ Suffix spaces and comments
    } deriving Int -> Lexeme -> ShowS
[Lexeme] -> ShowS
Lexeme -> String
(Int -> Lexeme -> ShowS)
-> (Lexeme -> String) -> ([Lexeme] -> ShowS) -> Show Lexeme
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Lexeme] -> ShowS
$cshowList :: [Lexeme] -> ShowS
show :: Lexeme -> String
$cshow :: Lexeme -> String
showsPrec :: Int -> Lexeme -> ShowS
$cshowsPrec :: Int -> Lexeme -> ShowS
Show


charNewline :: Char -> Bool
charNewline Char
x = Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\r' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\f'
charSpecial :: Char -> Bool
charSpecial Char
x = Char
x Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"(),;[]`{}"
charAscSymbol :: Char -> Bool
charAscSymbol Char
x = Char
x Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"!#$%&*+./<=>?@\\^|-~" Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':' -- special case for me
charSymbol :: Char -> Bool
charSymbol Char
x = Char -> Bool
charAscSymbol Char
x Bool -> Bool -> Bool
|| (Char -> Bool
isSymbol Char
x Bool -> Bool -> Bool
&& Bool -> Bool
not (Char -> Bool
charSpecial Char
x) Bool -> Bool -> Bool
&& Char
x Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
"_\"\'")

charIdentStart :: Char -> Bool
charIdentStart Char
x = Char -> Bool
isAlpha Char
x Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'
charIdentCont :: Char -> Bool
charIdentCont Char
x = Char -> Bool
isAlphaNum Char
x Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\''


lexer :: String -> [Lexeme]
lexer :: String -> [Lexeme]
lexer = Int -> Int -> String -> [Lexeme]
go1 Int
1 Int
1
    where
        -- we might start with whitespace, before any lexemes
        go1 :: Int -> Int -> String -> [Lexeme]
go1 Int
line Int
col String
xs
            | (String
whitespace, String
xs) <- String -> (String, String)
lexerWhitespace String
xs
            , String
whitespace String -> String -> Bool
forall a. Eq a => a -> a -> Bool
/= String
""
            , (Int
line2, Int
col2) <- Int -> Int -> String -> (Int, Int)
reposition Int
line Int
col String
whitespace
            = Lexeme :: Int -> Int -> String -> String -> Lexeme
Lexeme{lexeme :: String
lexeme=String
"", Int
String
whitespace :: String
col :: Int
line :: Int
whitespace :: String
col :: Int
line :: Int
..} Lexeme -> [Lexeme] -> [Lexeme]
forall a. a -> [a] -> [a]
: Int -> Int -> String -> [Lexeme]
go Int
line2 Int
col2 String
xs
        go1 Int
line Int
col String
xs = Int -> Int -> String -> [Lexeme]
go Int
line Int
col String
xs

        go :: Int -> Int -> String -> [Lexeme]
go Int
line Int
col String
"" = []
        go Int
line Int
col String
xs
            | (String
lexeme, String
xs) <- String -> (String, String)
lexerLexeme String
xs
            , (String
whitespace, String
xs) <- String -> (String, String)
lexerWhitespace String
xs
            , (Int
line2, Int
col2) <- Int -> Int -> String -> (Int, Int)
reposition Int
line Int
col (String -> (Int, Int)) -> String -> (Int, Int)
forall a b. (a -> b) -> a -> b
$ String
whitespace String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
lexeme
            = Lexeme :: Int -> Int -> String -> String -> Lexeme
Lexeme{Int
String
whitespace :: String
lexeme :: String
col :: Int
line :: Int
whitespace :: String
lexeme :: String
col :: Int
line :: Int
..} Lexeme -> [Lexeme] -> [Lexeme]
forall a. a -> [a] -> [a]
: Int -> Int -> String -> [Lexeme]
go Int
line2 Int
col2 String
xs


reposition :: Int -> Int -> String -> (Int, Int)
reposition :: Int -> Int -> String -> (Int, Int)
reposition = Int -> Int -> String -> (Int, Int)
forall t a. (Num t, Num a) => t -> a -> String -> (t, a)
go
    where
        go :: t -> a -> String -> (t, a)
go !t
line !a
col [] = (t
line, a
col)
        go t
line a
col (Char
x:String
xs)
            | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' = t -> a -> String -> (t, a)
go (t
linet -> t -> t
forall a. Num a => a -> a -> a
+t
1) a
1 String
xs
            | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t' = t -> a -> String -> (t, a)
go t
line (a
cola -> a -> a
forall a. Num a => a -> a -> a
+a
8) String
xs -- technically not totally correct, but please, don't use tabs
            | Bool
otherwise = t -> a -> String -> (t, a)
go t
line (a
cola -> a -> a
forall a. Num a => a -> a -> a
+a
1) String
xs


-- We take a lot of liberties with lexemes around module qualification, because we want to make fields magic
-- we ignore numbers entirely because they don't have any impact on what we want to do
lexerLexeme :: String -> (String, String)
lexerLexeme :: String -> (String, String)
lexerLexeme (Char
'\'':Char
x:Char
'\'':String
xs) = ([Char
'\'',Char
x,Char
'\''], String
xs)
lexerLexeme (Char
'\'':Char
x:String
xs) | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\'' = (String
"\'", Char
xChar -> ShowS
forall a. a -> [a] -> [a]
:String
xs) -- might be a data kind, see #25
lexerLexeme (Char
open:String
xs) | Char
open Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' Bool -> Bool -> Bool
|| Char
open Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\"' = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen [Char
open] ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String)
go String
xs
    where
        go :: String -> (String, String)
go (Char
x:String
xs) | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
open = ([Char
x], String
xs)
                  | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\\', Char
x2:String
xs <- String
xs = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen [Char
x,Char
x2] ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String)
go String
xs
                  | Bool
otherwise = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen [Char
x] ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String)
go String
xs
        go [] = ([], [])
lexerLexeme (Char
x:String
xs)
    | Char -> Bool
charSymbol Char
x
    , (String
a, String
xs) <- (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
charSymbol String
xs
    = (Char
xChar -> ShowS
forall a. a -> [a] -> [a]
:String
a, String
xs)
lexerLexeme (Char
x:String
xs)
    | Char -> Bool
charIdentStart Char
x
    , (String
a, String
xs) <- (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
charIdentCont String
xs
    = (Char
xChar -> ShowS
forall a. a -> [a] -> [a]
:String
a, String
xs)
lexerLexeme (Char
x:String
xs) = ([Char
x], String
xs)
lexerLexeme [] = ([], [])


lexerWhitespace :: String -> (String, String)
lexerWhitespace :: String -> (String, String)
lexerWhitespace (Char
x:String
xs) | Char -> Bool
isSpace Char
x = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen [Char
x] ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String)
lexerWhitespace String
xs
lexerWhitespace (Char
'-':Char
'-':String
xs)
    | (String
a, String
xs) <- (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-') String
xs
    , Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Char -> Bool
charSymbol (String -> Bool) -> String -> Bool
forall a b. (a -> b) -> a -> b
$ Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
1 String
xs
    , (String
b, String
xs) <- (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break Char -> Bool
charNewline String
xs
    , (String
c, String
xs) <- Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
1 String
xs
    = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen String
"--" ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen String
a ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen String
b ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen String
c ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String)
lexerWhitespace String
xs
lexerWhitespace (Char
'{':Char
'-':String
xs) = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen String
"{-" ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ Integer -> String -> (String, String)
forall t. (Eq t, Num t) => t -> String -> (String, String)
f Integer
1 String
xs
    where
        f :: t -> String -> (String, String)
f t
1 (Char
'-':Char
'}':String
xs) = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen String
"-}" ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ String -> (String, String)
lexerWhitespace String
xs
        f t
i (Char
'{':Char
'-':String
xs) = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen String
"{-" ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ t -> String -> (String, String)
f (t
it -> t -> t
forall a. Num a => a -> a -> a
+t
1) String
xs
        f t
i (Char
x:String
xs) = String -> (String, String) -> (String, String)
forall a b. [a] -> ([a], b) -> ([a], b)
seen [Char
x] ((String, String) -> (String, String))
-> (String, String) -> (String, String)
forall a b. (a -> b) -> a -> b
$ t -> String -> (String, String)
f t
i String
xs
        f t
i [] = ([], [])
lexerWhitespace String
xs = ([], String
xs)

seen :: [a] -> ([a], b) -> ([a], b)
seen [a]
xs = ([a] -> [a]) -> ([a], b) -> ([a], b)
forall a a' b. (a -> a') -> (a, b) -> (a', b)
first ([a]
xs[a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++)


unlexerFile :: Maybe FilePath -> [Lexeme] -> String
unlexerFile :: Maybe String -> [Lexeme] -> String
unlexerFile Maybe String
src [Lexeme]
xs =
    Integer -> String
forall a. Show a => a -> String
dropping Integer
1 String -> ShowS
forall a. [a] -> [a] -> [a]
++
    -- we split the whitespace up to increase the chances of startLine being true below
    -- pretty ugly code...
    Int -> Bool -> [(Int, String)] -> String
go Int
1 Bool
True ([[(Int, String)]] -> [(Int, String)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
        [ [(Int
line, String
lexeme String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
w1 String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
1 String
w2)
          ,(if Int
line Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Int
0 else Int
line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ((Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') (String
lexeme String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
w1 String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
1 String
w2)), ShowS
forall a. [a] -> [a]
drop1 String
w2)]
        | Lexeme{Int
String
whitespace :: String
col :: Int
lexeme :: String
line :: Int
whitespace :: Lexeme -> String
lexeme :: Lexeme -> String
col :: Lexeme -> Int
line :: Lexeme -> Int
..} <- [Lexeme]
xs, let (String
w1,String
w2) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') String
whitespace])
    where
        go
            :: Int -- ^ What line does GHC think we are on
            -> Bool -- ^ Are we at the start of a line
            -> [(Int, String)] -- ^ (original line, lexemes followed by their whitespace)
            -> String
        go :: Int -> Bool -> [(Int, String)] -> String
go Int
ghcLine Bool
startLine ((Int
i, String
x):[(Int, String)]
xs) =
            (if Bool
emitDropping then Int -> String
forall a. Show a => a -> String
dropping Int
i else String
"") String -> ShowS
forall a. [a] -> [a] -> [a]
++
            String
x String -> ShowS
forall a. [a] -> [a] -> [a]
++
            Int -> Bool -> [(Int, String)] -> String
go
                ((if Bool
emitDropping then Int
i else Int
ghcLine) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ((Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') String
x))
                (if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
x then Bool
startLine else String
"\n" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSuffixOf` String
x)
                [(Int, String)]
xs
            where emitDropping :: Bool
emitDropping = Int
ghcLine Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
i Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0 Bool -> Bool -> Bool
&& Bool
startLine
        go Int
_ Bool
_ [] = String
""

        -- write out a line marker with a trailing newline
        dropping :: a -> String
dropping a
n = case Maybe String
src of
          Just String
src' -> String
"{-# LINE " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
n String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
src' String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" #-}\n"
          Maybe String
Nothing -> String
""