module PropaFP.Parsers.Lisp.Parser
( tokenize
, parse
, parseSequence
, analyzeExpression
, analyzeExpressionSequence
, isScientificNumber) where
import PropaFP.Parsers.Lisp.DataTypes
import Prelude
import GHC.Utils.Misc (readRational)
import qualified Data.Scientific as S
import qualified Data.List as L
-- Constants.
symbolCharacters :: String
symbolCharacters :: String
symbolCharacters = String
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_!?-+*/%<>#.=^"

numberCharacters :: String
numberCharacters :: String
numberCharacters = String
"0123456789."

isSymbolCharacter :: Char -> Bool
isSymbolCharacter :: Char -> Bool
isSymbolCharacter Char
ch = Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
ch String
symbolCharacters

isNumberCharacter :: Char -> Bool
isNumberCharacter :: Char -> Bool
isNumberCharacter Char
ch = Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
ch String
numberCharacters

isSymbol :: String -> Bool
isSymbol :: String -> Bool
isSymbol = (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSymbolCharacter

-- Fixed issue with parsing -.1, multiple decimal points
isNumber :: String -> Bool
isNumber :: String -> Bool
isNumber [] = Bool
True
isNumber [Char
c] = Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
c String
"0123456789"
isNumber (Char
'-' : String
cs) = String -> Bool
isNumber String
cs
isNumber (Char
c : String
cs) = Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
c String
"0123456789" Bool -> Bool -> Bool
&& (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isNumberCharacter String
cs Bool -> Bool -> Bool
&& String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
L.length ((Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
L.filter (Char
'.' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) String
cs) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
1 

isScientificNumber :: String -> Bool
isScientificNumber :: String -> Bool
isScientificNumber [] = Bool
True
isScientificNumber [Char
_c] = Bool
False
isScientificNumber (Char
'-' : String
cs) = String -> Bool
isScientificNumber String
cs
isScientificNumber (Char
c : String
cs) = 
  Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
c String
"0123456789"  Bool -> Bool -> Bool
&&
  case (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
L.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'e') String
cs of
    (String
_, []) -> Bool
False -- e is not in cs
    (String
beforeE, Char
_e : String
afterE) ->
      (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isNumberCharacter String
beforeE Bool -> Bool -> Bool
&&
      case String
afterE of
        []          -> Bool
False
        (Char
'-' : String
ecs) -> (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"0123456789") String
ecs
        String
ecs         -> (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"0123456789") String
ecs
  Bool -> Bool -> Bool
&& String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
L.length ((Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
L.filter (Char
'.' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) String
cs) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
1 
-- The "tokenize" function is the first phase of converting the source code of
-- a Lisp program into an abstract syntax tree. It performs lexical analysis on
-- a String representation of a Lisp program by extracting a list of tokens.
tokenize :: String -> [String]
tokenize :: String -> [String]
tokenize [] = []
tokenize (Char
x:String
xs)
  | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
';' = String -> [String]
tokenize (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n') String
xs -- Remove comments
  | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'(' = [Char
x] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
tokenize String
xs
  | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
')' = [Char
x] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
tokenize String
xs
  | Char -> Bool
isNumberCharacter Char
x = String -> String -> Bool -> [String]
tokenizeNumber (Char
xChar -> String -> String
forall a. a -> [a] -> [a]
:String
xs) String
"" Bool
False
  | Char -> Bool
isSymbolCharacter Char
x = String -> String -> [String]
tokenizeSymbol (Char
xChar -> String -> String
forall a. a -> [a] -> [a]
:String
xs) String
""
  | Bool
otherwise = String -> [String]
tokenize String
xs

tokenizeNumber :: String -> String -> Bool -> [String]
tokenizeNumber :: String -> String -> Bool -> [String]
tokenizeNumber [] String
number Bool
foundE = [String
number]
tokenizeNumber (Char
x:String
xs) String
number Bool
foundE
  | Char -> Bool
isNumberCharacter Char
x = String -> String -> Bool -> [String]
tokenizeNumber String
xs (String
number String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
x]) Bool
foundE
  | (Char
'e' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
x) Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
foundE = String -> String -> Bool -> [String]
tokenizeNumber String
xs (String
number String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
x]) Bool
True -- Support scientific numbers
  | Bool
otherwise = String
number String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
tokenize (Char
xChar -> String -> String
forall a. a -> [a] -> [a]
:String
xs)

tokenizeSymbol :: String -> String -> [String]
tokenizeSymbol :: String -> String -> [String]
tokenizeSymbol [] String
symbol = [String
symbol]
tokenizeSymbol (Char
x:String
xs) String
number
  | Char -> Bool
isSymbolCharacter Char
x = String -> String -> [String]
tokenizeSymbol String
xs (String
number String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
x])
  | Bool
otherwise = String
number String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
tokenize (Char
xChar -> String -> String
forall a. a -> [a] -> [a]
:String
xs)

-- The "parse" function is the second phase of converting the source code of
-- a Lisp program into an abstract syntax tree. It takes the list of tokens
-- generated by "tokenize" and scans it until a valid Expression is created.
-- The newly built expression along with the remaining tokens are returned.
-- The Expressions that are returned are "primitive". They are entirely
-- comprised of elements such as boolean and numeric constants with the only
-- compound element being a "pair".
parse :: [String] -> (Expression, [String])
parse :: [String] -> (Expression, [String])
parse [] = (Expression
Null, [])
parse (String
x:[String]
xs)
  | String
x String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"(" = [String] -> (Expression, [String])
parseList [String]
xs
  | String
"#t" String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
x = ((Bool -> Expression
Boolean Bool
True), [String]
xs)
  | String
"#f" String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
x = ((Bool -> Expression
Boolean Bool
False), [String]
xs)
  | String
"null" String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
x = ((Expression
Null), [String]
xs)
  | String -> Bool
isScientificNumber String
x = ((Rational -> Expression
Number (Scientific -> Rational
forall a. Real a => a -> Rational
toRational (String -> Scientific
forall a. Read a => String -> a
read String
x :: S.Scientific))), [String]
xs)
  | String -> Bool
isNumber String
x = ((Rational -> Expression
Number (String -> Rational
readRational String
x)), [String]
xs)
  | String -> Bool
isSymbol String
x = ((String -> Expression
Variable String
x), [String]
xs)
  | Bool
otherwise = (Expression
Null, [])

-- A helper function to parse a list. A list is defined as either
-- Null or a Pair who's second element is a list.
parseList :: [String] -> (Expression, [String])
parseList :: [String] -> (Expression, [String])
parseList [] = (Expression
Null, [])
parseList tokens :: [String]
tokens@(String
x:[String]
xs)
  | String
x String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
")" = (Expression
Null, [String]
xs)
  | Bool
otherwise = ((Expression -> Expression -> Expression
Pair Expression
expr1 Expression
expr2), [String]
rest2)
                where (Expression
expr1, [String]
rest1) = [String] -> (Expression, [String])
parse [String]
tokens
                      (Expression
expr2, [String]
rest2) = [String] -> (Expression, [String])
parseList [String]
rest1

-- A helper function that takes the list of tokens generated by "tokenize"
-- and continues parsing until all the constituent Expressions are extracted.
parseSequence :: [String] -> [Expression]
parseSequence :: [String] -> [Expression]
parseSequence [] = []
parseSequence [String]
tokens = Expression
expr Expression -> [Expression] -> [Expression]
forall a. a -> [a] -> [a]
: [String] -> [Expression]
parseSequence [String]
rest
                       where (Expression
expr, [String]
rest) = [String] -> (Expression, [String])
parse [String]
tokens

-- The "analyzeExpression" function implements the third and final phase of
-- converting the source code of a Lisp program into an abstract syntax tree.
-- It takes a "primitive" Expression as input and converts it into a more
-- sophisticated abstract syntax tree Expression such as Lambda, Application,
-- If, Define, etc.
analyzeExpression :: Expression -> Expression
analyzeExpression :: Expression -> Expression
analyzeExpression Expression
Null = Expression
Null
analyzeExpression (Number Rational
number) = (Rational -> Expression
Number Rational
number)
analyzeExpression (Boolean Bool
bool) = (Bool -> Expression
Boolean Bool
bool)
analyzeExpression (Variable String
variable) = (String -> Expression
Variable String
variable)
analyzeExpression pair :: Expression
pair@(Pair Expression
first Expression
second)
  | Expression -> Bool
isIfExpression Expression
pair = Expression -> Expression
buildIfExpression Expression
pair
  | Expression -> Bool
isLambdaExpression Expression
pair = Expression -> Expression
buildLambdaExpression Expression
pair
  | Expression -> Bool
isDefinitionExpression Expression
pair = Expression -> Expression
buildDefinitionExpression Expression
pair
  | Expression -> Bool
isCondExpression Expression
pair = Expression -> Expression
buildCondExpression Expression
pair
  -- New special forms to be added here.
  | Bool
otherwise = Expression -> Expression
buildApplicationExpression Expression
pair

isIfExpression :: Expression -> Bool
isIfExpression :: Expression -> Bool
isIfExpression (Pair (Variable String
value) Expression
_) = String
value String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"if"
isIfExpression Expression
_ = Bool
False

buildIfExpression :: Expression -> Expression
buildIfExpression :: Expression -> Expression
buildIfExpression (Pair Expression
_ (Pair Expression
predicate (Pair Expression
thenClause (Pair Expression
elseClause Expression
Null)))) =
  Expression -> Expression -> Expression -> Expression
If (Expression -> Expression
analyzeExpression Expression
predicate) (Expression -> Expression
analyzeExpression Expression
thenClause) (Expression -> Expression
analyzeExpression Expression
elseClause)

isLambdaExpression :: Expression -> Bool
isLambdaExpression :: Expression -> Bool
isLambdaExpression (Pair (Variable String
value) Expression
_) = String
value String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"lambda"
isLambdaExpression Expression
_ = Bool
False

buildLambdaExpression :: Expression -> Expression
buildLambdaExpression :: Expression -> Expression
buildLambdaExpression (Pair Expression
_ (Pair Expression
parameters (Pair Expression
body Expression
Null))) =
  [Expression] -> Expression -> Expression
Lambda (Expression -> [Expression]
pairToList Expression
parameters) (Expression -> Expression
analyzeExpression Expression
body)

isDefinitionExpression :: Expression -> Bool
isDefinitionExpression :: Expression -> Bool
isDefinitionExpression (Pair (Variable String
value) Expression
_) = String
value String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"define"
isDefinitionExpression Expression
_ = Bool
False

buildDefinitionExpression :: Expression -> Expression
buildDefinitionExpression :: Expression -> Expression
buildDefinitionExpression (Pair Expression
_ (Pair Expression
variable (Pair Expression
value Expression
Null))) =
  Expression -> Expression -> Expression
Definition Expression
variable (Expression -> Expression
analyzeExpression Expression
value)

isCondExpression :: Expression -> Bool
isCondExpression :: Expression -> Bool
isCondExpression (Pair (Variable String
value) Expression
_) = String
value String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"cond"
isCondExpression Expression
_ = Bool
False

buildCondExpression :: Expression -> Expression
buildCondExpression :: Expression -> Expression
buildCondExpression (Pair Expression
_ Expression
second) = Expression -> Expression
buildCondExpressionHelper Expression
second

buildCondExpressionHelper :: Expression -> Expression
buildCondExpressionHelper :: Expression -> Expression
buildCondExpressionHelper (Expression
Null) = ([(Expression, Expression)] -> Expression
Cond [])
buildCondExpressionHelper (Pair (Pair Expression
predicate (Pair Expression
expression Expression
Null)) Expression
other) =
  ([(Expression, Expression)] -> Expression
Cond ((Expression -> Expression
analyzeExpression Expression
predicate, Expression -> Expression
analyzeExpression Expression
expression) (Expression, Expression)
-> [(Expression, Expression)] -> [(Expression, Expression)]
forall a. a -> [a] -> [a]
: [(Expression, Expression)]
cases))
  where (Cond [(Expression, Expression)]
cases) = Expression -> Expression
buildCondExpressionHelper Expression
other

buildApplicationExpression :: Expression -> Expression
buildApplicationExpression :: Expression -> Expression
buildApplicationExpression (Pair Expression
operator Expression
operands) =
  Expression -> [Expression] -> Expression
Application (Expression -> Expression
analyzeExpression Expression
operator) ((Expression -> Expression) -> [Expression] -> [Expression]
forall a b. (a -> b) -> [a] -> [b]
map Expression -> Expression
analyzeExpression (Expression -> [Expression]
pairToList Expression
operands))

analyzeExpressionSequence :: [Expression] -> [Expression]
analyzeExpressionSequence :: [Expression] -> [Expression]
analyzeExpressionSequence = (Expression -> Expression) -> [Expression] -> [Expression]
forall a b. (a -> b) -> [a] -> [b]
map Expression -> Expression
analyzeExpression