module Language.JS.Parser where
import Control.Applicative ((<|>))
import Control.Monad (liftM2)
import qualified Text.Parsec as P
import Language.JS.Types
import Language.JS.Common
import Language.JS.Operators
identifierName =
liftM2 (++) (P.many (P.oneOf "_$")) (P.many1 P.alphaNum)
identifier =
P.try (LI <$> (do i <- identifierName
case i `elem` reservedWords of
True -> P.unexpected "reserved word"
_ -> return i)) P.<?> "[identifier]"
numericLiteral =
LN <$> (binN <|> octN <|> hexN <|> decN)
P.<?> "[number-literal]"
where prefix p = do
i <- P.char '0'
x <- P.oneOf p
return [i, x]
combine = liftM2 (++)
hexN = combine (P.try (prefix "xX")) (P.many1 (P.oneOf "0123456789abcdefABCDEF"))
octN = combine (P.try (prefix "oO")) (P.many1 (P.oneOf "01234567"))
binN = combine (P.try (prefix "bB")) (P.many1 (P.oneOf "01"))
decN = do
lead <- P.many1 P.digit
fraction <- liftM2 (:) (P.char '.') (P.many P.digit) <|> return ""
expo <- expoN
return (lead ++ fraction ++ expo)
expoN = liftM2 (:) (P.oneOf "eE") (P.many P.digit) <|> return ""
booleanLiteral =
P.try (boolA "true" <|> boolA "false")
P.<?> "[boolean]"
where boolA = fmap (LB . toHask) . keywordB
toHask s | s == "true" = True
| otherwise = False
thisIdent =
const LThis <$> keywordB "this" P.<?> "[this]"
nullIdent =
const LNull <$> keywordB "null" P.<?> "[null]"
stringLiteral =
buildExpression LS '\"' withoutNewLineAllowed
<|> buildExpression LS '\'' withoutNewLineAllowed
<|> LTS <$> (P.char '`' *> templateString "" [])
P.<?> "[string-literal]"
where
withoutNewLineAllowed e = P.many (P.satisfy (\c -> c /= '\n' && c /= e))
buildExpression ctor wc p = ctor <$> P.try (P.between (P.char wc) (P.char wc) (p wc))
templateString str ls = (do
t <- P.anyToken
case t of
'$' -> do
e <- TExpression <$> braces (expressionNonEmpty True)
let s' = if length str > 0 then [TString str, e] else [e]
templateString "" (ls ++ s')
'`' -> return (ls ++ (if length str > 0 then [TString str] else []))
_ -> templateString (str ++ [t]) ls) <|> return ls
regexLiteral =
let re = (P.string "/" >> return "") <|>
(do es <- P.char '\\'
t <- P.anyToken
n <- re
return (es:t:n)) <|>
(liftM2 (:) P.anyToken re)
in RegExp <$> ((P.char '/') *> re) <*> P.many (P.oneOf "mgi")
elision =
const Elision <$> keywordB ","
P.<?> "elision"
arrayItems ls =
(lexeme (elision <|> item) >>= \x -> arrayItems (ls ++ [x])) <|> return ls
where item = checkSpread Spread (expressionNonEmpty False) <* P.optional (P.char ',')
arrayLiteral =
P.try (LA <$> brackets (betweenSpaces (arrayItems [])))
P.<?> "[array]"
objectBinds = do
sp <- P.optionMaybe (keywordB "...")
case sp of
Just _ -> OPI . Spread <$> literals
Nothing -> (OPM <$> (asyncMethodDef <|> classGetSetMethodDef <|> propertyMethodDef)) <|> (do
k <- identifier
x <- P.try (P.lookAhead (P.oneOf ",:}"))
case x of
':' -> P.try (OPKV k <$> (lexeme (P.char ':') *> lexeme (checkSpread Spread (expressionNonEmpty False)) P.<?> "[object-value-expression]"))
_ -> return (OPI k))
objectLiteral =
LO <$> lexeme (braces (P.sepBy (betweenSpaces objectBinds) (P.char ',')))
P.<?> "[object-literal]"
parensExpression =
LP <$> parens (betweenSpaces (expressionNonEmpty True))
P.<?> "[parenthesis]"
checkSpread ctor p =
do i <- P.optionMaybe (keywordB "...")
case i of
Just _ -> ctor <$> p
Nothing -> p
formalParameter =
betweenSpaces bindExpression
P.<?> "[formal-parameters]"
functionDeclaration =
keywordB "function" *>
(Function <$> lexeme (P.optionMaybe identifier)
<*> lexeme (parens (commaSep formalParameter))
<*> lexeme (SBlock <$> braces (betweenSpaces (P.many (lexeme statements)))))
P.<?> "[function]"
arrowFunctionDeclaration =
P.try (Arrow <$> (lexeme (parens manyParams <|> singleParam) <* keywordB "=>")
<*> blockOrStatements)
P.<?> "[arrow-function]"
where singleParam = Left <$> bindVar
manyParams = Right <$> commaSep formalParameter
functionExpression =
arrowFunctionDeclaration <|> functionDeclaration
propertyMethodDef =
P.try (PropertyMethod <$> lexeme identifier
<*> lexeme (parens (commaSep formalParameter))
<*> (SBlock <$> braces (whiteSpaces *> P.many (lexeme statements))))
P.<?> "[class-method-definition]"
classStaticDef =
lexeme (keywordB "static") *>
(ClassStatic <$> (propertyMethodDef <|> classPropertyDef))
classGetSetMethodDef =
(keywordB "set" *> (ClassSetMethod <$> propertyMethodDef)) <|>
(keywordB "get" *> (ClassGetMethod <$> propertyMethodDef))
P.<?> "[class-get-set-definition]"
asyncMethodDef =
keywordB "async" *> (Async <$> propertyMethodDef)
P.<?> "[async-definition]"
classPropertyDef =
P.try (ClassProperty <$> (lexeme identifier <* P.char '=' <* whiteSpaces)
<*> lexeme (expressionNonEmpty False))
P.<?> "[class-property]"
classDeclaration =
keywordB "class" *> (Class <$> (lexeme (P.optionMaybe identifier))
<*> P.optionMaybe (keywordB "extends" *> lexeme identifier)
<*> (SBlock <$> braces (whiteSpaces *> classBlock)))
P.<?> "[class-expression]"
where classBlock = P.many (lexeme (toStatement <$> classBlockDecls))
classBlockDecls = (classPropertyDef
<|> asyncMethodDef
<|> classStaticDef
<|> classGetSetMethodDef
<|> propertyMethodDef)
dotMember p =
Dot p <$> (lexeme (P.char '.') *> identifier)
P.<?> "[dot-expression]"
accessor p =
Acc p <$> brackets (betweenSpaces (expressionNonEmpty True))
P.<?> "[array-expression]"
functionCall p =
FCall p <$> lexeme (parens (commaSep (whiteSpaces *> expressionNonEmpty False)))
P.<?> "[function-call]"
newIdent =
const Nothing <$> keywordB "new" P.<?> "[new]"
memberExpression (Just p) =
(do dt <- (functionCall p <|> dotMember p <|> accessor p) P.<?> "[member-expression]"
memberExpression (Just dt)) <|> return p
memberExpression Nothing =
(New <$> expressions) P.<?> "[new-expression]"
literals =
thisIdent
<|> nullIdent
<|> booleanLiteral
<|> stringLiteral
<|> arrayLiteral
<|> objectLiteral
<|> regexLiteral
<|> numericLiteral
<|> identifier
primaryExpression =
literals
<|> functionDeclaration
<|> classDeclaration
<|> parensExpression
maybeSemi =
P.optional (P.char ';')
emptyExpression =
(const Empty) <$> (P.char ';')
P.<?> "[empty-expressions]"
leftHandSideExpression =
(newIdent <|> (Just <$> lexeme primaryExpression)) >>= memberExpression
P.<?> "left-hand-side-expression"
expressions =
emptyExpression <|> expressionNonEmpty True P.<?> "[expressions]"
comment =
P.try (Comment <$> (P.string "//" *> P.many (P.satisfy (\c -> c /= '\n'))))
multilineComment =
P.try (MultilineComment <$> (P.between (P.string "/*") (P.string "*/") (P.many P.anyToken)))
commentExpression =
comment <|> multilineComment
expressionNonEmpty notComma =
commentExpression
<|> functionExpression
<|> classDeclaration
<|> (operationExpression notComma (expressionNonEmpty notComma) leftHandSideExpression)
<|> primaryExpression
P.<?> "[non-empty-expressions]"
toStatement :: Expression -> Statement
toStatement (Function (Just (LI a)) b c) = SF a b c
toStatement (Class (Just (LI a)) b c) = SC a b c
toStatement a = SExp a
importNamespaceClause =
Namespace <$> ((keywordB "*" *> keywordB "as") *> identifier)
importBindClause =
BindNames <$> braces (commaSep (betweenSpaces identifier))
importDefaultNameClause =
DefaultName <$> lexeme identifier
importManyClauses =
commaSep1 (whiteSpaces *> (importBindClause <|> importDefaultNameClause))
importClauses =
(Left <$> importNamespaceClause) <|>
(Right <$> importManyClauses)
importFileStatement =
SImportFile <$> lexeme stringLiteral
importStatement =
SImport <$> (lexeme importClauses <* keywordB "from") <*> lexeme stringLiteral
importStatements =
keywordB "import" *> (importStatement <|> importFileStatement)
P.<?> "[import-statement]"
reexportStatement = P.try (SRExport <$> (lexeme (expressionNonEmpty False) <* keywordB "from") <*> lexeme stringLiteral)
exportDefaultStatement = keywordB "default" *> (SExportDefault <$> expressionNonEmpty False)
exportStatement = SExport <$> statements
exportStatements = keywordB "export" *> (reexportStatement <|> exportDefaultStatement <|> exportStatement)
P.<?> "[export-statement]"
continueStatement =
keywordB "continue" *> (SContinue <$> (P.optionMaybe identifier))
P.<?> "[continue-statement]"
breakStatement =
keywordB "break" *> (SBreak <$> (P.optionMaybe identifier))
P.<?> "[break-statement]"
blockStatement allowedStmt =
SBlock <$> P.try (braces (betweenSpaces (P.many allowedStmt)))
P.<?> "[block-statement]"
blockOrStatements =
SBlock <$> braces (whiteSpaces *> P.many (lexeme statements)) <|> statements
ifStatement =
keywordB "if" *> (SIf <$> (lexeme parensExpression)
<*> lexeme blockOrStatements
<*> P.optionMaybe (keywordB "else" *> blockOrStatements))
P.<?> "[if-statement]"
catchBlock =
keywordB "catch" *> (SCatch <$> lexeme (P.optionMaybe parensExpression)
<*> blockStatement statements)
P.<?> "[try/catch-statement]"
finallyBlock =
keywordB "finally" *> (SFinally <$> (blockStatement statements))
P.<?> "[try/catch/finally-statement]"
tryStatement =
keywordB "try" *> (STry <$> lexeme (blockStatement statements)
<*> catchBlock
<*> P.optionMaybe finallyBlock)
P.<?> "[try-statement]"
throwStatement =
keywordB "throw" *> (SThrow <$> (expressionNonEmpty False))
P.<?> "[throw-statement]"
returnStatement =
keywordB "return" *> (SReturn <$> expressions)
P.<?> "[return-statement]"
bindVar =
BindVar <$> lexeme identifier <*> P.optionMaybe (P.notFollowedBy (keywordB "=>") *> (lexeme (P.char '=') *> (expressionNonEmpty False)))
bindPatternDecl =
BindPattern <$> (lexeme (objectLiteral <|> arrayLiteral)) <*> P.optionMaybe (lexeme (P.char '=') *> (expressionNonEmpty False))
bindSpread =
BindRest <$> (keywordB "..." *> leftHandSideExpression)
bindExpression =
(bindVar <|> bindPatternDecl <|> bindSpread) P.<?> "[var-binds]"
constVariableStatement =
P.try (SVariable <$> (keywordB "const") <*> commaSep1 (betweenSpaces bindExpression))
notConstVariableStatement =
P.try (SVariable <$> (keywordB "let" <|> keywordB "var") <*> commaSep1 (betweenSpaces bindExpression))
variableStatement =
constVariableStatement <|> notConstVariableStatement P.<?> "[variable-statement]"
caseClause =
lexeme ((caseB <|> defaultCase) <* (P.char ':'))
P.<?> "[switch/case-expression]"
where defaultCase = const DefaultCase <$> (keywordB "default")
caseB = keywordB "case" *> (Case <$> literals)
caseDeclaration =
SCase <$> lexeme (P.many1 caseClause)
<*> P.many (lexeme ((breakStatement <* maybeSemi) <|> statements))
switchStatement =
keywordB "switch" *>
(SSwitch <$> lexeme parensExpression
<*> braces (betweenSpaces (P.many caseDeclaration)))
P.<?> "[switch-statement]"
debuggerStatement =
const SDebugger <$> keywordB "debugger"
P.<?> "[debugger-statement]"
breakableStatement =
blockStatement ((breakStatement <* maybeSemi) <|> statements) <|> statements
whileStatement =
keywordB "while" *>
(SWhile <$> lexeme (parens (P.many1 expressions))
<*> breakableStatement)
P.<?> "[while-statement]"
doWhileStatement =
keywordB "do" *>
(SDoWhile <$> lexeme breakableStatement
<*> (keywordB "while" *> parens (P.many1 expressions)))
P.<?> "[do/while-statement]"
forInVStyle =
P.try (ForInV <$> lexeme (keywordB "let" <|> keywordB "const" <|> keywordB "var")
<*> bindExpression
<*> (keywordB "in" *> (expressionNonEmpty False)))
forOfVStyle =
P.try (ForOfV <$> lexeme (keywordB "let" <|> keywordB "const" <|> keywordB "var")
<*> bindExpression
<*> (keywordB "of" *> expressionNonEmpty False ))
forInStyle =
P.try (ForIn <$> bindExpression <*> (keywordB "in" *> expressionNonEmpty False))
forOfStyle =
P.try (ForOf <$> bindExpression <*> (keywordB "of" *> expressionNonEmpty False))
forRegularStyle =
ForRegular <$> P.try (P.optionMaybe bindExpression <* (P.char ';'))
<*> P.try (P.optionMaybe (expressionNonEmpty True) <* (P.char ';'))
<*> P.optionMaybe (expressionNonEmpty True)
forStyle =
forInVStyle <|> forOfVStyle <|> forInStyle <|> forOfStyle <|> forRegularStyle P.<?> "[for-style]"
forStatement =
SFor <$> lexeme (keywordB "for" *> (parens forStyle)) <*> breakableStatement
iterationStatement = forStatement <|> whileStatement <|> doWhileStatement
withStatement =
keywordB "with" *> (SWith <$> lexeme (parens (expressionNonEmpty True))
<*> (SBlock <$> braces (whiteSpaces *> P.many (lexeme statements)) <|> statements))
P.<?> "[with-statement]"
labelledStatement =
SLabel <$> P.try (lexeme (identifier <* P.char ':')) <*> statements
P.<?> "[labelled-statement]"
statements = ((blockStatement statements
<|> ifStatement
<|> iterationStatement
<|> debuggerStatement
<|> labelledStatement
<|> continueStatement
<|> tryStatement
<|> throwStatement
<|> returnStatement
<|> switchStatement
<|> withStatement
<|> variableStatement
<|> fmap toStatement expressions) <* maybeSemi) P.<?> "[statements]"
topLevelStatements = importStatements <|> exportStatements <|> statements
parseJs = P.many (betweenSpaces (topLevelStatements <* maybeSemi))
parse = P.parse parseJs
parseFromFile filename = P.parse parseJs filename <$> readFile filename