module Language.ECMAScript3.Parser
(parse
, parseScriptFromString
, parseJavaScriptFromFile
, parseScript
, parseExpression
, parseString
, ParsedStatement
, ParsedExpression
, parseSimpleExpr'
, parseBlockStmt
, parseStatement
, StatementParser
, ExpressionParser
, assignExpr
) where
import Language.ECMAScript3.Lexer hiding (identifier)
import qualified Language.ECMAScript3.Lexer as Lexer
import Language.ECMAScript3.Syntax
import Language.ECMAScript3.Syntax.Annotations
import Data.Default
import Text.Parsec hiding (parse)
import Text.Parsec.Expr
import Control.Monad(liftM,liftM2)
import Control.Monad.Trans (MonadIO,liftIO)
import Numeric(readDec,readOct,readHex)
import Data.Char
import Control.Monad.Identity
import Data.Maybe (isJust, isNothing, fromMaybe)
type ParsedStatement = Statement SourcePos
type ParsedExpression = Expression SourcePos
type CharParser a = ParsecT String ParserState Identity a
type StatementParser = CharParser ParsedStatement
type ExpressionParser = CharParser ParsedExpression
type ParserState = [String]
initialParserState :: ParserState
initialParserState = []
pushLabel :: String -> CharParser ()
pushLabel lab = do labs <- getState
pos <- getPosition
if lab `elem` labs
then fail $ "Duplicate label at " ++ show pos
else putState (lab:labs)
popLabel :: CharParser ()
popLabel = modifyState safeTail
where safeTail [] = []
safeTail (_:xs) = xs
clearLabels :: ParserState -> ParserState
clearLabels _ = []
withFreshLabelStack :: CharParser a -> CharParser a
withFreshLabelStack p = do oldState <- getState
putState $ clearLabels oldState
a <- p
putState oldState
return a
identifier :: CharParser (Id SourcePos)
identifier =
liftM2 Id getPosition Lexer.identifier
parseIfStmt:: StatementParser
parseIfStmt = do
pos <- getPosition
reserved "if"
test <- parseParenExpr <?> "parenthesized test-expression in if statement"
consequent <- parseStatement <?> "true-branch of if statement"
optional semi
((do reserved "else"
alternate <- parseStatement
return $ IfStmt pos test consequent alternate)
<|> return (IfSingleStmt pos test consequent))
parseSwitchStmt:: StatementParser
parseSwitchStmt =
let parseDefault = do
pos <- getPosition
reserved "default"
colon
statements <- many parseStatement
return (CaseDefault pos statements)
parseCase = do
pos <- getPosition
reserved "case"
condition <- parseListExpr
colon
actions <- many parseStatement
return (CaseClause pos condition actions)
isCaseDefault (CaseDefault _ _) = True
isCaseDefault _ = False
checkClauses cs = case filter isCaseDefault cs of
(_:c:_) -> fail $ "duplicate default clause in switch statement at " ++
show (getAnnotation c)
_ -> return ()
in do pos <- getPosition
reserved "switch"
test <- parseParenExpr
clauses <- braces $ many $ parseDefault <|> parseCase
checkClauses clauses
return (SwitchStmt pos test clauses)
parseWhileStmt:: StatementParser
parseWhileStmt = do
pos <- getPosition
reserved "while"
test <- parseParenExpr <?> "parenthesized test-expression in while loop"
body <- parseStatement
return (WhileStmt pos test body)
parseDoWhileStmt:: StatementParser
parseDoWhileStmt = do
pos <- getPosition
reserved "do"
body <- parseBlockStmt
reserved "while" <?> "while at the end of a do block"
test <- parseParenExpr <?> "parenthesized test-expression in do loop"
optional semi
return (DoWhileStmt pos body test)
parseContinueStmt:: StatementParser
parseContinueStmt = do
pos <- getPosition
reserved "continue"
pos' <- getPosition
id <- if sourceLine pos == sourceLine pos'
then liftM Just identifier <|> return Nothing
else return Nothing
optional semi
return $ ContinueStmt pos id
parseBreakStmt:: StatementParser
parseBreakStmt = do
pos <- getPosition
reserved "break"
pos' <- getPosition
id <- if sourceLine pos == sourceLine pos'
then liftM Just identifier <|> return Nothing
else return Nothing
optional semi
return $ BreakStmt pos id
parseBlockStmt:: StatementParser
parseBlockStmt = do
pos <- getPosition
statements <- braces (many parseStatement)
return (BlockStmt pos statements)
parseEmptyStmt:: StatementParser
parseEmptyStmt = do
pos <- getPosition
semi
return (EmptyStmt pos)
parseLabelledStmt:: StatementParser
parseLabelledStmt = do
pos <- getPosition
label <- try (do label <- identifier
colon
return label)
pushLabel $ unId label
statement <- parseStatement
popLabel
return (LabelledStmt pos label statement)
parseExpressionStmt:: StatementParser
parseExpressionStmt = do
pos <- getPosition
expr <- parseListExpr
optional semi
return $ ExprStmt pos expr
parseForInStmt:: StatementParser
parseForInStmt =
let parseInit = (reserved "var" >> liftM ForInVar identifier)
<|> liftM ForInLVal lvalue
in do pos <- getPosition
(init,expr) <- try $ do reserved "for"
parens $ do init <- parseInit
reserved "in"
expr <- parseExpression
return (init,expr)
body <- parseStatement
return $ ForInStmt pos init expr body
parseForStmt:: StatementParser
parseForStmt =
let parseInit = (reserved "var" >> liftM VarInit (parseVarDecl `sepBy` comma))
<|> liftM ExprInit parseListExpr
<|> return NoInit
in do pos <- getPosition
reserved "for"
reservedOp "("
init <- parseInit
semi
test <- optionMaybe parseExpression
semi
iter <- optionMaybe parseListExpr
reservedOp ")" <?> "closing paren"
stmt <- parseStatement
return $ ForStmt pos init test iter stmt
parseTryStmt:: StatementParser
parseTryStmt =
let parseCatchClause = do pos <- getPosition
reserved "catch"
id <- parens identifier
stmt <- parseStatement
return $ CatchClause pos id stmt
in do reserved "try"
pos <- getPosition
guarded <- parseStatement
mCatch <- optionMaybe parseCatchClause
mFinally <- optionMaybe $ reserved "finally" >> parseStatement
if isJust mCatch || isJust mFinally
then return $ TryStmt pos guarded mCatch mFinally
else fail $ "A try statement should have at least a catch\
\ or a finally block, at " ++ show pos
parseThrowStmt:: StatementParser
parseThrowStmt = do
pos <- getPosition
reserved "throw"
expr <- parseExpression
optional semi
return (ThrowStmt pos expr)
parseReturnStmt:: StatementParser
parseReturnStmt = do
pos <- getPosition
reserved "return"
expr <- optionMaybe parseListExpr
optional semi
return (ReturnStmt pos expr)
parseWithStmt:: StatementParser
parseWithStmt = do
pos <- getPosition
reserved "with"
context <- parseParenExpr
stmt <- parseStatement
return (WithStmt pos context stmt)
parseVarDecl = do
pos <- getPosition
id <- identifier
init <- (reservedOp "=" >> liftM Just parseExpression) <|> return Nothing
return (VarDecl pos id init)
parseVarDeclStmt:: StatementParser
parseVarDeclStmt = do
pos <- getPosition
reserved "var"
decls <- parseVarDecl `sepBy` comma
optional semi
return (VarDeclStmt pos decls)
parseFunctionStmt:: StatementParser
parseFunctionStmt = do
pos <- getPosition
name <- try (reserved "function" >> identifier)
args <- parens (identifier `sepBy` comma)
body <- withFreshLabelStack parseBlockStmt <?> "function body in { ... }"
return (FunctionStmt pos name args body)
parseStatement:: StatementParser
parseStatement = parseIfStmt <|> parseSwitchStmt <|> parseWhileStmt
<|> parseDoWhileStmt <|> parseContinueStmt <|> parseBreakStmt
<|> parseBlockStmt <|> parseEmptyStmt <|> parseForInStmt <|> parseForStmt
<|> parseTryStmt <|> parseThrowStmt <|> parseReturnStmt <|> parseWithStmt
<|> parseVarDeclStmt <|> parseFunctionStmt
<|> parseLabelledStmt <|> parseExpressionStmt <?> "statement"
--}}}
parseThisRef:: ExpressionParser
parseThisRef = do
pos <- getPosition
reserved "this"
return (ThisRef pos)
parseNullLit:: ExpressionParser
parseNullLit = do
pos <- getPosition
reserved "null"
return (NullLit pos)
parseBoolLit:: ExpressionParser
parseBoolLit = do
pos <- getPosition
let parseTrueLit = reserved "true" >> return (BoolLit pos True)
parseFalseLit = reserved "false" >> return (BoolLit pos False)
parseTrueLit <|> parseFalseLit
parseVarRef:: ExpressionParser
parseVarRef = liftM2 VarRef getPosition identifier
parseArrayLit:: ExpressionParser
parseArrayLit = liftM2 ArrayLit getPosition (squares (parseExpression `sepEndBy` comma))
parseFuncExpr = do
pos <- getPosition
reserved "function"
name <- optionMaybe identifier
args <- parens (identifier `sepBy` comma)
body <- withFreshLabelStack parseBlockStmt
return $ FuncExpr pos name args body
escapeChars =
[('\'','\''),('\"','\"'),('\\','\\'),('b','\b'),('f','\f'),('n','\n'),
('r','\r'),('t','\t'),('v','\v'),('/','/'),(' ',' '),('0','\0')]
allEscapes:: String
allEscapes = map fst escapeChars
parseEscapeChar = do
c <- oneOf allEscapes
let (Just c') = lookup c escapeChars
return c'
parseAsciiHexChar = do
char 'x'
d1 <- hexDigit
d2 <- hexDigit
return ((chr.fst.head.readHex) (d1:d2:""))
parseUnicodeHexChar = do
char 'u'
liftM (chr.fst.head.readHex)
(sequence [hexDigit,hexDigit,hexDigit,hexDigit])
isWhitespace ch = ch `elem` " \t"
parseStringLit' endWith =
(char endWith >> return "") <|>
(do try (string "\\'")
cs <- parseStringLit' endWith
return $ "'" ++ cs) <|>
(do char '\\'
c <- parseEscapeChar <|> parseAsciiHexChar <|> parseUnicodeHexChar <|>
char '\r' <|> char '\n'
cs <- parseStringLit' endWith
if c == '\r' || c == '\n'
then return (c:dropWhile isWhitespace cs)
else return (c:cs)) <|>
liftM2 (:) anyChar (parseStringLit' endWith)
parseStringLit:: ExpressionParser
parseStringLit = do
pos <- getPosition
str <- lexeme $ (char '\'' >>= parseStringLit') <|> (char '\"' >>= parseStringLit')
return $ StringLit pos str
--}}}
parseRegexpLit:: ExpressionParser
parseRegexpLit = do
let parseFlags = do
flags <- many (oneOf "mgi")
return $ \f -> f ('g' `elem` flags) ('i' `elem` flags)
let parseEscape :: CharParser Char
parseEscape = char '\\' >> anyChar
let parseChar :: CharParser Char
parseChar = noneOf "/"
let parseRe = (char '/' >> return "") <|>
(do char '\\'
ch <- anyChar
rest <- parseRe
return ('\\':ch:rest)) <|>
liftM2 (:) anyChar parseRe
pos <- getPosition
char '/'
pat <- parseRe
flags <- parseFlags
spaces
return $ flags (RegexpLit pos pat)
parseObjectLit:: ExpressionParser
parseObjectLit =
let parseProp = do
name <- liftM (\(StringLit p s) -> PropString p s) parseStringLit
<|> liftM2 PropId getPosition identifier
<|> liftM2 PropNum getPosition decimal
colon
val <- assignExpr
return (name,val)
in do pos <- getPosition
props <- braces (parseProp `sepEndBy` comma) <?> "object literal"
return $ ObjectLit pos props
hexLit = do
try (string "0x")
digits <- many1 (oneOf "0123456789abcdefABCDEF")
[(hex,"")] <- return $ Numeric.readHex digits
return (True, hex)
mkDecimal:: Double -> Double -> Int -> Double
mkDecimal w f e = if f >= 1.0
then mkDecimal w (f / 10.0) e
else (w + f) * (10.0 ^^ e)
exponentPart = do
oneOf "eE"
(char '+' >> decimal) <|> (char '-' >> negate `fmap` decimal) <|> decimal
jparser = liftM Just
decLit =
(do whole <- decimal
mfrac <- option Nothing (jparser (char '.' >> decimal))
mexp <- option Nothing (jparser exponentPart)
if isNothing mfrac && isNothing mexp
then return (True, fromIntegral whole)
else return (False, mkDecimal (fromIntegral whole)
(fromIntegral (fromMaybe 0 mfrac))
(fromIntegral (fromMaybe 0 mexp)))) <|>
(do frac <- char '.' >> decimal
exp <- option 0 exponentPart
return (False, mkDecimal 0.0 (fromIntegral frac) (fromIntegral exp)))
parseNumLit:: ExpressionParser
parseNumLit = do
pos <- getPosition
(isint, num) <- lexeme $ hexLit <|> decLit
notFollowedBy identifierStart <?> "whitespace"
if isint
then return $ IntLit pos (round num)
else return $ NumLit pos num
withPos cstr p = do { pos <- getPosition; e <- p; return $ cstr pos e }
dotRef e = (reservedOp "." >> withPos cstr identifier) <?> "property.ref"
where cstr pos = DotRef pos e
funcApp e = parens (withPos cstr (parseExpression `sepBy` comma))
<?>"(function application)"
where cstr pos = CallExpr pos e
bracketRef e = brackets (withPos cstr parseExpression) <?> "[property-ref]"
where cstr pos = BracketRef pos e
parseParenExpr:: ExpressionParser
parseParenExpr = withPos ParenExpr (parens parseListExpr)
parseExprForNew = parseThisRef <|> parseNullLit <|> parseBoolLit <|> parseStringLit
<|> parseArrayLit <|> parseParenExpr <|> parseNewExpr <|> parseNumLit
<|> parseRegexpLit <|> parseObjectLit <|> parseVarRef
parseSimpleExpr' = parseThisRef <|> parseNullLit <|> parseBoolLit
<|> parseStringLit <|> parseArrayLit <|> parseParenExpr
<|> parseFuncExpr <|> parseNumLit <|> parseRegexpLit <|> parseObjectLit
<|> parseVarRef
parseNewExpr =
(do pos <- getPosition
reserved "new"
constructor <- parseSimpleExprForNew Nothing
arguments <- try (parens (parseExpression `sepBy` comma)) <|> return []
return (NewExpr pos constructor arguments)) <|>
parseSimpleExpr'
parseSimpleExpr (Just e) = ((dotRef e <|> funcApp e <|> bracketRef e) >>=
parseSimpleExpr . Just)
<|> return e
parseSimpleExpr Nothing = do
e <- parseNewExpr <?> "expression (3)"
parseSimpleExpr (Just e)
parseSimpleExprForNew (Just e) = ((dotRef e <|> bracketRef e) >>=
parseSimpleExprForNew . Just)
<|> return e
parseSimpleExprForNew Nothing = do
e <- parseNewExpr <?> "expression (3)"
parseSimpleExprForNew (Just e)
--}}}
makeInfixExpr str constr = Infix parser AssocLeft where
parser:: CharParser (Expression SourcePos -> Expression SourcePos -> Expression SourcePos)
parser = do
pos <- getPosition
reservedOp str
return (InfixExpr pos constr)
parsePrefixedExpr = do
pos <- getPosition
op <- optionMaybe $ (reservedOp "!" >> return PrefixLNot) <|>
(reservedOp "~" >> return PrefixBNot) <|>
(try (lexeme $ char '-' >> notFollowedBy (char '-')) >>
return PrefixMinus) <|>
(try (lexeme $ char '+' >> notFollowedBy (char '+')) >>
return PrefixPlus) <|>
(reserved "typeof" >> return PrefixTypeof) <|>
(reserved "void" >> return PrefixVoid) <|>
(reserved "delete" >> return PrefixDelete)
case op of
Nothing -> unaryAssignExpr
Just op -> do
innerExpr <- parsePrefixedExpr
return (PrefixExpr pos op innerExpr)
exprTable:: [[Operator String ParserState Identity ParsedExpression]]
exprTable =
[ [ makeInfixExpr "==" OpEq
, makeInfixExpr "!=" OpNEq
, makeInfixExpr "===" OpStrictEq
, makeInfixExpr "!==" OpStrictNEq
]
, [ makeInfixExpr "||" OpLOr ]
, [ makeInfixExpr "&&" OpLAnd ]
, [ makeInfixExpr "|" OpBOr ]
, [ makeInfixExpr "^" OpBXor ]
, [ makeInfixExpr "&" OpBAnd ]
, [ makeInfixExpr "<" OpLT
, makeInfixExpr "<=" OpLEq
, makeInfixExpr ">" OpGT
, makeInfixExpr ">=" OpGEq
, makeInfixExpr "instanceof" OpInstanceof
, makeInfixExpr "in" OpIn
]
, [ makeInfixExpr "<<" OpLShift
, makeInfixExpr ">>" OpSpRShift
, makeInfixExpr ">>>" OpZfRShift
]
, [ makeInfixExpr "+" OpAdd
, makeInfixExpr "-" OpSub
]
, [ makeInfixExpr "*" OpMul
, makeInfixExpr "/" OpDiv
, makeInfixExpr "%" OpMod
]
]
parseExpression' =
buildExpressionParser exprTable parsePrefixedExpr <?> "simple expression"
asLValue :: SourcePos
-> Expression SourcePos
-> CharParser (LValue SourcePos)
asLValue p' e = case e of
VarRef p (Id _ x) -> return (LVar p x)
DotRef p e (Id _ x) -> return (LDot p e x)
BracketRef p e1 e2 -> return (LBracket p e1 e2)
otherwise -> fail $ "expected a left-value at " ++ show p'
lvalue :: CharParser (LValue SourcePos)
lvalue = do
p <- getPosition
e <- parseSimpleExpr Nothing
asLValue p e
unaryAssignExpr :: CharParser ParsedExpression
unaryAssignExpr = do
p <- getPosition
let prefixInc = do
reservedOp "++"
liftM (UnaryAssignExpr p PrefixInc) lvalue
let prefixDec = do
reservedOp "--"
liftM (UnaryAssignExpr p PrefixDec) lvalue
let postfixInc e = do
reservedOp "++"
liftM (UnaryAssignExpr p PostfixInc) (asLValue p e)
let postfixDec e = do
reservedOp "--"
liftM (UnaryAssignExpr p PostfixDec) (asLValue p e)
let other = do
e <- parseSimpleExpr Nothing
postfixInc e <|> postfixDec e <|> return e
prefixInc <|> prefixDec <|> other
parseTernaryExpr':: CharParser (ParsedExpression,ParsedExpression)
parseTernaryExpr' = do
reservedOp "?"
l <- assignExpr
colon
r <- assignExpr
return (l,r)
parseTernaryExpr:: ExpressionParser
parseTernaryExpr = do
e <- parseExpression'
e' <- optionMaybe parseTernaryExpr'
case e' of
Nothing -> return e
Just (l,r) -> do p <- getPosition
return $ CondExpr p e l r
assignOp :: CharParser AssignOp
assignOp =
(reservedOp "=" >> return OpAssign) <|>
(reservedOp "+=" >> return OpAssignAdd) <|>
(reservedOp "-=" >> return OpAssignSub) <|>
(reservedOp "*=" >> return OpAssignMul) <|>
(reservedOp "/=" >> return OpAssignDiv) <|>
(reservedOp "%=" >> return OpAssignMod) <|>
(reservedOp "<<=" >> return OpAssignLShift) <|>
(reservedOp ">>=" >> return OpAssignSpRShift) <|>
(reservedOp ">>>=" >> return OpAssignZfRShift) <|>
(reservedOp "&=" >> return OpAssignBAnd) <|>
(reservedOp "^=" >> return OpAssignBXor) <|>
(reservedOp "|=" >> return OpAssignBOr)
assignExpr :: ExpressionParser
assignExpr = do
p <- getPosition
lhs <- parseTernaryExpr
let assign = do
op <- assignOp
lhs <- asLValue p lhs
rhs <- assignExpr
return (AssignExpr p op lhs rhs)
assign <|> return lhs
parseExpression:: ExpressionParser
parseExpression = assignExpr
parseListExpr = liftM2 ListExpr getPosition (assignExpr `sepBy1` comma)
parseScript:: CharParser (JavaScript SourcePos)
parseScript = do
whiteSpace
liftM2 Script getPosition (parseStatement `sepBy` whiteSpace)
parse :: Stream s Identity t =>
Parsec s [String] a
-> SourceName
-> s
-> Either ParseError a
parse p = runParser p initialParserState
parseJavaScriptFromFile :: MonadIO m => String
-> m [Statement SourcePos]
parseJavaScriptFromFile filename = do
chars <- liftIO $ readFile filename
case parse parseScript filename chars of
Left err -> fail (show err)
Right (Script _ stmts) -> return stmts
parseScriptFromString :: String
-> String
-> Either ParseError (JavaScript SourcePos)
parseScriptFromString = parse parseScript
parseString :: String
-> [Statement SourcePos]
parseString str = case parse parseScript "" str of
Left err -> error (show err)
Right (Script _ stmts) -> stmts