-- |ECMAScript 3 syntax. /Spec/ refers to the ECMA-262 specification, -- 3rd edition. {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE ScopedTypeVariables #-} module Language.ECMAScript3.Syntax (JavaScript(..) ,unJavaScript ,Statement(..) ,isIterationStmt ,CaseClause(..) ,CatchClause(..) ,ForInit(..) ,ForInInit(..) ,VarDecl(..) ,Expression(..) ,InfixOp(..) ,AssignOp(..) ,Id(..) ,unId ,PrefixOp(..) ,Prop(..) ,UnaryAssignOp(..) ,LValue (..) ,SourcePos ,isValid ,isValidIdentifier ,isValidIdentifierName ,isReservedWord ,isValidIdStart ,isValidIdPart ,EnclosingStatement(..) ,pushLabel ,pushEnclosing ,HasLabelSet (..) ,isIter ,isIterSwitch ) where import Text.Parsec.Pos(initialPos,SourcePos) -- used by data JavaScript import Data.Data (Data) import Data.Typeable (Typeable) import Data.Foldable (Foldable) import Data.Traversable (Traversable) import Data.Default.Class import Data.Generics.Uniplate.Data import Data.Char import Control.Monad.State import Control.Arrow data JavaScript a -- |A script in \ ... \ tags. = Script a [Statement a] deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) instance Default a => Default (JavaScript a) where def = Script def [] -- | extracts statements from a JavaScript type unJavaScript :: JavaScript a -> [Statement a] unJavaScript (Script _ stmts) = stmts instance Default SourcePos where def = initialPos "" data Id a = Id a String deriving (Show,Eq,Ord,Data,Typeable,Functor,Foldable,Traversable) unId :: Id a -> String unId (Id _ s) = s -- | Infix operators: see spec 11.5-11.11 data InfixOp = OpLT -- ^ @<@ | OpLEq -- ^ @<=@ | OpGT -- ^ @>@ | OpGEq -- ^ @>=@ | OpIn -- ^ @in@ | OpInstanceof -- ^ @instanceof@ | OpEq -- ^ @==@ | OpNEq -- ^ @!=@ | OpStrictEq -- ^ @===@ | OpStrictNEq -- ^ @!===@ | OpLAnd -- ^ @&&@ | OpLOr -- ^ @||@ | OpMul -- ^ @*@ | OpDiv -- ^ @/@ | OpMod -- ^ @%@ | OpSub -- ^ @-@ | OpLShift -- ^ @<<@ | OpSpRShift -- ^ @>>@ | OpZfRShift -- ^ @>>>@ | OpBAnd -- ^ @&@ | OpBXor -- ^ @^@ | OpBOr -- ^ @|@ | OpAdd -- ^ @+@ deriving (Show,Data,Typeable,Eq,Ord,Enum) -- | Assignment operators: see spec 11.13 data AssignOp = OpAssign -- ^ simple assignment, @=@ | OpAssignAdd -- ^ @+=@ | OpAssignSub -- ^ @-=@ | OpAssignMul -- ^ @*=@ | OpAssignDiv -- ^ @/=@ | OpAssignMod -- ^ @%=@ | OpAssignLShift -- ^ @<<=@ | OpAssignSpRShift -- ^ @>>=@ | OpAssignZfRShift -- ^ @>>>=@ | OpAssignBAnd -- ^ @&=@ | OpAssignBXor -- ^ @^=@ | OpAssignBOr -- ^ @|=@ deriving (Show,Data,Typeable,Eq,Ord) -- | Unary assignment operators: see spec 11.3, 11.4.4, 11.4.5 data UnaryAssignOp = PrefixInc -- ^ @++x@ | PrefixDec -- ^ @--x@ | PostfixInc -- ^ @x++@ | PostfixDec -- ^ @x--@ deriving (Show, Data, Typeable, Eq, Ord) -- | Prefix operators: see spec 11.4 (excluding 11.4.4, 11.4.5) data PrefixOp = PrefixLNot -- ^ @!@ | PrefixBNot -- ^ @~@ | PrefixPlus -- ^ @+@ | PrefixMinus -- ^ @-@ | PrefixTypeof -- ^ @typeof@ | PrefixVoid -- ^ @void@ | PrefixDelete -- ^ @delete@ deriving (Show,Data,Typeable,Eq,Ord) -- | Property names in an object initializer: see spec 11.1.5 data Prop a = PropId a (Id a) -- ^ property name is an identifier, @foo@ | PropString a String -- ^ property name is a string, @\"foo\"@ | PropNum a Integer -- ^ property name is an integer, @42@ deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) -- | Left-hand side expressions: see spec 11.2 data LValue a = LVar a String -- ^ variable reference, @foo@ | LDot a (Expression a) String -- ^ @foo.bar@ | LBracket a (Expression a) (Expression a) -- ^ @foo[bar]@ deriving (Show, Eq, Ord, Data, Typeable, Functor,Foldable,Traversable) -- | Expressions, see spec 11 data Expression a = StringLit a String -- ^ @\"foo\"@, spec 11.1.3, 7.8 | RegexpLit a String Bool Bool -- ^ @RegexpLit a regexp global? case_insensitive?@ -- regular -- expression, see spec 11.1.3, 7.8 | NumLit a Double -- ^ @41.99999@, spec 11.1.3, 7.8 | IntLit a Int -- ^ @42@, spec 11.1.3, 7.8 | BoolLit a Bool -- ^ @true@, spec 11.1.3, 7.8 | NullLit a -- ^ @null@, spec 11.1.3, 7.8 | ArrayLit a [Expression a] -- ^ @[1,2,3]@, spec 11.1.4 | ObjectLit a [(Prop a, Expression a)] -- ^ @{foo:\"bar\", baz: 42}@, spec 11.1.5 | ThisRef a -- ^ @this@, spec 11.1.1 | VarRef a (Id a) -- ^ @foo@, spec 11.1.2 | DotRef a (Expression a) (Id a) -- ^ @foo.bar@, spec 11.2.1 | BracketRef a (Expression a) {- container -} (Expression a) {- key -} -- ^ @foo[bar@, spec 11.2.1 | NewExpr a (Expression a) {- constructor -} [Expression a] -- ^ @new foo(bar)@, spec 11.2.2 | PrefixExpr a PrefixOp (Expression a) -- ^ @\@e@, spec 11.4 (excluding 11.4.4, 111.4.5) | UnaryAssignExpr a UnaryAssignOp (LValue a) -- ^ @++x@, @x--@ etc., spec 11.3, 11.4.4, 11.4.5 | InfixExpr a InfixOp (Expression a) (Expression a) -- ^ @e1\@e2@, spec 11.5, 11.6, 11.7, 11.8, 11.9, 11.10, 11.11 | CondExpr a (Expression a) (Expression a) (Expression a) -- ^ @e1 ? e2 : e3@, spec 11.12 | AssignExpr a AssignOp (LValue a) (Expression a) -- ^ @e1 \@=e2@, spec 11.13 | ListExpr a [Expression a] -- ^ @e1, e2@, spec 11.14 | CallExpr a (Expression a) [Expression a] -- ^ @f(x,y,z)@, spec 11.2.3 --funcexprs are optionally named | FuncExpr a (Maybe (Id a)) [Id a] [Statement a] -- ^ @function f (x,y,z) {...}@, spec 11.2.5, 13 deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) -- | Case clauses, spec 12.11 data CaseClause a = CaseClause a (Expression a) [Statement a] -- ^ @case e: stmts;@ | CaseDefault a [Statement a] -- ^ @default: stmts;@ deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) -- | Catch clause, spec 12.14 data CatchClause a = CatchClause a (Id a) (Statement a) -- ^ @catch (x) {...}@ deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) -- | A variable declaration, spec 12.2 data VarDecl a = VarDecl a (Id a) (Maybe (Expression a)) -- ^ @var x = e;@ deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) -- | for initializer, spec 12.6 data ForInit a = NoInit -- ^ empty | VarInit [VarDecl a] -- ^ @var x, y=42@ | ExprInit (Expression a) -- ^ @expr@ deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) -- | for..in initializer, spec 12.6 data ForInInit a = ForInVar (Id a) -- ^ @var x@ | ForInLVal (LValue a) -- ^ @foo.baz@, @foo[bar]@, @z@ deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) -- | Statements, spec 12. data Statement a = BlockStmt a [Statement a] -- ^ @{stmts}@, spec 12.1 | EmptyStmt a -- ^ @;@, spec 12.3 | ExprStmt a (Expression a) -- ^ @expr;@, spec 12.4 | IfStmt a (Expression a) (Statement a) (Statement a) -- ^ @if (e) stmt@, spec 12.5 | IfSingleStmt a (Expression a) (Statement a) -- ^ @if (e) stmt1 else stmt2@, spec 12.5 | SwitchStmt a (Expression a) [CaseClause a] -- ^ @switch (e) clauses@, spec 12.11 | WhileStmt a (Expression a) (Statement a) -- ^ @while (e) do stmt@, spec 12.6 | DoWhileStmt a (Statement a) (Expression a) -- ^ @do stmt while (e);@, spec 12.6 | BreakStmt a (Maybe (Id a)) -- ^ @break lab;@, spec 12.8 | ContinueStmt a (Maybe (Id a)) -- ^ @continue lab;@, spec 12.7 | LabelledStmt a (Id a) (Statement a) -- ^ @lab: stmt@, spec 12.12 | ForInStmt a (ForInInit a) (Expression a) (Statement a) -- ^ @for (x in o) stmt@, spec 12.6 | ForStmt a (ForInit a) (Maybe (Expression a)) -- test (Maybe (Expression a)) -- increment (Statement a) -- body -- ^ @ForStmt a init test increment body@, @for (init; test, -- increment) body@, spec 12.6 | TryStmt a (Statement a) {-body-} (Maybe (CatchClause a)) (Maybe (Statement a)) {-finally-} -- ^ @try stmt catch(x) stmt finally stmt@, spec 12.14 | ThrowStmt a (Expression a) -- ^ @throw expr;@, spec 12.13 | ReturnStmt a (Maybe (Expression a)) -- ^ @return expr;@, spec 12.9 | WithStmt a (Expression a) (Statement a) -- ^ @with (o) stmt@, spec 12.10 | VarDeclStmt a [VarDecl a] -- ^ @var x, y=42;@, spec 12.2 | FunctionStmt a (Id a) {-name-} [Id a] {-args-} [Statement a] {-body-} -- ^ @function f(x, y, z) {...}@, spec 13 deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) -- | Returns 'True' if the statement is an /IterationStatement/ -- according to spec 12.6. isIterationStmt :: Statement a -> Bool isIterationStmt s = case s of WhileStmt {} -> True DoWhileStmt {} -> True ForStmt {} -> True ForInStmt {} -> True _ -> False -- | The ECMAScript standard defines certain syntactic restrictions on -- programs (or, more precisely, statements) that aren't easily -- enforced in the AST datatype. These restrictions have to do with -- labeled statements and break/continue statement, as well as -- identifier names. Thus, it is possible to manually generate AST's -- that correspond to syntactically incorrect programs. Use this -- predicate to check if an 'JavaScript' AST corresponds to a -- syntactically correct ECMAScript program. isValid :: forall a. (Data a, Typeable a) => JavaScript a -> Bool -- =From ECMA-262-3= -- A program is considered syntactically incorrect if either of the -- following is true: -- * The program contains a continue statement without the optional -- Identifier, which is not nested, directly or indirectly (but not -- crossing function boundaries), within an IterationStatement. -- * The program contains a continue statement with the optional -- Identifier, where Identifier does not appear in the label set of an -- enclosing (but not crossing function boundaries) IterationStatement. -- * The program contains a break statement without the optional -- Identifier, which is not nested, directly or indirectly (but not -- crossing function boundaries), within an IterationStatement or a -- SwitchStatement. -- * The program contains a break statement with the optional -- Identifier, where Identifier does not appear in the label set of an -- enclosing (but not crossing function boundaries) Statement. -- * The program contains a LabelledStatement that is enclosed by a -- LabelledStatement with the same Identifier as label. This does not -- apply to labels appearing within the body of a FunctionDeclaration -- that is nested, directly or indirectly, within a labelled -- statement. -- * The identifiers should be valid. See spec 7.6 isValid js = checkIdentifiers js && checkBreakContinueLabels js where checkIdentifiers :: (Data a, Typeable a) => JavaScript a -> Bool checkIdentifiers js = and $ map isValidIdentifierName $ [n | (Id _ n) :: Id a <- universeBi js] ++ [n | (LVar _ n) :: LValue a <- universeBi js] ++ [n | (LDot _ _ n) :: LValue a <- universeBi js] checkBreakContinueLabels js@(Script _ body) = and $ map checkStmt $ body ++ concat ([body | FuncExpr _ _ _ body <- universeBi js] ++ [body | FunctionStmt _ _ _ body <- universeBi js]) checkStmt :: Statement a -> Bool checkStmt s = evalState (checkStmtM s) ([], []) checkStmtM :: Statement a -> State ([Label], [EnclosingStatement]) Bool checkStmtM stmt = case stmt of ContinueStmt a mlab -> do encls <- gets snd let enIts = filter isIter encls return $ case mlab of Nothing -> not $ null enIts Just lab -> any (elem (unId lab) . getLabelSet) enIts BreakStmt a mlab -> do encls <- gets snd return $ case mlab of Nothing -> any isIterSwitch encls Just lab -> any (elem (unId lab) . getLabelSet) encls LabelledStmt _ lab s -> do labs <- gets fst if (unId lab) `elem` labs then return False else pushLabel lab $ checkStmtM s WhileStmt _ _ s -> iterCommon s DoWhileStmt _ s _ -> iterCommon s ForStmt _ _ _ _ s -> iterCommon s ForInStmt _ _ _ s -> iterCommon s SwitchStmt _ _ cs -> pushEnclosing EnclosingSwitch $ liftM and $ mapM checkCaseM cs BlockStmt _ ss -> pushEnclosing EnclosingOther $ liftM and $ mapM checkStmtM ss IfStmt _ _ t e -> liftM2 (&&) (checkStmtM t) (checkStmtM e) IfSingleStmt _ _ t -> checkStmtM t TryStmt _ body mcatch mfinally -> liftM2 (&&) (checkStmtM body) $ liftM2 (&&) (maybe (return True) checkCatchM mcatch) (maybe (return True) checkStmtM mfinally) WithStmt _ _ body -> checkStmtM body _ -> return True iterCommon s = pushEnclosing EnclosingIter $ checkStmtM s pushEnclosing :: Monad m => ([Label] -> EnclosingStatement) -> StateT ([Label], [EnclosingStatement]) m a -> StateT ([Label], [EnclosingStatement]) m a pushEnclosing ctor = bracketState (\(labs, encls) -> ([], ctor labs:encls)) pushLabel :: Monad m => Id b -> StateT ([Label], [EnclosingStatement]) m a -> StateT ([Label], [EnclosingStatement]) m a pushLabel l = bracketState (first (unId l:)) checkCaseM c = let ss = case c of CaseClause _ _ body -> body CaseDefault _ body -> body in liftM and $ mapM checkStmtM ss checkCatchM (CatchClause _ _ body) = checkStmtM body bracketState :: Monad m => (s -> s) -> StateT s m a -> StateT s m a bracketState f m = do original <- get modify f rv <- m put original return rv -- | Checks if an identifier name is valid according to the spec isValidIdentifier :: Id a -> Bool isValidIdentifier (Id _ name) = isValidIdentifierName name -- | Checks if the 'String' represents a valid identifier name isValidIdentifierName :: String -> Bool isValidIdentifierName name = case name of "" -> False (c:cs) -> isValidIdStart c && and (map isValidIdPart cs) && (not $ isReservedWord name) -- | Checks if a string is in the list of reserved ECMAScript words isReservedWord :: String -> Bool isReservedWord = (`elem` reservedWords) where reservedWords = keyword ++ futureReservedWord ++ nullKw ++ boolLit keyword = ["break", "case", "catch", "continue", "default", "delete" ,"do", "else", "finally", "for", "function", "if", "in" ,"instanceof", "new", "return", "switch", "this", "throw" ,"try", "typeof", "var", "void", "while", "with"] futureReservedWord = ["abstract", "boolean", "byte", "char", "class" ,"const", "debugger", "enum", "export", "extends" ,"final", "float", "goto", "implements", "int" ,"interface", "long", "native", "package", "private" ,"protected", "short", "static", "super" ,"synchronized", "throws", "transient", "volatile"] nullKw = ["null"] boolLit = ["true", "false"] -- | Checks if a character is valid at the start of an identifier isValidIdStart :: Char -> Bool isValidIdStart c = unicodeLetter c || c == '$' || c == '_' where unicodeLetter c = case generalCategory c of UppercaseLetter -> True LowercaseLetter -> True TitlecaseLetter -> True ModifierLetter -> True OtherLetter -> True LetterNumber -> True _ -> False -- | Checks if a character is valid in an identifier part isValidIdPart :: Char -> Bool isValidIdPart c = isValidIdStart c || isValidIdPartUnicode c where isValidIdPartUnicode c = case generalCategory c of NonSpacingMark -> True SpacingCombiningMark -> True DecimalNumber -> True ConnectorPunctuation -> True _ -> False data EnclosingStatement = EnclosingIter [Label] -- ^ The enclosing statement is an iteration statement | EnclosingSwitch [Label] -- ^ The enclosing statement is a switch statement | EnclosingOther [Label] -- ^ The enclosing statement is some other -- statement. Note, `EnclosingOther` is -- never pushed if the current `labelSet` is -- empty, so the list of labels in this -- constructor should always be non-empty instance Show EnclosingStatement where show (EnclosingIter ls) = "iteration" ++ show ls show (EnclosingSwitch ls) = "switch" ++ show ls show (EnclosingOther ls) = "statement" ++ show ls isIter :: EnclosingStatement -> Bool isIter (EnclosingIter _) = True isIter _ = False isIterSwitch :: EnclosingStatement -> Bool isIterSwitch (EnclosingIter _) = True isIterSwitch (EnclosingSwitch _) = True isIterSwitch _ = False class HasLabelSet a where getLabelSet :: a -> [Label] setLabelSet :: [Label] -> a -> a modifyLabelSet :: HasLabelSet a => ([Label] -> [Label]) -> a -> a modifyLabelSet f a = setLabelSet (f $ getLabelSet a) a instance HasLabelSet EnclosingStatement where getLabelSet e = case e of EnclosingIter ls -> ls EnclosingSwitch ls -> ls EnclosingOther ls -> ls setLabelSet ls e = case e of EnclosingIter _ -> EnclosingIter ls EnclosingSwitch _ -> EnclosingSwitch ls EnclosingOther _ -> EnclosingOther ls type Label = String