module HJS.Parser.Lexer where import Data.Char import Data.Set import Data.List import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Pos import qualified Text.ParserCombinators.Parsec.Token as P import Text.ParserCombinators.Parsec.Language (javaStyle) import HJS.Parser.Utils javaScriptStyle= javaStyle { P.reservedNames = [ "break","else","new","var","case", "finally","return","void","catch","for","switch","while", "continue","function","this","with","default","if", "throw","delete","in","try","do","instanceof","typeof", "abstract","enum","int","short", "boolean","export","interface","static", "byte","extends","long","super", "char","final","native","synchronized", "class","float","package","throws", "const","goto","private","transient", "debugger","implements","protected","volatile", "double","import","public", "true", "false", "null" ] , P.reservedOpNames = [ "==" , "=" , "+=" , "-=" , "++" , "--" , ";" , ":" , "?" , "," , "." , "|","^", "!" , "*" , "/" , "(" , ")" , "{" , "}" , "[" , "]" , "^" , "&" , "&&" , "<=" , ">=" , "<" , ">" , "-" , "%" , "*=" , "/=" , "%=" , "+" , "===" , "!==" , "!=" , "&&" , "||" , "," , "<<" , ">>" , ">>>" , "~"] , P.opLetter = oneOf "!%&()*+,-./:;<=>?[]^{|}~" , P.opStart = P.opLetter javaScriptStyle } binaryOp = [ "==" , "=" , "+=" , "-=" , "(", "[", "{", ",", "--" , "*" , "/" , "^" , "&" , "&&" , "|", "<=" , ">=" , "<" , ">" , "-", "%" , "*=" , "/=" , "%=" , "+" , "===" , "!==" , "!=" , "&&" , "||" , "," , "<<" , ">>" , ">>>" , "~"] javascript = P.makeTokenParser javaScriptStyle reserved = P.reserved javascript sortByLength = sortBy (\x y -> compare (length y) (length x)) {- oper = do{ c <- (P.opStart javaScriptStyle) ; cs <- many (P.opLetter javaScriptStyle) ; return (c:cs) } "operator" -} oper3 (c:cs) = try (do { s <- string c; return s} ) <|> oper3 cs oper3 [] = fail "" oper = oper3 (sortByLength $ P.reservedOpNames javaScriptStyle) --oper = oper' [] oper' acc= do{ c <- (P.opStart javaScriptStyle) ; let cs = acc++[c] ; if( elem cs (P.reservedOpNames javaScriptStyle)) then return cs else oper' cs } "operator" matchTok (c:cs) ((p,TokenROP t):ts) = t == [c] && matchTok cs ts matchTok [] _ = True matchTok _ _ = False merge ((p,TokenROP "+"):(_,TokenROP "+"):cs) = ((p,TokenROP "++"):merge cs) merge ((p,TokenROP "!"):(_,TokenROP "="):cs) = ((p,TokenROP "!="):merge cs) merge ((p,TokenROP "="):(_,TokenROP "="):cs) = ((p,TokenROP "=="):merge cs) merge ((p,TokenROP "+"):(_,TokenROP "="):cs) = ((p,TokenROP "+="):merge cs) merge ((p,TokenROP "-"):(_,TokenROP "="):cs) = ((p,TokenROP "-="):merge cs) merge (c:cs) = (c:merge cs) merge [] = [] data Token = TokenWhite | TokenInt Int | TokenIdent String | TokenStringLit String | TokenNL | TokenRegex (String,String) | TokenEof | TokenRID String | TokenROP String deriving (Show,Eq) -- -- Regular Expression Literals -- regex = do { char '/'; body <- do { c <- firstchar; cs <- many otherchar; return $ concat (c:cs) }; char '/'; flg <- identPart; return $ (body,flg) } firstchar = do { c <- satisfy (\c -> isPrint c && c /= '*' && c /= '\\' && c /= '/'); return [c]} <|> escapeseq escapeseq = do { char '\\'; c <- satisfy isPrint; return ['\\',c]} otherchar = do { c <- satisfy (\c -> isPrint c && c /= '\\' && c /= '/'); return [c]} <|> escapeseq identPart = many letter isOP c = do { c' <- char c; return $ TokenROP [c'] } isRes c = do { reserved c; return $ TokenRID c } resId = do { x <- many1 identChar; if (elem x $ P.reservedNames javaScriptStyle) then return x else unexpected "resId" } resOp = do { x <- oper; return x } identChar = satisfy (\c -> isAlphaNum c || c == '_') atoken = try (do { x <- many1 digit; return $ TokenInt $ read x }) <|> try (do { x <- resId; return $ TokenRID x }) <|> try (do { s <- regex; return $ TokenRegex s }) <|> try (do { x <- resOp; return $ TokenROP x } ) <|> try (do { cs <- many1 identChar ; return $ TokenIdent (cs)}) <|> try (do { char '\n'; return TokenNL }) <|> try (do { char '"'; x<- many stringCharDouble; char '"'; return $ TokenStringLit x }) <|> try (do { char '\''; x<- many stringCharSingle; char '\''; return $ TokenStringLit x }) <|> try (do { whiteSpace; return TokenWhite}) stringCharDouble = satisfy (\c -> isPrint c && c /= '"') stringCharSingle = satisfy (\c -> isPrint c && c /= '\'') lexer = many (do { p <- getPosition;t <- atoken; return (p,t) }) runLexer :: String -> [(SourcePos,Token)] runLexer s = case parse lexer "" s of Right l -> l Left _ -> [] whiteSpace = skipMany1 (satisfy (\c -> isSpace c && c /= '\n')) runIO :: Show a => Parser a -> String -> IO () runIO p input = case (parse p "" input) of Left err -> do{ putStr "parse error at " ; print err } Right x -> print x lexFile flags name = do input <- readFile name putStrLn $ show $ lexProgram input lexProgram input = runLexer $ processComments input