module HJS.Parser.Lexer where
import Data.Char
import Data.Set
import Data.List
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import qualified Text.ParserCombinators.Parsec.Token as P
import Text.ParserCombinators.Parsec.Language (javaStyle)
import HJS.Parser.Utils
javaScriptStyle= javaStyle {
P.reservedNames = [ "break","else","new","var","case",
"finally","return","void","catch","for","switch","while",
"continue","function","this","with","default","if",
"throw","delete","in","try","do","instanceof","typeof",
"abstract","enum","int","short",
"boolean","export","interface","static",
"byte","extends","long","super",
"char","final","native","synchronized",
"class","float","package","throws",
"const","goto","private","transient",
"debugger","implements","protected","volatile",
"double","import","public",
"true", "false", "null"
]
, P.reservedOpNames = [ "==" , "=" , "+=" , "-=" , "++" ,
"--" , ";" , ":" , "?" , "," , "." , "|","^",
"!" , "*" , "/" , "(" , ")" , "{" , "}" , "[" , "]" , "^" , "&" , "&&" ,
"<=" , ">=" , "<" , ">" , "-" , "%" , "*=" , "/=" , "%=" , "+" ,
"===" , "!==" , "!=" , "&&" , "||" , "," , "<<" , ">>" , ">>>" , "~"]
, P.opLetter = oneOf "!%&()*+,-./:;<=>?[]^{|}~"
, P.opStart = P.opLetter javaScriptStyle
}
binaryOp = [ "==" , "=" , "+=" , "-=" , "(", "[", "{", ",",
"--" ,
"*" , "/" , "^" , "&" , "&&" , "|",
"<=" , ">=" , "<" , ">" , "-", "%" , "*=" , "/=" , "%=" , "+" ,
"===" , "!==" , "!=" , "&&" , "||" , "," , "<<" , ">>" , ">>>" , "~"]
javascript = P.makeTokenParser javaScriptStyle
reserved = P.reserved javascript
sortByLength = sortBy (\x y -> compare (length y) (length x))
oper3 (c:cs) = try (do { s <- string c; return s} )
<|> oper3 cs
oper3 [] = fail ""
oper = oper3 (sortByLength $ P.reservedOpNames javaScriptStyle)
oper' acc= do{ c <- (P.opStart javaScriptStyle)
; let cs = acc++[c]
; if( elem cs (P.reservedOpNames javaScriptStyle)) then return cs else oper' cs
}
<?> "operator"
matchTok (c:cs) ((p,TokenROP t):ts) = t == [c] && matchTok cs ts
matchTok [] _ = True
matchTok _ _ = False
merge ((p,TokenROP "+"):(_,TokenROP "+"):cs) = ((p,TokenROP "++"):merge cs)
merge ((p,TokenROP "!"):(_,TokenROP "="):cs) = ((p,TokenROP "!="):merge cs)
merge ((p,TokenROP "="):(_,TokenROP "="):cs) = ((p,TokenROP "=="):merge cs)
merge ((p,TokenROP "+"):(_,TokenROP "="):cs) = ((p,TokenROP "+="):merge cs)
merge ((p,TokenROP "-"):(_,TokenROP "="):cs) = ((p,TokenROP "-="):merge cs)
merge (c:cs) = (c:merge cs)
merge [] = []
data Token
=
TokenWhite
| TokenInt Int
| TokenIdent String
| TokenStringLit String
| TokenNL
| TokenRegex (String,String)
| TokenEof
| TokenRID String
| TokenROP String
deriving (Show,Eq)
regex = do { char '/'; body <- do { c <- firstchar; cs <- many otherchar; return $ concat (c:cs) }; char '/'; flg <- identPart; return $ (body,flg) }
firstchar = do { c <- satisfy (\c -> isPrint c && c /= '*' && c /= '\\' && c /= '/'); return [c]} <|> escapeseq
escapeseq = do { char '\\'; c <- satisfy isPrint; return ['\\',c]}
otherchar = do { c <- satisfy (\c -> isPrint c && c /= '\\' && c /= '/'); return [c]} <|> escapeseq
identPart = many letter
isOP c = do { c' <- char c; return $ TokenROP [c'] }
isRes c = do { reserved c; return $ TokenRID c }
resId = do { x <- many1 identChar; if (elem x $ P.reservedNames javaScriptStyle) then return x else unexpected "resId" }
resOp = do { x <- oper; return x }
identChar = satisfy (\c -> isAlphaNum c || c == '_')
atoken =
try (do { x <- many1 digit; return $ TokenInt $ read x })
<|> try (do { x <- resId; return $ TokenRID x })
<|> try (do { s <- regex; return $ TokenRegex s })
<|> try (do { x <- resOp; return $ TokenROP x } )
<|> try (do { cs <- many1 identChar ; return $ TokenIdent (cs)})
<|> try (do { char '\n'; return TokenNL })
<|> try (do { char '"'; x<- many stringCharDouble; char '"'; return $ TokenStringLit x })
<|> try (do { char '\''; x<- many stringCharSingle; char '\''; return $ TokenStringLit x })
<|> try (do { whiteSpace; return TokenWhite})
stringCharDouble = satisfy (\c -> isPrint c && c /= '"')
stringCharSingle = satisfy (\c -> isPrint c && c /= '\'')
lexer = many (do { p <- getPosition;t <- atoken; return (p,t) })
runLexer :: String -> [(SourcePos,Token)]
runLexer s = case parse lexer "" s of
Right l -> l
Left _ -> []
whiteSpace = skipMany1 (satisfy (\c -> isSpace c && c /= '\n'))
runIO :: Show a => Parser a -> String -> IO ()
runIO p input
= case (parse p "" input) of
Left err -> do{ putStr "parse error at "
; print err
}
Right x -> print x
lexFile flags name = do
input <- readFile name
putStrLn $ show $ lexProgram input
lexProgram input = runLexer $ processComments input