{---- - Noodle.hs - noodle programming language parser ---- - Author: Jesse Rudolph - See LICENSE for licensing details ----------------------------------------------------------------- -} module Language.Noodle.Parsing.Noodle where import Language.Noodle.Syntax import Language.Noodle.Parsing.String -- specialization of the Parser type that fills in the state type as Unit type NParser a = Parser () a -- parse a declaration block without curly braces topDecls :: NParser Decls topDecls = do ds <- sepBy1 decl $ reserved ";" return $ Decls ds -- parse a single declaration, or a declaration block in curly braces decls :: NParser Decls decls = oneDecls <|> block topDecls where oneDecls = do d <- decl return $ Decls [d] -- parse a single declaration decl :: NParser Decl decl = intThunk <|> intFun <|> intOp <|> intMod <|> fail "expected declaration" where intThunk = do (n,c) <- clause ident comp return $ IntThunk n c intFun = do ((p,n),c) <- clause funHead comp return $ IntFun p n c where funHead = do p <- pattern n <- ident <|> oper return (p,n) intOp = do ((p1,o,p2),c) <- clause opHead comp return $ IntOp p1 o p2 c where opHead = do p1 <- pattern op <- oper p2 <- pattern return (p1,op,p2) intMod = do reserved "module" name <- ident ds <- decls return $ IntMod name ds -- parse a computational form comp :: NParser Comp comp = tryComp <|> withComp <|> inComp <|> patMatch <|> exprComp <|> fail "expected computation" where withComp = do reserved "with" mc <- comp reserved ";" rest <- comp return $ With mc rest inComp = do reserved "in" c <- comp reserved ";" ds <- decls return $ In ds c patMatch = do p <- pattern reserved "=" c1 <- comp reserved ";" c2 <- comp return $ PatMatch p c1 c2 tryComp = do reserved "handler" c1 <- comp reserved ";" c2 <- comp return $ Handler c1 c2 exprComp = do e <- expr return $ ExprComp e -- parse a 'clause'. This is used to parse a language syntactic idiom of the form 'lhs := rhs' use in declaration, and the computational forms 'if' and 'case' clause :: NParser a -> NParser b -> NParser (a,b) clause f s = do first <- f reserved ":=" second <- s return (first,second) -- this adds the parsing of enclosing curly braces to some other parser block :: NParser a -> NParser a block p = do lBrace r <- p rBrace return r {- Expression Parsers -} -- parser for expressive forms expr = exprOp -- parser for infix operator expressions exprOp = do app <- exprApp rest <- many subOp case rest of [] -> return app _ -> return $ (foldl (\a f -> f a) app rest) where subOp = do o <- oper a <- exprApp return $ (\a1 -> ExprOp a1 o a) -- parser for applicative expressions exprApp = do l <- modRef es <- many modRef case es of [] -> return l _ -> return $ foldl ExprApp l es -- parser for module reference expressions. NOTE, this does not parse . infix, it parses it prefix to an identifier modRef = do e <- exprLit refs <- many subRef case refs of [] -> return e _ -> return $ foldl ExprApp e refs where subRef = do reserved "." n <- (ident <|> oper) return (ModRef n) -- parser for expressive literals exprLit = do l <- literal comp return $ Lit l {-- MISC PARSERS --} -- pattern parser. as of now the only valid patterns are literals pattern :: NParser Pattern pattern = do l <- literal pattern return $ Pat l -- parser for literals literal :: NParser a -> NParser (Literal a) literal p = number <|> identifier <|> symbLit <|> stringLit <|> absLit p <|> prodLit p <|> paren p -- parse a literal identifier (as opposed to ident, which parses an identifier name) identifier = do i <- ident return (Ident i) -- parse a literal number number = do n <- numeric return $ Numb (read n) -- apply parenthesis to some other parser paren p = do lParen p' <- p rParen return $ Paren p' {-- TOKEN PARSERS --} -- predicate for determining if some string has a special use in the language, which may overlap with the specification of some other parser isReserved :: String -> Bool isReserved s = or $ map (==s) [ ":=" , "=:" , "=" , ";" , "." , "`" , "," , "(" , ")" , "with" , "in" , "handler" , "module" ] -- cheat parser for parsing a string as a token. removes leading whitespace and comments. eventually the special character parsers will go away and this will be used exclusively -- it makes other parser easier to read reserved :: String -> NParser () reserved s = notcode >> string s >> return () -- depricated: parse left curly brace, removing leading garbage lBrace :: NParser () lBrace = notcode >> char '{' >> return () -- depricated: parse right curly brace, removing leading garbage rBrace :: NParser () rBrace = notcode >> char '}' >> return () -- depricated: parse left parenthesis, removing leading garbage lParen :: NParser () lParen = notcode >> char '(' >> return () -- depricated: parse right parenthesis, removing leading garbage rParen :: NParser () rParen = notcode >> char ')' >> return () -- parse a valid identifier name ident :: NParser String ident = do notcode first <- alphaLower rest <- many (char '_' <|> alphaUpper <|> alphaLower <|> digit) let name = (first:rest) if isReserved name then fail $ "unexpected keyword: " ++ name else return name -- parse symbol literal, should be moved up to where previous literal sub-parsers are symbLit :: NParser (Literal a) symbLit = do notcode first <- alphaUpper rest <- many (char '_' <|> alphaUpper <|> alphaLower <|> digit) return $ Symb (first:rest) -- parse product literal, should be moved up to where previous literal sub-parsers are prodLit :: NParser a -> NParser (Literal a) prodLit p = do reserved "(" first <- p second <- many1 (reserved "," >> p) reserved ")" return $ ProdLit first (head second) (tail second) -- parse abstractor literal, should be moved up to where previous literal sub-parsers are absLit p = do reserved "[" l <- p reserved "]" return $ Abs l -- parse a valid operator name oper :: NParser String oper = do notcode op <- many1 opChar if isReserved op then fail $ "unexpected reserved operator: " ++ op else return op -- parse a valid number string numeric :: NParser String numeric = notcode >> many1 digit -- parse a single valid operator character opChar :: NParser Char opChar = oneOf "!@#$%^&*-+=|\\/<>,:~" opChars = many1 opChar -- parse a string literal, should be moved up to where previous literal sub-parsers are stringLit :: NParser (Literal a) stringLit = do notcode char '"' val <- many strChar char '"' return $ StrLit val where strChar = do c <- satisfy (\_ -> True) case c of '\\' -> satisfy (\_ -> True) -- naive escaping. just toss out the backslash and return the next character. allows for multiline strings without newline escaping '"' -> fail "unexpected quote" _ -> return c -- parser that eats whitespace and comments notcode :: NParser () notcode = do many (comment <|> (whiteChar >> return "")) return () -- comment parser comment :: NParser String comment = do char '`' cs <- many (satisfy nottick) char '`' return cs where nottick '`' = False nottick _ = True