{------------------------------------------------------------------------------- Copyright: Bernie Pope 2007 Module: Parser Description: Baskell's parser for programs and expressions. Primary Authors: Bernie Pope -------------------------------------------------------------------------------} {- This file is part of baskell. baskell is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. baskell is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with baskell; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} module Parser ( parse , expParser ) where import Text.ParserCombinators.Parsec ( ParseError , runParser , many , many1 , eof , sepEndBy1 , pzero , (<|>) , between , sepBy , sepBy1 ) import Lexer ( lexer ) import AST ( Ident , Exp (..) , Lit (..) , Decl (..) , Program (..) , list ) import Type ( Type (..) ) import ParserUtils ( Parser , equals , rightArrow , comma , singleQuoted , leftRoundBracket , rightRoundBracket , leftSquareBracket , rightSquareBracket , backSlash , exclamation , semiColon , doubleColon , int , word ) import Control.Monad ( liftM ) import Data.Char ( isLower , isUpper ) -------------------------------------------------------------------------------- parse :: String -> String -> Either ParseError Program parse filename input = runParser programParser () filename $ lexer filename input -- programs programParser :: Parser Program programParser = liftM Program $ many declParser -- types typeParser :: Parser Type typeParser = do ts <- sepBy1 type2Parser rightArrow return $ foldr1 TFun ts type2Parser :: Parser Type type2Parser = tyVarConParser <|> tupleTyParser <|> listTyParser listTyParser :: Parser Type listTyParser = liftM TList $ between leftSquareBracket rightSquareBracket typeParser tupleTyParser :: Parser Type tupleTyParser = parenParser typeParser TTuple tyVarConParser :: Parser Type tyVarConParser = do str <- word if null str then fail "internal error: zero length identifier" else case head str of firstLetter | isLower firstLetter -> return $ TVar 0 | isUpper firstLetter -> parseCon str | otherwise -> fail $ "internal error: not ident or constructor: " ++ str where -- we have only a limited set of valid data constructors parseCon :: String -> Parser Type parseCon str = case str of "Int" -> return TInt "Bool" -> return TBool "Char" -> return TChar other -> fail $ "unexpected constructor: " ++ str -- declarations -- identifier = expression ; declParser :: Parser Decl declParser = do identifier <- ident parseFunDecl identifier <|> parseTySig identifier parseTySig :: Ident -> Parser Decl parseTySig identifier = do doubleColon t <- typeParser semiColon return $ Sig identifier t parseFunDecl :: Ident -> Parser Decl parseFunDecl identifier = do equals exp <- expParser semiColon return $ Decl identifier exp -- expressions -- * applications are indicated by juxtaposition, as in Haskell -- * a non-application expression is something other than an -- application -- * an application is a juxtaposition of non-application expressions -- * non-application expressions may contain sub-expressions which -- are application expressions, thus the two kinds are -- mutually recursive -- * we make the distinction between the two to avoid left-recursion -- in the grammar -- an application expression, or just a single non-application expression expParser :: Parser Exp expParser = do es <- many1 expParserNonApp case length es of 0 -> pzero 1 -> return $ head es n -> return $ foldl1 App es -- non-application expressions -- * variables -- * strict and non-strict lambda abstractions -- * literals -- * parenthesised expressions (including tuples) -- * lists expParserNonApp :: Parser Exp expParserNonApp = varConParser <|> lamParser <|> lamParserStrict <|> litParser <|> parenExpParser <|> listParser -- a variable (identifier) must start with a lower case -- alphabetic letter ident :: Parser String ident = do str <- word if null str then fail "internal error: zero length identifier" else case head str of firstLetter | isLower firstLetter -> return str | otherwise -> fail $ "invalid identifier: " ++ str -- parser for variables or constructors -- distinguised by the case of the first letter -- as with Haskell, data constuctors start with an uppercase -- and variables with a lowercase varConParser :: Parser Exp varConParser = do str <- word if null str then fail "internal error: zero length identifier" else case head str of firstLetter | isLower firstLetter -> return $ Var str | isUpper firstLetter -> parseCon str | otherwise -> fail $ "internal error: not ident or constructor: " ++ str where -- we have only a limited set of valid data constructors parseCon :: String -> Parser Exp parseCon str = case str of "True" -> return $ Literal $ LitBool True "False" -> return $ Literal $ LitBool False "Cons" -> return $ Literal LitCons other -> fail $ "unexpected constructor: " ++ str -- non-strict lambdas, start with a backslash lamParser :: Parser Exp lamParser = do backSlash (id, e) <- lamSubstance return $ Lam id e -- strict lambdas, start with an exclamation mark lamParserStrict :: Parser Exp lamParserStrict = do exclamation (id, e) <- lamSubstance return $ LamStrict id e -- the part of lambdas that doesn't change between lazy and strict lamSubstance :: Parser (Ident, Exp) lamSubstance = do id <- ident rightArrow e <- expParser return (id, e) -- wrapper for the literal parser litParser :: Parser Exp litParser = liftM Literal literalParser -- lists, uses the square bracket notation of Haskell -- with comma delimiters listParser :: Parser Exp listParser = liftM list $ between leftSquareBracket rightSquareBracket $ sepBy expParser comma -- could be a parenthesised expression or a tuple parenExpParser :: Parser Exp parenExpParser = parenParser expParser Tuple parenParser :: Parser a -> ([a] -> a) -> Parser a parenParser p tupler = do xs <- between leftRoundBracket rightRoundBracket (sepBy1 p comma) if length xs == 1 then return $ head xs else return $ tupler xs -- literals are ints or chars literalParser :: Parser Lit literalParser = litInt <|> litChar litInt :: Parser Lit litInt = liftM LitInt int -- chars are single quoted, if they are more than one character -- long then must check for validity litChar :: Parser Lit litChar = do charName <- singleQuoted case validateCharName charName of Nothing -> fail $ "badly formed character literal: " ++ charName Just c -> return $ LitChar c where validateCharName :: String -> Maybe Char validateCharName s | length s == 1 = Just $ head s -- the only valid mult-char character sequences (escaped chars) | otherwise = lookup s [("\\n",'\n'), ("\\t", '\t')]