{-------------------------------------------------------------------------------

        Copyright:              Bernie Pope 2007

        Module:                 Parser

        Description:            Baskell's parser for programs and expressions.

        Primary Authors:        Bernie Pope

-------------------------------------------------------------------------------}

{-
    This file is part of baskell.

    baskell is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    baskell is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with baskell; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

module Parser
   ( parse
   , expParser
   )
   where

import Text.ParserCombinators.Parsec
   ( ParseError
   , runParser
   , many
   , many1
   , eof
   , sepEndBy1
   , pzero
   , (<|>)
   , between
   , sepBy
   , sepBy1
   )

import Lexer
   ( lexer )

import AST
   ( Ident
   , Exp (..)
   , Lit (..)
   , Decl (..)
   , Program (..)
   , list
   )

import Type
   ( Type (..) )

import ParserUtils
   ( Parser
   , equals
   , rightArrow
   , comma
   , singleQuoted
   , leftRoundBracket
   , rightRoundBracket
   , leftSquareBracket
   , rightSquareBracket
   , backSlash
   , exclamation
   , semiColon
   , doubleColon
   , int
   , word
   )

import Control.Monad
  ( liftM )

import Data.Char
  ( isLower
  , isUpper
  )

--------------------------------------------------------------------------------

parse :: String -> String -> Either ParseError Program
parse filename input
   = runParser programParser () filename $ lexer filename input

-- programs
programParser :: Parser Program
programParser = liftM Program $ many declParser

-- types
typeParser :: Parser Type
typeParser = do
   ts <- sepBy1 type2Parser rightArrow
   return $ foldr1 TFun ts

type2Parser :: Parser Type
type2Parser =
   tyVarConParser <|>
   tupleTyParser  <|>
   listTyParser

listTyParser :: Parser Type
listTyParser =
   liftM TList $
      between leftSquareBracket rightSquareBracket typeParser

tupleTyParser :: Parser Type
tupleTyParser = parenParser typeParser TTuple

tyVarConParser :: Parser Type
tyVarConParser = do
   str <- word
   if null str
      then fail "internal error: zero length identifier"
      else case head str of
         firstLetter
            | isLower firstLetter -> return $ TVar 0
            | isUpper firstLetter -> parseCon str
            | otherwise
                 -> fail $ "internal error: not ident or constructor: " ++ str
   where
   -- we have only a limited set of valid data constructors
   parseCon :: String -> Parser Type
   parseCon str
      = case str of
           "Int"  -> return TInt
           "Bool" -> return TBool
           "Char" -> return TChar
           other  -> fail $ "unexpected constructor: " ++ str

-- declarations
--    identifier = expression ;
declParser :: Parser Decl
declParser = do
   identifier <- ident
   parseFunDecl identifier <|> parseTySig identifier

parseTySig :: Ident -> Parser Decl
parseTySig identifier = do
   doubleColon
   t <- typeParser
   semiColon
   return $ Sig identifier t

parseFunDecl :: Ident -> Parser Decl
parseFunDecl identifier = do
   equals
   exp <- expParser
   semiColon
   return $ Decl identifier exp

-- expressions
--     * applications are indicated by juxtaposition, as in Haskell
--     * a non-application expression is something other than an
--       application
--     * an application is a juxtaposition of non-application expressions
--     * non-application expressions may contain sub-expressions which
--       are application expressions, thus the two kinds are
--       mutually recursive
--     * we make the distinction between the two to avoid left-recursion
--       in the grammar

-- an application expression, or just a single non-application expression
expParser :: Parser Exp
expParser = do
   es <- many1 expParserNonApp
   case length es of
      0 -> pzero
      1 -> return $ head es
      n -> return $ foldl1 App es

-- non-application expressions
--    * variables
--    * strict and non-strict lambda abstractions
--    * literals
--    * parenthesised expressions (including tuples)
--    * lists
expParserNonApp :: Parser Exp
expParserNonApp
   = varConParser
     <|> lamParser
     <|> lamParserStrict
     <|> litParser
     <|> parenExpParser
     <|> listParser

-- a variable (identifier) must start with a lower case
-- alphabetic letter
ident :: Parser String
ident = do
   str <- word
   if null str
      then fail "internal error: zero length identifier"
      else case head str of
         firstLetter
            | isLower firstLetter -> return str
            | otherwise -> fail $ "invalid identifier: " ++ str

-- parser for variables or constructors
-- distinguised by the case of the first letter
-- as with Haskell, data constuctors start with an uppercase
-- and variables with a lowercase
varConParser :: Parser Exp
varConParser = do
   str <- word
   if null str
      then fail "internal error: zero length identifier"
      else case head str of
         firstLetter
            | isLower firstLetter -> return $ Var str
            | isUpper firstLetter -> parseCon str
            | otherwise
                 -> fail $ "internal error: not ident or constructor: " ++ str
   where
   -- we have only a limited set of valid data constructors
   parseCon :: String -> Parser Exp
   parseCon str
      = case str of
           "True"  -> return $ Literal $ LitBool True
           "False" -> return $ Literal $ LitBool False
           "Cons"  -> return $ Literal LitCons
           other   -> fail $ "unexpected constructor: " ++ str

-- non-strict lambdas, start with a backslash
lamParser :: Parser Exp
lamParser = do
   backSlash
   (id, e) <- lamSubstance
   return $ Lam id e

-- strict lambdas, start with an exclamation mark
lamParserStrict :: Parser Exp
lamParserStrict = do
   exclamation
   (id, e) <- lamSubstance
   return $ LamStrict id e

-- the part of lambdas that doesn't change between lazy and strict
lamSubstance :: Parser (Ident, Exp)
lamSubstance = do
   id <- ident
   rightArrow
   e <- expParser
   return (id, e)

-- wrapper for the literal parser
litParser :: Parser Exp
litParser = liftM Literal literalParser

-- lists, uses the square bracket notation of Haskell
-- with comma delimiters
listParser :: Parser Exp
listParser =
   liftM list $ between leftSquareBracket rightSquareBracket $
                sepBy expParser comma

-- could be a parenthesised expression or a tuple
parenExpParser :: Parser Exp
parenExpParser = parenParser expParser Tuple

parenParser :: Parser a -> ([a] -> a) -> Parser a
parenParser p tupler = do
   xs <- between leftRoundBracket rightRoundBracket (sepBy1 p comma)
   if length xs == 1
      then return $ head xs
      else return $ tupler xs

-- literals are ints or chars
literalParser :: Parser Lit
literalParser = litInt <|> litChar

litInt :: Parser Lit
litInt = liftM LitInt int

-- chars are single quoted, if they are more than one character
-- long then must check for validity
litChar :: Parser Lit
litChar
   = do charName <- singleQuoted
        case validateCharName charName of
           Nothing -> fail $ "badly formed character literal: " ++ charName
           Just c  -> return $ LitChar c
   where
   validateCharName :: String -> Maybe Char
   validateCharName s
      | length s == 1 = Just $ head s
      -- the only valid mult-char character sequences (escaped chars)
      | otherwise = lookup s [("\\n",'\n'), ("\\t", '\t')]