{----
 - Noodle.hs - noodle programming language parser
 ----
 - Author: Jesse Rudolph <jesse.rudolph@gmail.com>
 - See LICENSE for licensing details
 ----------------------------------------------------------------- -}

module Language.Noodle.Parsing.Noodle where

import Language.Noodle.Syntax
import Language.Noodle.Parsing.String

-- specialization of the Parser type that fills in the state type as Unit
type NParser a = Parser () a


-- parse a declaration block without curly braces
topDecls :: NParser Decls
topDecls = do ds <- sepBy1 decl $ reserved ";"
              return $ Decls ds


-- parse a single declaration, or a declaration block in curly braces
decls :: NParser Decls
decls = oneDecls <|> block topDecls where
    oneDecls = do d <- decl
                  return $ Decls [d]


--  parse a single declaration
decl ::  NParser Decl
decl = intThunk <|> intFun <|> intOp <|> intMod <|>
       fail "expected declaration" where
    intThunk = do (n,c) <- clause ident comp
                  return $ IntThunk n c
    intFun   = do ((p,n),c) <- clause funHead comp
                  return $ IntFun p n c where
                      funHead = do p <- pattern
                                   n <- ident <|> oper
                                   return (p,n)
    intOp    = do ((p1,o,p2),c) <- clause opHead comp
                  return $ IntOp p1 o p2 c where
                      opHead = do p1 <- pattern
                                  op <- oper
                                  p2 <- pattern
                                  return (p1,op,p2)
    intMod   = do reserved "module"
                  name <- ident
                  ds   <- decls
                  return $ IntMod name ds


-- parse a computational form
comp :: NParser Comp
comp = tryComp <|> withComp <|>
       inComp  <|> patMatch <|> exprComp
           <|> fail "expected computation" where
    withComp = do reserved "with"
                  mc <- comp
                  reserved ";"
                  rest <- comp
                  return $ With mc rest
    inComp   = do reserved "in"
                  c <- comp
                  reserved ";"
                  ds <- decls
                  return $ In ds c
    patMatch = do p <- pattern
                  reserved "="
                  c1 <- comp
                  reserved ";"
                  c2 <- comp
                  return $ PatMatch p c1 c2
    tryComp = do reserved "handler"
                 c1 <- comp
                 reserved ";"
                 c2 <- comp
                 return $ Handler c1 c2
    exprComp = do e <- expr
                  return $ ExprComp e


-- parse a 'clause'. This is used to parse a language syntactic idiom of the form 'lhs := rhs' use in declaration, and the computational forms 'if' and 'case'
clause :: NParser a -> NParser b -> NParser (a,b)
clause f s = do first  <- f
                reserved ":="
                second <- s
                return (first,second)


-- this adds the parsing of enclosing curly braces to some other parser
block :: NParser a -> NParser a
block p = do lBrace
             r <- p
             rBrace
             return r


{- Expression Parsers -}

   -- parser for expressive forms
expr = exprOp


-- parser for infix operator expressions
exprOp   = do app <- exprApp
              rest <- many subOp
              case rest of
                   [] -> return app
                   _  -> return $ (foldl (\a f -> f a) app rest)
    where
        subOp = do o <- oper
                   a <- exprApp
                   return $ (\a1 -> ExprOp a1 o a)


-- parser for applicative expressions
exprApp  = do l <- modRef
              es <- many modRef
              case es of
                   [] -> return l
                   _  -> return $ foldl ExprApp l es


-- parser for module reference expressions. NOTE, this does not parse . infix, it parses it prefix to an identifier
modRef   = do e <- exprLit
              refs <- many subRef
              case refs of
                   [] -> return e
                   _  -> return $ foldl ExprApp e refs
    where
        subRef = do reserved "."
                    n <- (ident <|> oper)
                    return (ModRef n)

-- parser for expressive literals
exprLit  = do l <- literal comp
              return $ Lit l


{-- MISC PARSERS --}

-- pattern parser. as of now the only valid patterns are literals
pattern :: NParser Pattern
pattern = do l <- literal pattern
             return $ Pat l


-- parser for literals
literal :: NParser a -> NParser (Literal a)
literal p = number <|> identifier <|> symbLit <|> stringLit <|> absLit p <|> prodLit p <|> paren p


-- parse a literal identifier (as opposed to ident, which parses an identifier name)
identifier =
    do i <- ident
       return (Ident i)


-- parse a literal number
number = do n <- numeric
            return $ Numb (read n)


-- apply parenthesis to some other parser
paren p =
    do lParen
       p' <- p
       rParen
       return $ Paren p'


{-- TOKEN PARSERS --}

-- predicate for determining if some string has a special use in the language, which may overlap with the specification of some other parser
isReserved :: String -> Bool
isReserved s = or $ map (==s)
    [ ":="
    , "=:"
    , "="
    , ";"
    , "."
    , "`"
    , ","
    , "("
    , ")"
    , "with"
    , "in"
    , "handler"
    , "module" ]


-- cheat parser for parsing a string as a token. removes leading whitespace and comments. eventually the special character parsers will go away and this will be used exclusively
-- it makes other parser easier to read
reserved :: String -> NParser ()
reserved s = notcode >> string s >> return ()


-- depricated: parse left curly brace, removing leading garbage
lBrace :: NParser ()
lBrace = notcode >> char '{' >> return ()


-- depricated: parse right curly brace, removing leading garbage
rBrace :: NParser ()
rBrace = notcode >> char '}' >> return ()

-- depricated: parse left parenthesis, removing leading garbage
lParen :: NParser ()
lParen = notcode >> char '(' >> return ()

-- depricated: parse right parenthesis, removing leading garbage
rParen :: NParser ()
rParen = notcode >> char ')' >> return ()


-- parse a valid identifier name
ident :: NParser String
ident = do notcode
           first <- alphaLower
           rest <- many (char '_' <|> alphaUpper <|> alphaLower <|> digit)
           let name = (first:rest)
           if isReserved name
              then fail $ "unexpected keyword: " ++ name
              else return name


-- parse symbol literal, should be moved up to where previous literal sub-parsers are
symbLit :: NParser (Literal a)
symbLit = do notcode
             first <- alphaUpper
             rest <- many (char '_' <|> alphaUpper <|> alphaLower <|> digit)
             return $ Symb (first:rest)


-- parse product literal, should be moved up to where previous literal sub-parsers are
prodLit :: NParser a -> NParser (Literal a)
prodLit p = do reserved "("
               first <- p
               second <- many1 (reserved "," >> p)
               reserved ")"
               return $ ProdLit first (head second) (tail second)


-- parse abstractor literal, should be moved up to where previous literal sub-parsers are
absLit p = do reserved "["
              l <- p
              reserved "]"
              return $ Abs l


-- parse a valid operator name
oper :: NParser String
oper = do notcode
          op <- many1 opChar
          if isReserved op
             then fail $ "unexpected reserved operator: " ++ op
             else return op


-- parse a valid number string
numeric :: NParser String
numeric = notcode >> many1 digit


-- parse a single valid operator character
opChar :: NParser Char
opChar = oneOf "!@#$%^&*-+=|\\/<>,:~"
opChars = many1 opChar


-- parse a string literal, should be moved up to where previous literal sub-parsers are
stringLit :: NParser (Literal a)
stringLit = do notcode
               char '"'
               val <- many strChar
               char '"'
               return $  StrLit val where
                   strChar
                        = do c <- satisfy (\_ -> True)
                             case c of
                                '\\' -> satisfy (\_ -> True) -- naive escaping. just toss out the backslash and return the next character. allows for multiline strings without newline escaping
                                '"'  -> fail "unexpected quote"
                                _    -> return c


-- parser that eats whitespace and comments
notcode :: NParser ()
notcode = do many (comment <|> (whiteChar >> return ""))
             return ()


-- comment parser
comment :: NParser String
comment = do char '`'
             cs <- many (satisfy nottick)
             char '`'
             return cs where
                 nottick '`' = False
                 nottick _   = True