-----------------------------------------------------------------------------
-- |
-- Module      :  Language.Glambda.Lex
-- Copyright   :  (C) 2015 Richard Eisenberg
-- License     :  BSD-style (see LICENSE)
-- Maintainer  :  Richard Eisenberg (rae@cs.brynmawr.edu)
-- Stability   :  experimental
--
-- Lexes a Glambda program string into a sequence of tokens
--
----------------------------------------------------------------------------

module Language.Glambda.Lex ( lexG, lex ) where

import Prelude hiding ( lex )

import Language.Glambda.Token
import Language.Glambda.Monad
import Language.Glambda.Util

import Text.Parsec.Prim  ( Parsec, parse, getPosition, try )
import Text.Parsec.Combinator
import Text.Parsec.Char
import Text.Parsec.Token as Parsec
import Text.Parsec.Language

import Data.Maybe

import Control.Applicative
import Control.Arrow as Arrow

type Lexer = Parsec String ()

---------------------------------------------------
-- Utility
string_ :: String -> Lexer ()
string_ = ignore . string

---------------------------------------------------
-- | Lex some program text into a list of 'LToken's, aborting upon failure
lexG :: String -> GlamE [LToken]
lexG = eitherToGlamE . lex

-- | Lex some program text into a list of 'LToken's
lex :: String -> Either String [LToken]
lex = Arrow.left show . parse lexer ""

-- | Overall lexer
lexer :: Lexer [LToken]
lexer = (catMaybes <$> many lexer1_ws) <* eof

-- | Lex either one token or some whitespace
lexer1_ws :: Lexer (Maybe LToken)
lexer1_ws
  = (Nothing <$ whitespace)
    <|>
    (Just <$> lexer1)

-- | Lex some whitespace
whitespace :: Lexer ()
whitespace
  = choice [ ignore $ some space
           , block_comment
           , line_comment ]

-- | Lex a @{- ... -}@ comment (perhaps nested); consumes no input
-- if the target doesn't start with @{-@.
block_comment :: Lexer ()
block_comment = do
  try $ string_ "{-"
  comment_body

-- | Lex a block comment, without the opening "{-"
comment_body :: Lexer ()
comment_body
  = choice [ block_comment *> comment_body
           , try $ string_ "-}"
           , anyChar *> comment_body ]

-- | Lex a line comment
line_comment :: Lexer ()
line_comment = do
  try $ string_ "--"
  ignore $ manyTill anyChar (eof <|> ignore newline)

-- | Lex one token
lexer1 :: Lexer LToken
lexer1 = do
  pos <- getPosition
  L pos <$> choice [ symbolic
                   , word_token
                   , Int . fromInteger <$> Parsec.natural haskell ]

-- | Lex one non-alphanumeric token
symbolic :: Lexer Token
symbolic = choice [ LParen  <$  char '('
                  , RParen  <$  char ')'
                  , Lambda  <$  char '\\'
                  , Dot     <$  char '.'
                  , Arrow   <$  try (string "->")
                  , Colon   <$  char ':'
                  , ArithOp <$> arith_op
                  , Assign  <$  char '='
                  , Semi    <$  char ';' ]

-- | Lex one arithmetic operator
arith_op :: Lexer UArithOp
arith_op = choice [ UArithOp Plus     <$ char '+'
                  , UArithOp Minus    <$ char '-'
                  , UArithOp Times    <$ char '*'
                  , UArithOp Divide   <$ char '/'
                  , UArithOp Mod      <$ char '%'
                  , UArithOp LessE    <$ try (string "<=")
                  , UArithOp Less     <$ char '<'
                  , UArithOp GreaterE <$ try (string ">=")
                  , UArithOp Greater  <$ char '>'
                  , UArithOp Equals   <$ try (string "==")]

-- | Lex one alphanumeric token
word_token :: Lexer Token
word_token = to_token <$> word
  where
    to_token "true"  = Bool True
    to_token "false" = Bool False
    to_token "if"    = If
    to_token "then"  = Then
    to_token "else"  = Else
    to_token "fix"   = FixT
    to_token other   = Name other

-- | Lex one word
word :: Lexer String
word = ((:) <$> (letter <|> char '_') <*>
                 (many (alphaNum <|> char '_')))