module Lexer (tokenize) where

import Control.Applicative ((<|>), (<$>), (<*>))
import Data.Char (isSymbol)
import Text.Parsec (Parsec, alphaNum, anyChar, char, choice, digit, eof, letter,
                    many, many1, manyTill, newline, notFollowedBy, oneOf, parse,
                    satisfy, string, try, upper)
import Tokens
import Control.Monad (liftM)

token :: Parsec String u Token
token = NUMBER <$> integer
    <|> whitespace
    <|> chrs [ ('(',LPAREN)  , (')',RPAREN)
             , ('{',LBRACE)  , ('}',RBRACE)
             , ('[',LBRACKET), (']',RBRACKET)
             , (',',COMMA)   , (';',SEMI), ('_',UNDERSCORE)
             , ('\\',LAMBDA) , ('\x03BB',LAMBDA) ]
    <|> reserveds [ ("True",TRUE), ("False",FALSE)
                  , ("if",IF), ("then",THEN), ("else",ELSE)
                  , ("case",CASE), ("of",OF), ("data", DATA) 
                  , ("let",LET), ("in",IN) ]
    <|> do { char '`'; v <- variable; char '`'; return $ OP v }
    <|> anyOp
    <|> do { char '"'; s <- many $ backslashed <|> satisfy (/='"'); char '"'
           ; return $ STRING s}
    <|> do { char '\''; c <- backslashed <|> satisfy (/='\''); char '\''
           ; return $ CHAR c}
    <|> (ID <$> variable)
    <|> typeVar
    <|> do { try $ string "\r\n" <|> string "\n"; return NEWLINE }

chrs :: [(Char, Token)] -> Parsec String u Token
chrs = choice . map chr
    where chr (c, t) = char c >> return t

reserveds :: [(String, Token)] -> Parsec String u Token
reserveds = choice . map reserved
    where reserved (s, t) = try $ string s >>
                                  notFollowedBy (alphaNum <|> char '_') >>
                                  return t

anyOp = do op <- many1 (satisfy isSymbol <|> oneOf "+-/*=.$<>:&|^?%#@~!")
           case op of { ".." -> return DOT2
                      ; "->" -> return ARROW; "\8594" -> return ARROW
                      ; _ -> return $ OP op }

backslashed :: Parsec String u Char
backslashed = do { char '\\'; c <- anyChar
                 ; return . read $ ['\'','\\',c,'\''] }

integer :: Parsec String u Int
integer = read <$> many1 digit

variable :: Parsec String u String
variable = identifier $ letter <|> char '_'

typeVar :: Parsec String u Token
typeVar = TYPE <$> identifier upper

identifier :: Parsec String u Char -> Parsec String u String
identifier c = (:) <$> c <*> (many $ alphaNum <|> oneOf "_\'")

----  White Space and comments  ----

whitespace = (lineComment >> return NEWLINE) <|>
             (many1 (multiComment <|> many1 (char ' ')) >> return SPACES)
lineComment = do try $ string "--"
                 manyTill anyChar $ newline <|> (eof >> return '\n')

multiComment = do { try $ string "{-"; closeComment }
closeComment = manyTill anyChar . choice $
               [ try $ string "-}"
               , do { try $ string "{-"; closeComment; closeComment }
               ]

tokenParser :: Parsec String u [Token]
tokenParser = many1 token

tokenize :: String -> Either String [Token]
tokenize s = case parse tokenParser "" s of
               Right ts -> Right ts
               Left err -> Left $ "Syntax error: " ++ show err