{-| Module      :  Lexer
    License     :  GPL

    Maintainer  :  helium@cs.uu.nl
    Stability   :  experimental
    Portability :  portable
-}

module Helium.Parser.Lexer
    ( lexer, strategiesLexer
    , Token, Lexeme(..)
    , lexemeLength
    , module Helium.Parser.LexerMessage
    ) where


import Helium.Main.Args
import Helium.Parser.LexerMonad
import Helium.Parser.LexerMessage
import Helium.Parser.LexerToken
import Text.ParserCombinators.Parsec.Pos
import Helium.Utils.Utils(internalError, hole)

import Control.Monad(when, liftM)
import Data.Char(ord)
import Data.List(isPrefixOf)

lexer :: [Option] -> String -> [Char] -> Either LexerError ([Token], [LexerWarning])
lexer opts fileName input = runLexerMonad opts fileName (mainLexer input)

strategiesLexer :: [Option] -> String -> [Char] -> Either LexerError ([Token], [LexerWarning])
strategiesLexer opts fileName input = 
    case lexer opts fileName input of
        Left err -> Left err
        Right (tokens, warnings) -> Right (reserveStrategyNames tokens, warnings)
        
type Lexer = [Char] -> LexerMonad [Token]

mainLexer :: Lexer
mainLexer a =
  do useTutor <- elem UseTutor `liftM` getOpts
     mainLexer' useTutor a

mainLexer' :: Bool -> Lexer
mainLexer' _ [] = do
    checkBracketsAtEOF
    pos <- getPos
    return [(incSourceLine (setSourceColumn pos 0) 1, LexEOF)]

mainLexer' _ ('-':'-':cs) 
    | not (nextCharSatisfy isSymbol rest) = do
        incPos (2 + length minuses)
        lexOneLineComment rest
    where
        (minuses, rest) = span (== '-') cs

mainLexer' useTutor ('{':'-':'#':' ':'M':'U':'S':'T':'U':'S':'E':' ':'#':'-':'}':cs) | useTutor = 
   returnToken LexMustUse 15 mainLexer cs

mainLexer' useTutor ('{':'-':'#':' ':'F':'C':cs) | useTutor = do 
    pos <- getPos 
    lexCaseFeedbackComment "" pos cs

mainLexer' useTutor ('{':'-':'#':' ':'F':cs) | useTutor = do 
    pos <- getPos 
    incPos 5
    lexFeedbackComment "" pos cs 

mainLexer' _ ('{':'-':cs) = do 
    pos <- getPos 
    incPos 2
    lexMultiLineComment [pos] 0 cs 
        
mainLexer' _ input@('\'':_) = 
    lexChar input

mainLexer' _ input@('"':_) = 
    lexString input

-- warn if we see something like ".2"
mainLexer' _ ('.':c:cs) 
    | myIsDigit c = do
        pos <- getPos
        lexerWarning (LooksLikeFloatNoDigits (takeWhile myIsDigit (c:cs))) pos
        returnToken (LexVarSym ".") 1 mainLexer (c:cs)
        
mainLexer' useTutor input@(c:cs) 
    | myIsLower c || c == '_' = -- variable or keyword
        lexName isLetter LexVar LexKeyword keywords input
    | myIsSpace c = do
        when (c == '\t') $ do
            pos <- getPos
            lexerWarning TabCharacter pos
        nextPos c 
        mainLexer cs        
    | myIsUpper c = -- constructor
        lexName isLetter LexCon (internalError "Lexer" "mainLexer'" "constructor") [] input
    | c == ':' = -- constructor operator
        lexName isSymbol LexConSym LexResConSym reservedConSyms input
    | isSymbol c = -- variable operator
        lexName isSymbol LexVarSym LexResVarSym (if useTutor then hole : reservedVarSyms else reservedVarSyms) input
    | c `elem` "([{" = do
        openBracket c
        returnToken (LexSpecial c) 1 mainLexer cs
    | c `elem` ")]}" = do
        closeBracket c
        returnToken (LexSpecial c) 1 mainLexer cs
    | c `elem` specialsWithoutBrackets = 
        returnToken (LexSpecial c) 1 mainLexer cs
    | myIsDigit c = -- number
        lexIntFloat input
    | otherwise = do
        pos <- getPos
        lexerError (UnexpectedChar c) pos

lexName :: (Char -> Bool) -> (String -> Lexeme) -> 
                (String -> Lexeme) -> [String] -> Lexer
lexName predicate normal reserved reserveds cs = do
    let (name@(first:_), rest) = span predicate cs
        lexeme = if name `elem` reserveds
                 then reserved name 
                 else normal   name
    when ((isSymbol first || first == ':') && name `contains` "--") $ do
        pos <- getPos
        lexerWarning CommentOperator pos
    returnToken lexeme (length name) mainLexer rest

contains :: Eq a => [a] -> [a] -> Bool
[] `contains` _ = False
xs@(_:rest) `contains` ys = ys `isPrefixOf` xs || rest `contains` ys

-----------------------------------------------------------
-- Numbers
-----------------------------------------------------------

lexIntFloat :: Lexer
lexIntFloat input = do
    _ <- getPos
    let (digits, rest) = span myIsDigit input
    case rest of
        ('.':rest2@(next:_)) 
            | myIsDigit next -> do
                let (fraction, rest3) = span myIsDigit rest2
                    prefix = digits ++ "." ++ fraction
                lexMaybeExponent prefix LexFloat rest3
            | next /= '.' -> do
                pos <- getPos
                lexerWarning (LooksLikeFloatNoFraction digits) pos
                lexMaybeExponent digits LexInt rest
        _ ->
            lexMaybeExponent digits LexInt rest

lexMaybeExponent :: String -> (String -> Lexeme) -> Lexer
lexMaybeExponent prefix lexemeFun input = 
    case input of
        ('e':'+':rest) ->
            lexExponent (prefix ++ "e+") rest
        ('E':'+':rest) ->
            lexExponent (prefix ++ "E+") rest
        ('e':'-':rest) ->
            lexExponent (prefix ++ "e-") rest
        ('E':'-':rest) ->
            lexExponent (prefix ++ "E-") rest
        ('e':rest) ->
            lexExponent (prefix ++ "e") rest
        ('E':rest) ->
            lexExponent (prefix ++ "E") rest
        _ ->
            returnToken (lexemeFun prefix) (length prefix) mainLexer input

lexExponent :: String -> Lexer
lexExponent prefix input = do
    startPos <- getPos
    let posAtExponent = addPos (length prefix) startPos
        (exponentDigits, rest) = span myIsDigit input
    if null exponentDigits then
        lexerError MissingExponentDigits posAtExponent
     else do
        let text = prefix ++ exponentDigits
        returnToken (LexFloat text) (length text) mainLexer rest

-----------------------------------------------------------
-- Characters
-----------------------------------------------------------

lexChar :: Lexer
lexChar input = do 
    pos <- getPos
    case input of
        '\'':'\\':c:'\'':cs -> -- '\n' etc
            if c `elem` escapeChars then    
                returnToken (LexChar ['\\',c]) 4 mainLexer cs
            else
                lexerError IllegalEscapeInChar pos
        '\'':'\'':_ -> -- ''
            lexerError EmptyChar pos
        '\'':c:'\'':cs -> -- 'a' '%'
            if ord c >= 32 && ord c <= 126 then 
                returnToken (LexChar [c]) 3 mainLexer cs
            else
                lexerError IllegalCharInChar pos
        ['\''] -> -- ' at end of file
            lexerError EOFInChar pos
        ('\'':cs) -> -- if there is a name between single quotes, we give a hint that backquotes might be meant 
            let (ds, es) = span (/= '\'') cs
                ws = words ds
            in if not (null es) && head es == '\'' && length ws == 1 && isName (head ws) then
                    lexerError (NonTerminatedChar (Just (head ws))) pos
               else 
                    lexerError (NonTerminatedChar Nothing) pos
        _ -> internalError "Lexer" "lexChar" "unexpected characters"

lexString :: Lexer
lexString ('"':cs) = 
    lexStringChar "" cs
lexString _ = 
    internalError "Lexer" "lexString" "should start with \""

lexStringChar :: String -> Lexer
lexStringChar revSoFar input = do
    startPos <- getPos
    let curPos = addPos (length revSoFar + 1) startPos
    case input of
        [] -> 
            lexerError EOFInString startPos
        '\\':c:cs ->
            if c `elem` escapeChars then
                lexStringChar (c:'\\':revSoFar) cs
            else
                lexerError IllegalEscapeInString curPos
        '"':cs ->
            returnToken (LexString (reverse revSoFar)) 
                        (length revSoFar + 2) mainLexer cs
        '\n':_ ->
            lexerError NewLineInString startPos
        c:cs ->
            if ord c >= 32 && ord c <= 126 then
                lexStringChar (c:revSoFar) cs
            else
                lexerError IllegalCharInString curPos
                
nextCharSatisfy :: (Char -> Bool) -> String -> Bool
nextCharSatisfy _ []    = False
nextCharSatisfy p (c:_) = p c

returnToken :: Lexeme -> Int -> Lexer -> Lexer
returnToken lexeme width continue input = do
    pos <- getPos
    incPos width
    let token = (pos, lexeme) 
    tokens <- continue input
    return (token:tokens)
       
-----------------------------------------------------------
-- Comment
-----------------------------------------------------------

lexOneLineComment :: Lexer
lexOneLineComment input =
    case input of
        [] -> mainLexer []
        ('\n':cs) -> do
            nextPos '\n'
            mainLexer cs
        (c:cs) -> do
            nextPos c
            lexOneLineComment cs

lexMultiLineComment :: [SourcePos] -> Int -> Lexer
lexMultiLineComment starts level input =
    case input of 
        '-':'}':cs 
            | level == 0 -> do
                incPos 2
                mainLexer cs
            | otherwise ->  do
                incPos 2
                lexMultiLineComment (tail starts) (level - 1) cs
        '{':'-':cs -> do
            pos <- getPos
            lexerWarning (NestedComment (head starts)) pos
            incPos 2
            lexMultiLineComment (pos:starts) (level+1) cs
        c:cs -> do
            nextPos c
            lexMultiLineComment starts level cs
        [] -> 
            lexerError UnterminatedComment (head starts)
            -- at end-of-file show the most recently opened comment

lexFeedbackComment :: String -> SourcePos -> Lexer
lexFeedbackComment feedback start input =
    case input of 
        '#':'-':'}':cs ->
           returnToken (LexFeedback (reverse feedback)) 
                       (length feedback + 6) mainLexer cs
        c:cs -> do
            nextPos c            
            lexFeedbackComment (c:feedback) start cs
        [] -> 
            lexerError UnterminatedComment start

lexCaseFeedbackComment :: String -> SourcePos -> Lexer
lexCaseFeedbackComment feedback start input =
    case input of 
        '#':'-':'}':cs ->
            returnToken (LexCaseFeedback (reverse feedback)) 0 mainLexer cs
        c:cs ->
            -- nextPos c            
            lexCaseFeedbackComment (c:feedback) start cs
        [] -> 
            lexerError UnterminatedComment start
-----------------------------------------------------------
-- Utility functions
-----------------------------------------------------------

isSymbol :: Char -> Bool
isSymbol = (`elem` symbols)
                                  
isLetter :: Char -> Bool
isLetter '\'' = True
isLetter '_'  = True
isLetter c    = myIsAlphaNum c

-- write our own isLower.. so that we don't accept funny symbols
myIsLower :: Char -> Bool
myIsLower c = c >= 'a' && c <= 'z'

myIsUpper :: Char -> Bool
myIsUpper c = c >= 'A' && c <= 'Z'

myIsDigit :: Char -> Bool
myIsDigit c = c >= '0' && c <= '9'

myIsAlphaNum :: Char -> Bool
myIsAlphaNum c = myIsLower c || myIsUpper c || myIsDigit c

myIsSpace :: Char -> Bool
myIsSpace c = c == ' ' || c == '\n' || c == '\t' || c == '\r'

isName :: String -> Bool
isName [] = False
isName (hd:tl) = (myIsUpper hd || myIsLower hd) && all isLetter tl

-----------------------------------------------------------
-- Constants
-----------------------------------------------------------

escapeChars :: String
escapeChars = "\\nabfnrtv\"'"

symbols :: String
symbols = "!#$%&*+./<=>?@^|-~:\\"

keywords :: [String]
keywords = 
    [ "let", "in", "do", "where", "case", "of", "if"
    , "then", "else", "data", "type", "module", "import", "hiding"
    , "infix", "infixl", "infixr", "_", "deriving"
    , "class", "instance", "default", "newtype" -- not supported
    ]

reservedConSyms :: [String]
reservedConSyms =
    [ "::" ]

reservedVarSyms :: [String]
reservedVarSyms =
    [ "=>", "->", "<-", "..", "-", "-.", "@", "=", "\\", "|", "~" ]

specialsWithoutBrackets :: String
specialsWithoutBrackets = 
    ",`;" 

reserveStrategyNames :: [Token] -> [Token]
reserveStrategyNames = 
  map (\token@(pos, lexeme) -> case lexeme of 
                   LexVar s    | s `elem` strategiesKeywords -> (pos, LexKeyword s)
                   LexVarSym s | s == "=="                   -> (pos, LexResVarSym s)
                   LexConSym s | s == ":"                    -> (pos, LexResConSym s)
                   _ -> token
      )

strategiesKeywords :: [String]
strategiesKeywords = [ "phase", "constraints", "siblings" ]