{-# OPTIONS -Wall #-} 

module Language.Haskell.HBB.Internal.Lexer (
    getVariableIdUsingLexerAt,
    LexingFailReason(..),
    IncludeQualified(..)
    ) where

import Language.Haskell.HBB.Internal.SrcSpan
import StringBuffer
import FastString (unpackFS,mkFastString)
import GhcMonad (liftIO,GhcMonad)
import SrcLoc
import Lexer (lexTokenStream,ParseResult(..),Token(..))
import GHC (getSessionDynFlags)

-- | This type holds possible return values of getVariableIdUsingLexerAt.
data LexingFailReason = LexingFailed
                      | VarNotFound

data IncludeQualified = IncludeQualifiedVars
                      | ExcludeQualifiedVars
                      deriving (Eq)

-- | This function uses GHCs lexer to determine the token that is under the
-- cursor (the passed SrcLoc).
--
-- Currently only the tokens ITvarid (a variable id) and ITqvarid (a qualified
-- variable id) are supported. If IncludeQualified euqal ExcludeQualifiedVars
-- then ITqvarid will be ignored. A token of type ITqvarid has two strings
-- attached, the name of the module (the qualifier) and the name of the
-- variable.  Of these twos only the name is contained by the result.
getVariableIdUsingLexerAt :: GhcMonad m => (FilePath,BufLoc) -> IncludeQualified -> m (Either LexingFailReason (String,RealSrcSpan))
getVariableIdUsingLexerAt (filename,loc) behaviour = do
    let wholeFileLoc :: RealSrcLoc
        wholeFileLoc = mkRealSrcLoc (mkFastString filename) 1 1 
    ghcDynFlags <- getSessionDynFlags
    fileContent <- liftIO $ hGetStringBuffer filename

    let isRelevantToken :: Token -> Bool
        isRelevantToken (ITvarid  _)                                       = True
        isRelevantToken (ITqvarid _) | (behaviour == IncludeQualifiedVars) = True
        isRelevantToken _                                                  = False

        token2Result :: Token -> String
        token2Result (ITvarid     s ) = unpackFS s
        token2Result (ITqvarid (_,s)) = unpackFS s
        token2Result _                = error "Internal error (unexected wrong token type)"

    case lexTokenStream fileContent wholeFileLoc ghcDynFlags of
        
        -- Experiences showed that the lexer adds additional tokens
        -- to the token stream that have length 0 (e.g. ITvocurly or
        -- ITsemi). We want to filter the token stream for the
        -- (single) token that matches the SrcLoc passed as command
        -- line parameter. As this is obviously not enough
        -- (additional ITvocurly start at the same location), we have
        -- to filter them again. There are two possibilities:
        --  - only use non-zero-length tokens
        --  - only use ITvarid Tokens (variable IDs)
        --
        -- The current solution is to filter the tokens for elements that
        -- contain the passed source location and from the result only use
        -- the tokens of type ITvarid. This makes sense as we anyway have
        -- to extract the string from this token.
    
        POk     _ xs -> let relevantByLoc  = [ tok | tok@(L (RealSrcSpan s) _) <- xs 
                                              , (toBufLoc $ realSrcSpanStart s) <= loc 
                                              , (toBufLoc $ realSrcSpanEnd   s)  > loc ]
                            relevant       = [ (token2Result t,r) | (L (RealSrcSpan r) t) <- relevantByLoc , isRelevantToken t ]
                        in case relevant of [x] -> return $ Right x
                                            []  -> return $ Left VarNotFound
                                            _   -> error "internal error (too many tokens)"
        PFailed _ _  -> return $ Left LexingFailed