module DDC.Core.Lexer.Tokens
        ( Located (..)
        , columnOfLocated

          -- * Tokens
        , Token         (..)      
        , TokenMeta     (..)      
        , TokenAtom     (..)      
        , TokenNamed    (..)    

        , Keyword       (..)
        , Symbol        (..)
        , Builtin       (..)
        , Literal       (..)

          -- ** Description
        , describeToken
        , describeTokenMeta
        , describeTokenAtom
        , describeTokenNamed
        , sayKeyword
        , saySymbol
        , sayBuiltin

          -- ** Renaming
        , renameToken

          -- ** Predicates
        , isVarName
        , isVarStart
        , isVarBody

        , isConName
        , isConStart
        , isConBody

        , isLitName
        , isLitStart
        , isLitBody

          -- ** Literal Reading
        , readLitInteger
        , readLitNat
        , readLitInt
        , readLitSize
        , readLitWordOfBits
        , readLitFloatOfBits
        , readBinary
        , readHex)
where
import DDC.Data.SourcePos
import DDC.Core.Lexer.Token.Symbol
import DDC.Core.Lexer.Token.Keyword
import DDC.Core.Lexer.Token.Builtin
import DDC.Core.Lexer.Token.Literal
import DDC.Core.Lexer.Token.Names
import DDC.Core.Pretty
import Control.Monad
import Data.Text                (Text)
import qualified Data.Text      as T


-- TokenFamily ----------------------------------------------------------------
-- | The family of a token.
--   This is used to help generate parser error messages,
--   so we can say ''the constructor Cons''
--             and ''the keyword case'' etc.
data TokenFamily
        = Symbol
        | Keyword
        | Constructor
        | Index
        | Literal
        | Pragma


-- | Describe a token family, for parser error messages.
describeTokenFamily :: TokenFamily -> String
describeTokenFamily tf
 = case tf of
        Symbol          -> "symbol"
        Keyword         -> "keyword"
        Constructor     -> "constructor"
        Index           -> "index"
        Literal         -> "literal"
        Pragma          -> "pragma"


-- Token ------------------------------------------------------------------------
-- | Tokens accepted by the core language parser.
data Token n
        -- | Some junk symbol that isn't part of the language.
        = KErrorJunk   String

        -- | The first part of an unterminated string.
        | KErrorUnterm String

        -- | Meta tokens contain out-of-band information that is eliminated
        --   before parsing proper.
        | KM    !TokenMeta

        -- | Atomic tokens are keywords, punctuation and baked-in 
        --   constructor names.
        | KA    !TokenAtom 

        -- | A named token that is specific to the language fragment 
        --   (maybe it's a primop), or a user defined name.
        | KN    !(TokenNamed n)
        deriving (Eq, Show)


-- | Apply a function to all the names in a `Tok`.
renameToken
        :: Ord n2
        => (n1 -> Maybe n2) 
        -> Token n1 
        -> Maybe (Token n2)

renameToken f kk
 = case kk of
        KErrorJunk s 
         -> Just $ KErrorJunk s

        KErrorUnterm s
          -> Just $ KErrorUnterm s

        KM t    -> Just $ KM t
        KA t    -> Just $ KA t
        KN t    -> liftM KN $ renameTokenNamed f t


-- | Describe a token for parser error messages.
describeToken :: Pretty n => Token n -> String
describeToken kk
 = case kk of
        KErrorJunk c    -> "character " ++ show c
        KErrorUnterm _  -> "unterminated string"
        KM tm           -> describeTokenMeta  tm
        KA ta           -> describeTokenAtom  ta
        KN tn           -> describeTokenNamed tn


-- TokMeta --------------------------------------------------------------------
-- | Meta tokens contain out-of-band information that is 
--   eliminated before parsing proper.
data TokenMeta
        = KNewLine

        -- | Comment string.
        | KComment String

        -- | This is injected by `dropCommentBlock` when it finds
        --   an unterminated block comment.
        | KCommentUnterminated

        -- | This is injected by `applyOffside` when it finds an explit close
        --   brace in a position where it would close a synthetic one.
        | KOffsideClosingBrace
        deriving (Eq, Show)


-- | Describe a TokMeta, for lexer error messages.
describeTokenMeta :: TokenMeta -> String
describeTokenMeta tm
 = case tm of
        KNewLine                -> "new line"
        KComment{}              -> "comment"
        KCommentUnterminated    -> "unterminated block comment"
        KOffsideClosingBrace    -> "closing brace"


-- TokAtom --------------------------------------------------------------------
-- | Atomic tokens are keywords, punctuation and baked-in constructor names.
--   They don't contain user-defined names or primops specific to the 
--   language fragment.
data TokenAtom
        -- | Pragmas.
        = KPragma  Text

        -- | Symbols.
        | KSymbol  Symbol

        -- | Keywords.
        | KKeyword Keyword

        -- | Builtin names.
        | KBuiltin Builtin

        -- | Infix operators, like in 1 + 2.
        | KOp      String

        -- | Wrapped operator, like in (+) 1 2.
        | KOpVar   String       

        -- | Debrujn indices.
        | KIndex   Int

        -- | Literal values.
        | KLiteral 
                Literal         -- Literal value.
                Bool            -- Trailing '#' prim specifier.
        deriving (Eq, Show)


-- | Describe a `TokAtom`, for parser error messages.
describeTokenAtom  :: TokenAtom -> String
describeTokenAtom ta
 = let  (family, str)           = describeTokenAtom' ta
   in   describeTokenFamily family ++ " " ++ show str

describeTokenAtom' :: TokenAtom -> (TokenFamily, String)
describeTokenAtom' ta
 = case ta of
        KPragma p       -> (Pragma,  "{-#" ++ T.unpack p ++ "#-}")
        KSymbol  ss     -> (Symbol,      saySymbol ss)
        KKeyword kw     -> (Keyword,     sayKeyword kw)
        KBuiltin bb     -> (Constructor, sayBuiltin bb)
        KOp      op     -> (Symbol, op)
        KOpVar   op     -> (Symbol, "(" ++ op ++ ")")
        KIndex   i      -> (Index,   "^" ++ show i)
        KLiteral l b    -> (Literal, show (l, b))


-- TokNamed -------------------------------------------------------------------
-- | A token with a user-defined name.
data TokenNamed n
        = KCon n
        | KVar n
        deriving (Eq, Show)


-- | Describe a `TokNamed`, for parser error messages.
describeTokenNamed :: Pretty n => TokenNamed n -> String
describeTokenNamed tn
 = case tn of
        KCon n  -> renderPlain $ text "constructor" <+> (dquotes $ ppr n)
        KVar n  -> renderPlain $ text "variable"    <+> (dquotes $ ppr n)


-- | Apply a function to all the names in a `TokNamed`.
renameTokenNamed 
        :: Ord n2
        => (n1 -> Maybe n2) 
        -> TokenNamed n1 
        -> Maybe (TokenNamed n2)

renameTokenNamed f kk
  = case kk of
        KCon c           -> liftM KCon $ f c
        KVar c           -> liftM KVar $ f c