module DDC.Core.Lexer.Tokens
        ( -- * Tokens
          Tok      (..)
        , renameTok
        , describeTok

          -- * Meta Tokens
        , TokMeta  (..)
        , describeTokMeta

          -- * Atomic Tokens
        , TokAtom  (..)
        , describeTokAtom

          -- * Named Tokens
        , TokNamed (..)
        , describeTokNamed)
where
import DDC.Core.Pretty
import DDC.Core.Exp
import Control.Monad


-- TokenFamily ----------------------------------------------------------------
-- | The family of a token.
--   This is used to help generate parser error messages,
--   so we can say ''the constructor Cons''
--             and ''the keyword case'' etc.
data TokenFamily
        = Symbol
        | Keyword
        | Constructor
        | Index
        | Variable


-- | Describe a token family, for parser error messages.
describeTokenFamily :: TokenFamily -> String
describeTokenFamily tf
 = case tf of
        Symbol          -> "symbol"
        Keyword         -> "keyword"
        Constructor     -> "constructor"
        Index           -> "index"
        Variable        -> "variable"


-- Tok ------------------------------------------------------------------------
-- | Tokens accepted by the core language parser.
data Tok n
        -- | Some junk symbol that isn't part of the language.
        = KJunk String

        -- | Meta tokens contain out-of-band information that is eliminated
        --   before parsing proper.
        | KM    !TokMeta

        -- | Atomic tokens are keywords, punctuation and baked-in 
        --   constructor names.
        | KA    !TokAtom 

        -- | A named token that is specific to the language fragment 
        --   (maybe it's a primop), or a user defined name.
        | KN    !(TokNamed n)
        deriving (Eq, Show)


-- | Apply a function to all the names in a `Tok`.
renameTok
        :: Ord n2
        => (n1 -> Maybe n2) 
        -> Tok n1 
        -> Maybe (Tok n2)

renameTok f kk
 = case kk of
        KJunk s -> Just $ KJunk s
        KM t    -> Just $ KM t
        KA t    -> Just $ KA t
        KN t    -> liftM KN $ renameTokNamed f t


-- | Describe a token for parser error messages.
describeTok :: Pretty n => Tok n -> String
describeTok kk
 = case kk of
        KJunk c         -> "character " ++ show c
        KM tm           -> describeTokMeta  tm
        KA ta           -> describeTokAtom  ta
        KN tn           -> describeTokNamed tn


-- TokMeta --------------------------------------------------------------------
-- | Meta tokens contain out-of-band information that is 
--   eliminated before parsing proper.
data TokMeta
        = KNewLine
        | KCommentLineStart
        | KCommentBlockStart
        | KCommentBlockEnd

        -- | This is injected by `dropCommentBlock` when it finds
        --   an unterminated block comment.
        | KCommentUnterminated

        -- | This is injected by `applyOffside` when it finds an explit close
        --   brace in a position where it would close a synthetic one.
        | KOffsideClosingBrace
        deriving (Eq, Show)


-- | Describe a TokMeta, for lexer error messages.
describeTokMeta :: TokMeta -> String
describeTokMeta tm
 = case tm of
        KNewLine                -> "new line"
        KCommentLineStart       -> "comment start"
        KCommentBlockStart      -> "block comment start"
        KCommentBlockEnd        -> "block comment end"
        KCommentUnterminated    -> "unterminated block comment"
        KOffsideClosingBrace    -> "closing brace"


-- TokAtom --------------------------------------------------------------------
-- | Atomic tokens are keywords, punctuation and baked-in constructor names.
--   They don't contain user-defined names or primops specific to the 
--   language fragment.
data TokAtom
        -- parens
        = KRoundBra
        | KRoundKet
        | KSquareBra
        | KSquareKet
        | KBraceBra
        | KBraceKet
        | KAngleBra
        | KAngleKet

        -- compound parens
        | KSquareColonBra
        | KSquareColonKet
        | KAngleColonBra
        | KAngleColonKet

        -- punctuation
        | KDot
        | KBar
        | KHat
        | KPlus
        | KColon
        | KComma
        | KBackSlash
        | KSemiColon
        | KUnderscore
        | KEquals
        | KAmpersand
        | KDash
        | KColonColon
        | KBigLambda

        -- symbolic constructors
        | KArrowTilde
        | KArrowDash
        | KArrowDashLeft
        | KArrowEquals

        -- bottoms
        | KBotEffect
        | KBotClosure

        -- core keywords
        | KModule
        | KImports
        | KExports
        | KWith
        | KWhere
        | KIn
        | KLet
        | KLazy
        | KLetRec
        | KLetRegions
        | KLetRegion
        | KWithRegion
        | KCase
        | KOf
        | KType
        | KWeakEff
        | KWeakClo
        | KPurify
        | KForget
        | KSuspend
        | KRun

        -- sugar keywords
        | KDo
        | KMatch
        | KElse

        -- debruijn indices
        | KIndex Int

        -- builtin names ------------
        --   sort constructors.
        | KSoConBuiltin SoCon

        --   kind constructors.
        | KKiConBuiltin KiCon

        --   witness type constructors.
        | KTwConBuiltin TwCon

        --   witness constructors.
        | KWbConBuiltin WbCon

        --   other builtin spec constructors.
        | KTcConBuiltin TcCon

        --   the unit data constructor.
        | KDaConUnit
        deriving (Eq, Show)


-- | Describe a `TokAtom`, for parser error messages.
describeTokAtom  :: TokAtom -> String
describeTokAtom ta
 = let  (family, str)           = describeTokAtom' ta
   in   describeTokenFamily family ++ " " ++ show str

describeTokAtom' :: TokAtom -> (TokenFamily, String)
describeTokAtom' ta
 = case ta of
        -- parens
        KRoundBra               -> (Symbol, "(")
        KRoundKet               -> (Symbol, ")")
        KSquareBra              -> (Symbol, "[")
        KSquareKet              -> (Symbol, "]")
        KBraceBra               -> (Symbol, "{")
        KBraceKet               -> (Symbol, "}")
        KAngleBra               -> (Symbol, "<")
        KAngleKet               -> (Symbol, ">")

        -- compound parens
        KSquareColonBra         -> (Symbol, "[:")
        KSquareColonKet         -> (Symbol, ":]")
        KAngleColonBra          -> (Symbol, "<:")
        KAngleColonKet          -> (Symbol, ":>")

        -- punctuation
        KDot                    -> (Symbol, ".")
        KBar                    -> (Symbol, "|")
        KHat                    -> (Symbol, "^")
        KPlus                   -> (Symbol, "+")
        KColon                  -> (Symbol, ":")
        KComma                  -> (Symbol, ",")
        KBackSlash              -> (Symbol, "\\")
        KSemiColon              -> (Symbol, ";")
        KUnderscore             -> (Symbol, "_")
        KEquals                 -> (Symbol, "=")
        KAmpersand              -> (Symbol, "&")
        KDash                   -> (Symbol, "-")
        KColonColon             -> (Symbol, "::")
        KBigLambda              -> (Symbol, "/\\")

        -- symbolic constructors
        KArrowTilde             -> (Constructor, "~>")
        KArrowDash              -> (Constructor, "->")
        KArrowDashLeft          -> (Constructor, "<-")
        KArrowEquals            -> (Constructor, "=>")

        -- bottoms
        KBotEffect              -> (Constructor, "!0")
        KBotClosure             -> (Constructor, "!$")

        -- expression keywords
        KModule                 -> (Keyword, "module")
        KImports                -> (Keyword, "imports")
        KExports                -> (Keyword, "exports")
        KWith                   -> (Keyword, "with")
        KWhere                  -> (Keyword, "where")
        KIn                     -> (Keyword, "in")
        KLet                    -> (Keyword, "let")
        KLazy                   -> (Keyword, "lazy")
        KLetRec                 -> (Keyword, "letrec")
        KLetRegions             -> (Keyword, "letregions")
        KLetRegion              -> (Keyword, "letregion")
        KWithRegion             -> (Keyword, "withregion")
        KCase                   -> (Keyword, "case")
        KOf                     -> (Keyword, "of")
        KType                   -> (Keyword, "type")
        KWeakEff                -> (Keyword, "weakeff")
        KWeakClo                -> (Keyword, "weakclo")
        KPurify                 -> (Keyword, "purify")
        KForget                 -> (Keyword, "forget")
        KSuspend                -> (Keyword, "suspend")
        KRun                    -> (Keyword, "run")

        -- sugar keywords
        KDo                     -> (Keyword, "do")
        KMatch                  -> (Keyword, "match")
        KElse                   -> (Keyword, "else")

        -- debruijn indices
        KIndex i                -> (Index,   "^" ++ show i)

        -- builtin names
        KSoConBuiltin so        -> (Constructor, renderPlain $ ppr so)
        KKiConBuiltin ki        -> (Constructor, renderPlain $ ppr ki)
        KTwConBuiltin tw        -> (Constructor, renderPlain $ ppr tw)
        KWbConBuiltin wi        -> (Constructor, renderPlain $ ppr wi)
        KTcConBuiltin tc        -> (Constructor, renderPlain $ ppr tc)
        KDaConUnit              -> (Constructor, "()")
        

-- TokNamed -------------------------------------------------------------------
-- | A token with a user-defined name.
data TokNamed n
        = KCon n
        | KVar n
        | KLit n
        deriving (Eq, Show)


-- | Describe a `TokNamed`, for parser error messages.
describeTokNamed :: Pretty n => TokNamed n -> String
describeTokNamed tn
 = case tn of
        KCon n  -> renderPlain $ text "constructor" <+> (dquotes $ ppr n)
        KVar n  -> renderPlain $ text "variable"    <+> (dquotes $ ppr n)
        KLit n  -> renderPlain $ text "literal"     <+> (dquotes $ ppr n)


-- | Apply a function to all the names in a `TokNamed`.
renameTokNamed 
        :: Ord n2
        => (n1 -> Maybe n2) 
        -> TokNamed n1 
        -> Maybe (TokNamed n2)

renameTokNamed f kk
  = case kk of
        KCon c           -> liftM KCon $ f c
        KVar c           -> liftM KVar $ f c
        KLit c           -> liftM KLit $ f c