Text/GrammarCombinators/Base/Token.hs

{-  Copyright 2010 Dominique Devriese

    This file is part of the grammar-combinators library.

    The grammar-combinators library is free software: you can
    redistribute it and/or modify it under the terms of the GNU
    Lesser General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at
    your option) any later version.

    Foobar is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General
    Public License along with Foobar. If not, see
    <http://www.gnu.org/licenses/>.
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}

module Text.GrammarCombinators.Base.Token where

import Language.Haskell.TH.Syntax (Lift)

import Data.Enumerable

-- | The 'Token' class identifies a type that can be used as terminal
-- identifier in a grammar definition. The type 't' itself is an
-- abstract identifier, identifying a certain type of terminals, but
-- any value of type 't' can correspond to a possibly infinite numer
-- of values of type 'ConcreteToken t'. For example, if you use a lexer
-- in a simple arithmetic expressions grammar, your lexer would typically
-- return values like 'PLUS', 'MINUS', but also 'INTEGER 42' when a
-- number is lexed. In this case, a separate Token type t would be defined,
-- such that a value 'INTEGER_T' of the 'Token' type t could
-- correspond to all values of the form 'INTEGER n' (for n an Integer)
-- of type 'ConcreteToken t'. A production rule defined as 
-- 'token' INTEGER_T would then produce result values of type 
-- 'ConcreteToken' t (e.g. INTEGER 42).
-- 
-- The requirements on 'Token' types are relatively strict, but this is
-- necessary to make it usable in table-based parser algorithms.
-- We reference the 'Lift' class to allow for compile-time
-- precalculation of tables using Template Haskell (See the LL1 and 
-- RealLL1 parsers).
-- 
-- Note that in some cases it is inefficient to use Char directly as 
-- token type, because of the big amount of tokens. For example when using
-- 'transformLeftCorner', the new domain will contain O(n*t + n^2)
-- non-terminals where n is the amount of non-terminals and t is th 
-- number of tokens, so when using this transformation, it is beneficial to
-- use a token type with less token values than 'Char', at
-- least if you will use algorithms that fold over the full new grammar's domain 
-- (e.g. 'printGrammar' does, 'printReachableGrammar' doesn't).
class (Show (ConcreteToken t), Eq (ConcreteToken t), Lift (ConcreteToken t),
       Eq t, Show t, Ord t, Lift t, Enumerable t) =>
      Token t where
  type ConcreteToken t
  -- | The 'classify' function classifies a given 'ConcreteToken' t into
  -- the value of type t it is represented by. 
  classify :: ConcreteToken t -> t
  -- | The 'enumConcreteTokens' function returns a (possibly infinite)
  -- list of all concrete tokens of type 'ConcreteToken t' 
  -- corresponding to a given token of 'Token' type t
  enumConcreteTokens :: t -> [ConcreteToken t]
  
instance Token Char where
  type ConcreteToken Char = Char
  classify = id
  enumConcreteTokens c = [c]