{- |
  Module      : Text.Tokenizer.Types
  Copyright   : (c) Lev Dvorkin, 2022
  License     : MIT
  Maintainer  : lev_135@mail.ru
  Stability   : Experimental

  Common types shared by the uniqueness checking and tokenizing algorithms.
-}
module Text.Tokenizer.Types
  ( Alt (..)
  , Count (..)
  , Repeatable (..)
  , TokId
  , Token (..)
  , RToken (..)
  , makeRToken
  ) where

import Control.Applicative (Alternative)
import Data.Function (on)

import Text.Tokenizer.BlackWhiteSet (BlackWhiteSet)

-- | Newtype wrapper around the list monad, used as a collection of
-- alternatives.
newtype Alt a = Alt [a]
  deriving
    ( Eq
    , Ord
    , Show
    , Functor
    , Applicative
    , Monad
    , Alternative
    , Foldable
    , Traversable
    )

-- | How many symbols a 'Repeatable' is able to accept.
data Count
  = One
  | Some
  deriving (Eq, Ord, Show)

-- | A 'BlackWhiteSet' paired with the number of times it may be repeated.
data Repeatable c = Repeatable
  { getCnt :: Count
  , getBWS :: BlackWhiteSet c
  }
  deriving (Eq, Ord, Show)

-- | A token over the char type @c@, carrying a name of type @k@. The name is
-- used in uniqueness error messages and in the tokenizing output.
data Token k c = Token
  { -- | the name of the token
    name :: k
  , -- | restrictions on the symbols before the matchable part
    --
    -- NB! they are assumed to be satisfied if there are no symbols before the
    -- matched part
    behind :: [BlackWhiteSet c]
  , -- | restrictions on the symbols after the matchable part
    --
    -- NB! they are assumed to be satisfied if there are no symbols after the
    -- matched part
    ahead :: [BlackWhiteSet c]
  , -- | matchable sequence of char sets with possible repetitions
    body :: [Repeatable c]
  }
  deriving (Show)

-- | Token id type synonym.
type TokId = Int

-- | Type for internal needs.
-- Contains the autogenerated 'tokId'; the restrictions behind the token are
-- stored in reversed order.
data RToken c = RToken
  { -- | unique id of the token (generated automatically)
    tokId :: TokId
  , -- | constraints on the symbols behind the matchable part, reversed
    rbehind :: [Repeatable c]
  , -- | constraints on the symbols ahead of the matchable part
    ahead :: [Repeatable c]
  , -- | matchable part of the string
    body :: [Repeatable c]
  }
  deriving (Show)

-- | Two tokens are equal exactly when their ids are equal; the other fields
-- are not inspected.
instance Eq (RToken c) where
  lhs == rhs = tokId lhs == tokId rhs

-- | Tokens are ordered by their ids only.
instance Ord (RToken c) where
  compare = on compare tokId

-- | Build an 'RToken' out of a 'Token' and the id assigned to it.
--
-- The @behind@ restrictions are reversed, and every restriction on either
-- side is wrapped as a 'Repeatable' with count 'One'. The body is copied
-- unchanged and the token's name is dropped.
makeRToken :: TokId -> Token k c -> RToken c
makeRToken ident Token{body = matchable, behind = before, ahead = after} =
  RToken
    { tokId = ident
    , rbehind = once (reverse before)
    , ahead = once after
    , body = matchable
    }
  where
    -- Lift each plain char set into a 'Repeatable' that matches one symbol.
    once = map (Repeatable One)