| Copyright | (c) Lev Dvorkin 2022 |
|---|---|
| License | MIT |
| Maintainer | lev_135@mail.ru |
| Stability | Experimental |
| Safe Haskell | None |
| Language | Haskell2010 |
Text.Tokenizer
Description
This module re-exports everything you need from the package.
Synopsis
- data BlackWhiteSet c
- data Count
- data Repeatable c = Repeatable {
- getCnt :: Count
- getBWS :: BlackWhiteSet c
- data Token k c = Token {
- name :: k
- behind, ahead :: [BlackWhiteSet c]
- body :: [Repeatable c]
- data ConflictTokens k c = ConflictTokens {
- tokList1, tokList2 :: [(k, [BlackWhiteSet c])]
- checkUniqueTokenizing :: forall k c. Ord c => [Token k c] -> Either (ConflictTokens k c) ()
- data TokenizeMap k c
- makeTokenizeMap :: Ord c => [Token k c] -> TokenizeMap k c
- data TokenizeError k c
- = NoWayTokenize Int [(k, [c])]
- | TwoWaysTokenize Int [(k, [c])] [(k, [c])]
- tokenize :: forall k c. Ord c => TokenizeMap k c -> [c] -> Either (TokenizeError k c) [(k, [c])]
Structures for tokens representation
data BlackWhiteSet c Source #
Select some "white set" of available elements or "black set" of forbidden ones
Instances
| Eq c => Eq (BlackWhiteSet c) Source # | |
Defined in Text.Tokenizer.BlackWhiteSet Methods (==) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # (/=) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # | |
| Ord c => Ord (BlackWhiteSet c) Source # | |
Defined in Text.Tokenizer.BlackWhiteSet Methods compare :: BlackWhiteSet c -> BlackWhiteSet c -> Ordering # (<) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # (<=) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # (>) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # (>=) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # max :: BlackWhiteSet c -> BlackWhiteSet c -> BlackWhiteSet c # min :: BlackWhiteSet c -> BlackWhiteSet c -> BlackWhiteSet c # | |
| Show c => Show (BlackWhiteSet c) Source # | |
Defined in Text.Tokenizer.BlackWhiteSet Methods showsPrec :: Int -> BlackWhiteSet c -> ShowS # show :: BlackWhiteSet c -> String # showList :: [BlackWhiteSet c] -> ShowS # | |
Number of symbols that a Repeatable can accept
data Repeatable c Source #
BlackWhiteSet that can be repeated.
Constructors
| Repeatable | |
Fields
| |
Instances
| Eq c => Eq (Repeatable c) Source # | |
Defined in Text.Tokenizer.Types | |
| Ord c => Ord (Repeatable c) Source # | |
Defined in Text.Tokenizer.Types Methods compare :: Repeatable c -> Repeatable c -> Ordering # (<) :: Repeatable c -> Repeatable c -> Bool # (<=) :: Repeatable c -> Repeatable c -> Bool # (>) :: Repeatable c -> Repeatable c -> Bool # (>=) :: Repeatable c -> Repeatable c -> Bool # max :: Repeatable c -> Repeatable c -> Repeatable c # min :: Repeatable c -> Repeatable c -> Repeatable c # | |
| Show c => Show (Repeatable c) Source # | |
Defined in Text.Tokenizer.Types Methods showsPrec :: Int -> Repeatable c -> ShowS # show :: Repeatable c -> String # showList :: [Repeatable c] -> ShowS # | |
Token with name of type k (used for uniqueness error messages and
tokenizing output) over char type c.
Constructors
| Token | |
Fields
| |
Uniqueness checking
data ConflictTokens k c Source #
Two ways of tokenizing a string, demonstrating non-uniqueness
Constructors
| ConflictTokens | |
Fields
| |
Instances
| (Eq k, Eq c) => Eq (ConflictTokens k c) Source # | |
Defined in Text.Tokenizer.Uniqueness Methods (==) :: ConflictTokens k c -> ConflictTokens k c -> Bool # (/=) :: ConflictTokens k c -> ConflictTokens k c -> Bool # | |
| (Ord k, Ord c) => Ord (ConflictTokens k c) Source # | |
Defined in Text.Tokenizer.Uniqueness Methods compare :: ConflictTokens k c -> ConflictTokens k c -> Ordering # (<) :: ConflictTokens k c -> ConflictTokens k c -> Bool # (<=) :: ConflictTokens k c -> ConflictTokens k c -> Bool # (>) :: ConflictTokens k c -> ConflictTokens k c -> Bool # (>=) :: ConflictTokens k c -> ConflictTokens k c -> Bool # max :: ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c # min :: ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c # | |
| (Show k, Show c) => Show (ConflictTokens k c) Source # | |
Defined in Text.Tokenizer.Uniqueness Methods showsPrec :: Int -> ConflictTokens k c -> ShowS # show :: ConflictTokens k c -> String # showList :: [ConflictTokens k c] -> ShowS # | |
checkUniqueTokenizing :: forall k c. Ord c => [Token k c] -> Either (ConflictTokens k c) () Source #
Check that there is no list of symbols that can be decomposed in two different ways into tokens from the given list
Splitting a string into tokens
data TokenizeMap k c Source #
Auxiliary structure for tokenizing. Should be used as an opaque type,
initialized by makeTokenizeMap and concatenated via its Semigroup instance.
Instances
| (Show c, Show k) => Show (TokenizeMap k c) Source # | |
Defined in Text.Tokenizer.Split Methods showsPrec :: Int -> TokenizeMap k c -> ShowS # show :: TokenizeMap k c -> String # showList :: [TokenizeMap k c] -> ShowS # | |
| Ord c => Semigroup (TokenizeMap k c) Source # | |
Defined in Text.Tokenizer.Split Methods (<>) :: TokenizeMap k c -> TokenizeMap k c -> TokenizeMap k c # sconcat :: NonEmpty (TokenizeMap k c) -> TokenizeMap k c # stimes :: Integral b => b -> TokenizeMap k c -> TokenizeMap k c # | |
| Ord c => Monoid (TokenizeMap k c) Source # | |
Defined in Text.Tokenizer.Split Methods mempty :: TokenizeMap k c # mappend :: TokenizeMap k c -> TokenizeMap k c -> TokenizeMap k c # mconcat :: [TokenizeMap k c] -> TokenizeMap k c # | |
makeTokenizeMap :: Ord c => [Token k c] -> TokenizeMap k c Source #
Create the auxiliary map for tokenizing. Should be called once, for initialization
data TokenizeError k c Source #
Error during tokenizing
Wherever the type [(k, [c])] is used, it stores a list of pairs, each holding
the name of a token and the part of the string matched by it
Constructors
| NoWayTokenize | |
Fields
| |
| TwoWaysTokenize | |
Fields
| |
Instances
| (Eq k, Eq c) => Eq (TokenizeError k c) Source # | |
Defined in Text.Tokenizer.Split Methods (==) :: TokenizeError k c -> TokenizeError k c -> Bool # (/=) :: TokenizeError k c -> TokenizeError k c -> Bool # | |
| (Show k, Show c) => Show (TokenizeError k c) Source # | |
Defined in Text.Tokenizer.Split Methods showsPrec :: Int -> TokenizeError k c -> ShowS # show :: TokenizeError k c -> String # showList :: [TokenizeError k c] -> ShowS # | |
tokenize :: forall k c. Ord c => TokenizeMap k c -> [c] -> Either (TokenizeError k c) [(k, [c])] Source #
Split a list of symbols into tokens.