Copyright | (c) Lev Dvorkin 2022 |
---|---|
License | MIT |
Maintainer | lev_135@mail.ru |
Stability | Experimental |
Safe Haskell | None |
Language | Haskell2010 |
This module reexports everything you need from the package
Synopsis
- data BlackWhiteSet c
- data Count
- data Repeatable c = Repeatable {
- getCnt :: Count
- getBWS :: BlackWhiteSet c
- data Token k c = Token {
- name :: k
- behind, ahead :: [BlackWhiteSet c]
- body :: [Repeatable c]
- data ConflictTokens k c = ConflictTokens {
- tokList1, tokList2 :: [(k, [BlackWhiteSet c])]
- checkUniqueTokenizing :: forall k c. Ord c => [Token k c] -> Either (ConflictTokens k c) ()
- data TokenizeMap k c
- makeTokenizeMap :: Ord c => [Token k c] -> TokenizeMap k c
- data TokenizeError k c
- = NoWayTokenize Int [(k, [c])]
- | TwoWaysTokenize Int [(k, [c])] [(k, [c])]
- tokenize :: forall k c. Ord c => TokenizeMap k c -> [c] -> Either (TokenizeError k c) [(k, [c])]
Structures for tokens representation
data BlackWhiteSet c Source #
Select some "white set" of available elements or "black set" of forbidden ones
Instances
Eq c => Eq (BlackWhiteSet c) Source # | |
Defined in Text.Tokenizer.BlackWhiteSet (==) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # (/=) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # | |
Ord c => Ord (BlackWhiteSet c) Source # | |
Defined in Text.Tokenizer.BlackWhiteSet compare :: BlackWhiteSet c -> BlackWhiteSet c -> Ordering # (<) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # (<=) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # (>) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # (>=) :: BlackWhiteSet c -> BlackWhiteSet c -> Bool # max :: BlackWhiteSet c -> BlackWhiteSet c -> BlackWhiteSet c # min :: BlackWhiteSet c -> BlackWhiteSet c -> BlackWhiteSet c # | |
Show c => Show (BlackWhiteSet c) Source # | |
Defined in Text.Tokenizer.BlackWhiteSet showsPrec :: Int -> BlackWhiteSet c -> ShowS # show :: BlackWhiteSet c -> String # showList :: [BlackWhiteSet c] -> ShowS # |
Number of symbols acceptable by Repeatable
data Repeatable c Source #
BlackWhiteSet
that can be repeated.
Repeatable | |
|
Instances
Eq c => Eq (Repeatable c) Source # | |
Defined in Text.Tokenizer.Types (==) :: Repeatable c -> Repeatable c -> Bool # (/=) :: Repeatable c -> Repeatable c -> Bool # | |
Ord c => Ord (Repeatable c) Source # | |
Defined in Text.Tokenizer.Types compare :: Repeatable c -> Repeatable c -> Ordering # (<) :: Repeatable c -> Repeatable c -> Bool # (<=) :: Repeatable c -> Repeatable c -> Bool # (>) :: Repeatable c -> Repeatable c -> Bool # (>=) :: Repeatable c -> Repeatable c -> Bool # max :: Repeatable c -> Repeatable c -> Repeatable c # min :: Repeatable c -> Repeatable c -> Repeatable c # | |
Show c => Show (Repeatable c) Source # | |
Defined in Text.Tokenizer.Types showsPrec :: Int -> Repeatable c -> ShowS # show :: Repeatable c -> String # showList :: [Repeatable c] -> ShowS # |
Token with a name of type k
(used for uniqueness error messages and
tokenizing output) over the character type c.
Token | |
|
Uniqueness checking
data ConflictTokens k c Source #
Two ways of tokenizing a string, demonstrating non-uniqueness
ConflictTokens | |
|
Instances
(Eq k, Eq c) => Eq (ConflictTokens k c) Source # | |
Defined in Text.Tokenizer.Uniqueness (==) :: ConflictTokens k c -> ConflictTokens k c -> Bool # (/=) :: ConflictTokens k c -> ConflictTokens k c -> Bool # | |
(Ord k, Ord c) => Ord (ConflictTokens k c) Source # | |
Defined in Text.Tokenizer.Uniqueness compare :: ConflictTokens k c -> ConflictTokens k c -> Ordering # (<) :: ConflictTokens k c -> ConflictTokens k c -> Bool # (<=) :: ConflictTokens k c -> ConflictTokens k c -> Bool # (>) :: ConflictTokens k c -> ConflictTokens k c -> Bool # (>=) :: ConflictTokens k c -> ConflictTokens k c -> Bool # max :: ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c # min :: ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c # | |
(Show k, Show c) => Show (ConflictTokens k c) Source # | |
Defined in Text.Tokenizer.Uniqueness showsPrec :: Int -> ConflictTokens k c -> ShowS # show :: ConflictTokens k c -> String # showList :: [ConflictTokens k c] -> ShowS # |
checkUniqueTokenizing :: forall k c. Ord c => [Token k c] -> Either (ConflictTokens k c) () Source #
Check that there is no list of symbols that can be decomposed in two different ways into tokens from the given list
Splitting a string into tokens
data TokenizeMap k c Source #
Auxiliary structure for tokenizing. Should be used as an opaque type,
initialized by makeTokenizeMap
and combined via its Semigroup
instance.
Instances
(Show c, Show k) => Show (TokenizeMap k c) Source # | |
Defined in Text.Tokenizer.Split showsPrec :: Int -> TokenizeMap k c -> ShowS # show :: TokenizeMap k c -> String # showList :: [TokenizeMap k c] -> ShowS # | |
Ord c => Semigroup (TokenizeMap k c) Source # | |
Defined in Text.Tokenizer.Split (<>) :: TokenizeMap k c -> TokenizeMap k c -> TokenizeMap k c # sconcat :: NonEmpty (TokenizeMap k c) -> TokenizeMap k c # stimes :: Integral b => b -> TokenizeMap k c -> TokenizeMap k c # | |
Ord c => Monoid (TokenizeMap k c) Source # | |
Defined in Text.Tokenizer.Split mempty :: TokenizeMap k c # mappend :: TokenizeMap k c -> TokenizeMap k c -> TokenizeMap k c # mconcat :: [TokenizeMap k c] -> TokenizeMap k c # |
makeTokenizeMap :: Ord c => [Token k c] -> TokenizeMap k c Source #
Create an auxiliary Map for tokenizing. Should be called once for initialization
data TokenizeError k c Source #
Error during tokenizing
Wherever the type [(k, [c])]
is used, it stores a list of pairs: the name of a token
and the part of the string matched by it
NoWayTokenize | |
| |
TwoWaysTokenize | |
|
Instances
(Eq k, Eq c) => Eq (TokenizeError k c) Source # | |
Defined in Text.Tokenizer.Split (==) :: TokenizeError k c -> TokenizeError k c -> Bool # (/=) :: TokenizeError k c -> TokenizeError k c -> Bool # | |
(Show k, Show c) => Show (TokenizeError k c) Source # | |
Defined in Text.Tokenizer.Split showsPrec :: Int -> TokenizeError k c -> ShowS # show :: TokenizeError k c -> String # showList :: [TokenizeError k c] -> ShowS # |
tokenize :: forall k c. Ord c => TokenizeMap k c -> [c] -> Either (TokenizeError k c) [(k, [c])] Source #
Split a list of symbols into tokens.