This represents the same r.e. as r1|r2, but when the results of the subexpressions in (...) are collected and r1 succeeds, the subexpression results of r2 are discarded, so matches of r1 take priority.

example

 splitSubex "({1}x)|({2}.)"   "x" = ([("1","x"),("2","x")], "")

 splitSubex "({1}x){|}({2}.)" "x" = ([("1","x")], "")

mkSeq :: GenRegex l -> GenRegex l -> GenRegex lSource

Construct the sequence r.e. r1.r2

mkSeqs :: [GenRegex l] -> GenRegex lSource

mkSeq extended to lists

mkRep :: Eq l => Int -> GenRegex l -> GenRegex lSource

Construct repetition r{i,}

mkRng :: Int -> Int -> GenRegex l -> GenRegex lSource

Construct range r{i,j}

mkOpt :: GenRegex l -> GenRegex lSource

Construct option r?

mkDiff :: Eq l => GenRegex l -> GenRegex l -> GenRegex lSource

Construct difference r.e.: r1 {\} r2

example

 match "[a-z]+{\\}bush" "obama"     = True
 match "[a-z]+{\\}bush" "clinton"   = True
 match "[a-z]+{\\}bush" "bush"      = False     -- not important any more

mkIsect :: Eq l => GenRegex l -> GenRegex l -> GenRegex lSource

Construct r.e. for intersection: r1 {&} r2

example

 match ".*a.*{&}.*b.*" "-a-b-"  = True
 match ".*a.*{&}.*b.*" "-b-a-"  = True
 match ".*a.*{&}.*b.*" "-a-a-"  = False
 match ".*a.*{&}.*b.*" "---b-"  = False

mkExor :: Eq l => GenRegex l -> GenRegex l -> GenRegex lSource

Construct r.e. for exclusive or: r1 {^} r2

example

 match "[a-c]+{^}[c-d]+" "abc"  = True
 match "[a-c]+{^}[c-d]+" "acdc" = False
 match "[a-c]+{^}[c-d]+" "ccc"  = False
 match "[a-c]+{^}[c-d]+" "cdc"  = True

mkInterleave :: GenRegex l -> GenRegex l -> GenRegex lSource

mkCompl :: Eq l => GenRegex l -> GenRegex lSource

Construct the Complement of an r.e.: whole set of words - r

mkBr :: l -> GenRegex l -> GenRegex lSource

Construct a labeled subexpression: ({label}r)

isZero :: GenRegex l -> Bool Source

errRegex :: GenRegex l -> String Source

nullable :: GenRegex l -> Bool Source

nullable' :: GenRegex l -> Nullable lSource

delta1 :: Eq l => GenRegex l -> Char -> GenRegex lSource

delta :: Eq l => GenRegex l -> String -> GenRegex lSource

firstChars :: GenRegex l -> CharSet Source

FIRST for regular expressions

This is only an approximation: the real set of chars may be smaller when the expression contains intersection, set difference or exor operators.

matchWithRegex :: Eq l => GenRegex l -> String -> Bool Source

matchWithRegex' :: Eq l => GenRegex l -> String -> Maybe [(Label l, String)]Source

splitWithRegex :: Eq l => GenRegex l -> String -> Maybe ([(Label l, String)], String)Source

This function wraps the whole regex in a subexpression before starting the parse. This is done to get access to the whole parsed string. Therefore we need one special label: this label is the Nothing value, while all explicit labels are Just labels.

splitWithRegex' :: Eq l => GenRegex l -> String -> Maybe (GenRegex l, String)Source

The main scanner function

splitWithRegexCS :: Eq l => GenRegex l -> CharSet -> String -> Maybe ([(Label l, String)], String)Source

splitWithRegexCS' :: Eq l => GenRegex l -> CharSet -> String -> Maybe (GenRegex l, String)Source

Speed-up version of splitWithRegex'

This function checks whether the input starts with a char from FIRST re. If this is not the case, the split fails. The FIRST set can be computed once for a whole tokenizer and reused by every call of split