Copyright | Copyright (C) 2014 - Uwe Schmidt |
---|---|
License | MIT |
Maintainer | Uwe Schmidt <uwe@fh-wedel.de> |
Stability | stable |
Portability | portable |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
W3C XML Schema Regular Expression Matcher
Grammar can be found under http://www.w3.org/TR/xmlschema11-2/#regexs
- data GenRegex s
- mkZero :: s -> GenRegex s
- mkZero' :: StringLike s => String -> GenRegex s
- mkUnit :: GenRegex s
- mkSym :: StringLike s => CharSet -> GenRegex s
- mkSym1 :: StringLike s => Char -> GenRegex s
- mkSymRng :: StringLike s => Char -> Char -> GenRegex s
- mkWord :: StringLike s => [Char] -> GenRegex s
- mkDot :: GenRegex s
- mkStar :: StringLike s => GenRegex s -> GenRegex s
- mkAll :: StringLike s => GenRegex s
- mkAlt :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkElse :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkSeq :: GenRegex s -> GenRegex s -> GenRegex s
- mkSeqs :: [GenRegex s] -> GenRegex s
- mkRep :: StringLike s => Int -> GenRegex s -> GenRegex s
- mkRng :: StringLike s => Int -> Int -> GenRegex s -> GenRegex s
- mkOpt :: StringLike s => GenRegex s -> GenRegex s
- mkDiff :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkIsect :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkExor :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkInterleave :: GenRegex s -> GenRegex s -> GenRegex s
- mkCompl :: StringLike s => GenRegex s -> GenRegex s
- mkBr :: s -> GenRegex s -> GenRegex s
- mkBr' :: StringLike s => String -> GenRegex s -> GenRegex s
- isZero :: GenRegex s -> Bool
- errRegex :: StringLike s => GenRegex s -> s
- nullable :: StringLike s => GenRegex s -> Bool
- nullable' :: StringLike s => GenRegex s -> Nullable s
- delta1 :: StringLike s => Char -> s -> GenRegex s -> GenRegex s
- delta :: StringLike s => s -> GenRegex s -> GenRegex s
- firstChars :: StringLike s => GenRegex s -> CharSet
- matchWithRegex :: StringLike s => GenRegex s -> s -> Bool
- matchWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s)
- splitWithRegex :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s, s)
- splitWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (GenRegex s, s)
- splitWithRegexCS :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (SubexResults s, s)
- splitWithRegexCS' :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (GenRegex s, s)
Documentation
mkZero :: s -> GenRegex s Source
construct the r.e. for the empty set. An (error-) message may be attached
mkZero' :: StringLike s => String -> GenRegex s Source
mkSym :: StringLike s => CharSet -> GenRegex s Source
construct the r.e. for a set of chars
mkSym1 :: StringLike s => Char -> GenRegex s Source
construct an r.e. for a single char set
mkSymRng :: StringLike s => Char -> Char -> GenRegex s Source
construct an r.e. for an intervall of chars
mkWord :: StringLike s => [Char] -> GenRegex s Source
mkSym generaized for strings
mkStar :: StringLike s => GenRegex s -> GenRegex s Source
construct r.e. for r*
mkAll :: StringLike s => GenRegex s Source
construct an r.e. for the set of all Unicode words
mkElse :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s Source
construct the r.e. for r1{|}r2 (r1 orElse r2).
This represents the same r.e. as r1|r2, but when collecting the results of subexpressions in (...) and r1 succeeds, the subexpressions of r2 are discarded, so r1 matches are prioritized
example
splitSubex "({1}x)|({2}.)" "x" = ([("1","x"),("2","x")], "") splitSubex "({1}x){|}({2}.)" "x" = ([("1","x")], "")
mkOpt :: StringLike s => GenRegex s -> GenRegex s Source
Construct option r?
mkDiff :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s Source
Construct difference r.e.: r1 {\} r2
example
match "[a-z]+{\\}bush" "obama" = True match "[a-z]+{\\}bush" "clinton" = True match "[a-z]+{\\}bush" "bush" = False -- not important any more
mkIsect :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s Source
Construct r.e. for intersection: r1 {&} r2
example
match ".*a.*{&}.*b.*" "-a-b-" = True match ".*a.*{&}.*b.*" "-b-a-" = True match ".*a.*{&}.*b.*" "-a-a-" = False match ".*a.*{&}.*b.*" "---b-" = False
mkExor :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s Source
Construct r.e. for exclusive or: r1 {^} r2
example
match "[a-c]+{^}[c-d]+" "abc" = True match "[a-c]+{^}[c-d]+" "acdc" = False match "[a-c]+{^}[c-d]+" "ccc" = False match "[a-c]+{^}[c-d]+" "cdc" = True
mkInterleave :: GenRegex s -> GenRegex s -> GenRegex s Source
mkCompl :: StringLike s => GenRegex s -> GenRegex s Source
Construct the Complement of an r.e.: whole set of words - r
errRegex :: StringLike s => GenRegex s -> s Source
nullable :: StringLike s => GenRegex s -> Bool Source
nullable' :: StringLike s => GenRegex s -> Nullable s Source
delta :: StringLike s => s -> GenRegex s -> GenRegex s Source
firstChars :: StringLike s => GenRegex s -> CharSet Source
FIRST for regular expressions
this is only an approximation, the real set of char may be smaller, when the expression contains intersection, set difference or exor operators
matchWithRegex :: StringLike s => GenRegex s -> s -> Bool Source
matchWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s) Source
splitWithRegex :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s, s) Source
This function wraps the whole regex in a subexpression before starting the parse. This is done for getting access to the whole parsed string. Therfore we need one special label, this label is the Nothing value, all explicit labels are Just labels.
splitWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (GenRegex s, s) Source
The main scanner function
splitWithRegexCS :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (SubexResults s, s) Source
splitWithRegexCS' :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (GenRegex s, s) Source
speedup version for splitWithRegex'
This function checks whether the input starts with a char from FIRST re. If this is not the case, the split fails. The FIRST set can be computed once for a whole tokenizer and reused by every call of split