Portability | portable |
---|---|
Stability | experimental |
Maintainer | Uwe Schmidt (uwe@fh-wedel.de) |
W3C XML Schema Regular Expression Matcher
Grammar can be found under http://www.w3.org/TR/xmlschema11-2/#regexs
- type Regex = GenRegex String
- data GenRegex l
- mkZero :: String -> GenRegex l
- mkUnit :: GenRegex l
- mkSym :: CharSet -> GenRegex l
- mkSym1 :: Char -> GenRegex l
- mkSymRng :: Char -> Char -> GenRegex l
- mkWord :: [Char] -> GenRegex l
- mkDot :: GenRegex l
- mkStar :: Eq l => GenRegex l -> GenRegex l
- mkAll :: Eq l => GenRegex l
- mkAlt :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
- mkElse :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
- mkSeq :: GenRegex l -> GenRegex l -> GenRegex l
- mkSeqs :: [GenRegex l] -> GenRegex l
- mkRep :: Eq l => Int -> GenRegex l -> GenRegex l
- mkRng :: Int -> Int -> GenRegex l -> GenRegex l
- mkOpt :: GenRegex l -> GenRegex l
- mkDiff :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
- mkIsect :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
- mkExor :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
- mkInterleave :: GenRegex l -> GenRegex l -> GenRegex l
- mkCompl :: Eq l => GenRegex l -> GenRegex l
- mkBr :: l -> GenRegex l -> GenRegex l
- isZero :: GenRegex l -> Bool
- errRegex :: GenRegex l -> String
- nullable :: GenRegex l -> Bool
- nullable' :: GenRegex l -> Nullable l
- delta1 :: Eq l => GenRegex l -> Char -> GenRegex l
- delta :: Eq l => GenRegex l -> String -> GenRegex l
- firstChars :: GenRegex l -> CharSet
- matchWithRegex :: Eq l => GenRegex l -> String -> Bool
- matchWithRegex' :: Eq l => GenRegex l -> String -> Maybe [(Label l, String)]
- splitWithRegex :: Eq l => GenRegex l -> String -> Maybe ([(Label l, String)], String)
- splitWithRegex' :: Eq l => GenRegex l -> String -> Maybe (GenRegex l, String)
- splitWithRegexCS :: Eq l => GenRegex l -> CharSet -> String -> Maybe ([(Label l, String)], String)
- splitWithRegexCS' :: Eq l => GenRegex l -> CharSet -> String -> Maybe (GenRegex l, String)
Documentation
mkZero :: String -> GenRegex l
construct the r.e. for the empty set. An (error-) message may be attached
mkElse :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
construct the r.e. for r1{|}r2 (r1 orElse r2).
This represents the same r.e. as r1|r2, but when the results of subexpressions in (...) are collected and r1 succeeds, the subexpressions of r2 are discarded, so matches of r1 are prioritized.
example
splitSubex "({1}x)|({2}.)" "x" = ([("1","x"),("2","x")], "")
splitSubex "({1}x){|}({2}.)" "x" = ([("1","x")], "")
mkDiff :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
Construct difference r.e.: r1 {\} r2
example
match "[a-z]+{\\}bush" "obama" = True
match "[a-z]+{\\}bush" "clinton" = True
match "[a-z]+{\\}bush" "bush" = False -- not important any more
mkIsect :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
Construct r.e. for intersection: r1 {&} r2
example
match ".*a.*{&}.*b.*" "-a-b-" = True
match ".*a.*{&}.*b.*" "-b-a-" = True
match ".*a.*{&}.*b.*" "-a-a-" = False
match ".*a.*{&}.*b.*" "---b-" = False
mkExor :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
Construct r.e. for exclusive or: r1 {^} r2
example
match "[a-c]+{^}[c-d]+" "abc" = True
match "[a-c]+{^}[c-d]+" "acdc" = False
match "[a-c]+{^}[c-d]+" "ccc" = False
match "[a-c]+{^}[c-d]+" "cdc" = True
mkInterleave :: GenRegex l -> GenRegex l -> GenRegex l
mkCompl :: Eq l => GenRegex l -> GenRegex l
Construct the Complement of an r.e.: whole set of words - r
firstChars :: GenRegex l -> CharSet
FIRST for regular expressions
This is only an approximation: the real set of chars may be smaller when the expression contains intersection, set difference or exor operators.
splitWithRegex :: Eq l => GenRegex l -> String -> Maybe ([(Label l, String)], String)
This function wraps the whole regex in a subexpression before starting the parse. This is done to get access to the whole parsed string. Therefore we need one special label: this label is the Nothing value, while all explicit labels are Just labels.
splitWithRegex' :: Eq l => GenRegex l -> String -> Maybe (GenRegex l, String)
The main scanner function
splitWithRegexCS :: Eq l => GenRegex l -> CharSet -> String -> Maybe ([(Label l, String)], String)
splitWithRegexCS' :: Eq l => GenRegex l -> CharSet -> String -> Maybe (GenRegex l, String)
speedup version for splitWithRegex'
This function checks whether the input starts with a char from FIRST re. If this is not the case, the split fails. The FIRST set can be computed once for a whole tokenizer and reused by every call of split