// Implementation of CharRegex: parses a character pattern into a matcher
// template and matches data against such a template.
//
// NOTE(review): several type names in this block appear to have lost their
// `<...>` parameters (e.g. `LinkedNode>$create`, `optional ReadSequence>`).
// This looks like an extraction artifact; confirm against the original file.
// Those tokens are reproduced here exactly as found.
define CharRegex {
  // Parses pattern as an expression and returns the resulting matcher. The
  // iterator position returned by parseExpression is discarded.
  parse (pattern) (matcher) {
    ReadIterator p <- ReadIterator$$fromReadPosition(pattern)
    // TODO: Needs error handling.
    { _, matcher } <- parseExpression(p)
  }

  // Feeds data to a new matcher from template, one element at a time. Returns
  // true only if all of data was consumed and the matcher reports
  // matchSatisfied().
  match (template,data) {
    ReadIterator p <- ReadIterator$$fromReadPosition(data)
    Matcher matcher <- template.newMatcher()
    while (!p.pastForwardEnd()) {
      MatchState state <- matcher.tryNextMatch(p.readCurrent())
      if (state `MatchState$equals` MatchState$matchFail()) {
        // The current element was rejected, so it is not consumed.
        break
      }
      p <- p.forward()
      if (state `MatchState$equals` MatchState$matchComplete()) {
        break
      }
    }
    return p.pastForwardEnd() && matcher.matchSatisfied()
  }

  // Parses consecutive non-sequence items starting at p into a linked
  // sequence, stopping at '|', ')', the end of input, or when
  // parseNonSequence yields no matcher.
  @type parseSequence (ReadIterator) -> (ReadIterator,optional ReadSequence>)
  parseSequence (p) {
    if (p.pastForwardEnd()) {
      return { p, empty }
    }
    { ReadIterator p2, optional MatcherTemplate matcher } <- parseNonSequence(p)
    if (!p2.pastForwardEnd() && (p2.readCurrent() == '|' || p2.readCurrent() == ')')) {
      // Requires choice matching or the end of a subexpression.
      if (present(matcher)) {
        return { p2, LinkedNode>$create(require(matcher),empty) }
      } else {
        // TODO: Disregards errors from parseNonSequence.
        return { p2, LinkedNode>$create(MatchEmpty$create(),empty) }
      }
    }
    if (!present(matcher)) {
      return { p2, empty }
    } else {
      // Recursively parse the remainder; it becomes the second argument of
      // the node created for this matcher.
      { p2, optional ReadSequence> sequence } <- parseSequence(p2)
      return { p2, LinkedNode>$create(require(matcher),sequence) }
    }
  }

  // Parses one item starting at p: a single character, a '[...]' choice or a
  // '(...)' subexpression, optionally followed by one repetition suffix
  // ('*', '+' or '{...}'). Returns with matcher empty if '|', ')' or the end
  // of input is reached before any item was parsed.
  @type parseNonSequence (ReadIterator) -> (ReadIterator,optional MatcherTemplate)
  parseNonSequence (p) (p2,matcher) {
    p2 <- p
    matcher <- empty
    while (!p2.pastForwardEnd()) {
      Char c <- p2.readCurrent()
      if (c == '|' || c == ')') {
        // Requires choice matching or the end of a subexpression.
        return _
      } elif (c == '*') {
        // TODO: Needs error handling.
        return { p2.forward(), MatchBranches$create(BranchRepeat$createZeroPlus(require(matcher))) }
      } elif (c == '+') {
        // TODO: Needs error handling.
        return { p2.forward(), MatchBranches$create(BranchRepeat$createOnePlus(require(matcher))) }
      } elif (c == '{') {
        { p2, Int min, Int max } <- parseRange(p2.forward())
        if (p2.pastForwardEnd() || p2.readCurrent() != '}') {
          // TODO: Needs error handling.
          fail("missing }")
        }
        return { p2.forward(), MatchBranches$create(BranchRepeat$createRange(min,max,require(matcher))) }
      } elif (present(matcher)) {
        // An item was already parsed and c is not a repetition suffix, so c
        // starts the next item; leave it for the caller.
        return _
      } elif (c == '[') {
        { p2, matcher } <- parseCharChoices(p2.forward())
        if (p2.pastForwardEnd() || p2.readCurrent() != ']') {
          // TODO: Needs error handling.
          fail("missing ]")
        }
        p2 <- p2.forward()
      } elif (c == '(') {
        { p2, matcher } <- parseExpression(p2.forward())
        if (p2.pastForwardEnd() || p2.readCurrent() != ')') {
          // TODO: Needs error handling.
          fail("missing )")
        }
        p2 <- p2.forward()
      } else {
        { p2, matcher } <- parseSingleChar(p2)
      }
    }
  }

  // Parses the inside of a '{...}' repetition: either "min" or "min,max".
  // Without a ',' after the first count, max is set equal to min.
  @type parseRange (ReadIterator) -> (ReadIterator,Int,Int)
  parseRange (p) (p2,min,max) {
    max <- 0
    { p2, min } <- parseCount(p)
    if (p2.pastForwardEnd() || p2.readCurrent() != ',') {
      max <- min
    } else {
      { p2, max } <- parseCount(p2.forward())
    }
  }

  // Accumulates consecutive decimal digits starting at p into count. Stops at
  // the first non-digit or the end of input; count stays 0 if no digit is
  // found.
  @type parseCount (ReadIterator) -> (ReadIterator,Int)
  parseCount (p) (p2,count) {
    // TODO: Needs error handling.
    count <- 0
    p2 <- p
    while (!p2.pastForwardEnd()) {
      Char c <- p2.readCurrent()
      if (c >= '0' && c <= '9') {
        count <- 10*count + (c - '0')
      } else {
        break
      }
    } update {
      p2 <- p2.forward()
    }
  }

  // Parses '|'-separated sequences starting at p. Yields MatchEmpty when no
  // sequence was parsed, otherwise MatchChoices over the collected
  // alternatives. Each new alternative is passed to LinkedNode>$create
  // together with the list built so far.
  @type parseExpression (ReadIterator) -> (ReadIterator,optional MatcherTemplate)
  parseExpression (p) (p2,matcher) {
    optional ReadSequence> choices <- empty
    p2 <- p
    while (!p2.pastForwardEnd()) {
      { p2, optional ReadSequence> sequence } <- parseSequence(p2)
      if (!present(sequence)) {
        break
      }
      choices <- LinkedNode>$create(
        MatchBranches$create(BranchSequence$create(sequence)),choices)
      if (p2.pastForwardEnd() || p2.readCurrent() != '|') {
        break
      }
      p2 <- p2.forward()
    }
    if (!present(choices)) {
      matcher <- MatchEmpty$create()
    } else {
      matcher <- MatchChoices$create(choices)
    }
  }

  // Parses one pattern character at p: '\' followed by a character matches
  // that character literally, '.' becomes MatchAny, and anything else matches
  // itself. The caller must ensure p is not past the end.
  @type parseSingleChar (ReadIterator) -> (ReadIterator,optional MatcherTemplate)
  parseSingleChar (p) (p2,matcher) {
    // TODO: Needs error handling.
    Char c <- p.readCurrent()
    p2 <- p.forward()
    if (c == '\\') {
      matcher <- MatchSingle$create(p2.readCurrent())
      // Advance past the escaped character. The previous code assigned
      // p.forward() here, which left p2 on the escaped character so that it
      // was parsed a second time as an ordinary pattern character.
      p2 <- p2.forward()
    } elif (c == '.') {
      matcher <- MatchAny$create()
    } else {
      matcher <- MatchSingle$create(c)
    }
  }

  // Parses the inside of a '[...]' choice starting just after the '['. Stops
  // on the first unescaped ']' without consuming it, or at the end of input.
  // "a-b" adds a MatchRange; other characters add a MatchSingle. A character
  // preceded by '\' is always taken literally, so an escaped ']' does not end
  // the choice and an escaped '-' does not start a range.
  @type parseCharChoices (ReadIterator) -> (ReadIterator,optional MatcherTemplate)
  parseCharChoices (p) (p2,matcher) {
    p2 <- p
    matcher <- empty
    optional Char previous <- empty
    Bool doRange <- false
    optional ReadSequence> choices <- empty
    while (!p2.pastForwardEnd()) {
      Char c <- p2.readCurrent()
      Bool escaped <- false
      if (c == '\\') {
        p2 <- p2.forward()
        if (p2.pastForwardEnd()) {
          // Trailing '\' with nothing after it: stop here so the caller's
          // closing-bracket check reports the problem.
          break
        }
        c <- p2.readCurrent()
        escaped <- true
      }
      if (!escaped && c == ']') {
        break
      } elif (!escaped && c == '-' && present(previous) && !doRange) {
        doRange <- true
      } elif (doRange) {
        choices <- LinkedNode>$create(MatchRange$create(require(previous),c),choices)
        previous <- empty
        doRange <- false
      } elif (present(previous)) {
        // The pending character was not the start of a range; emit it and
        // hold the new one in case a '-' follows.
        choices <- LinkedNode>$create(MatchSingle$create(require(previous)),choices)
        previous <- c
      } else {
        previous <- c
      }
    } update {
      p2 <- p2.forward()
    }
    if (present(previous)) {
      choices <- LinkedNode>$create(MatchSingle$create(require(previous)),choices)
    }
    matcher <- MatchChoices$create(choices)
  }
}