concrete RegexDemo {
  @type run () -> ()
}

define RegexDemo {
  run () {
    ~ testMatch("","",true)
    ~ testMatch("","a",false)
    ~ testMatch(".","a",true)
    ~ testMatch(".","aa",false)
    ~ testMatch("a",".",false)
    ~ testMatch(".+","",false)
    ~ testMatch(".+","a",true)
    ~ testMatch(".+","ab",true)
    ~ testMatch(".*.","a",true)
    ~ testMatch(".*.","ab",true)
    ~ testMatch("(ab)*","",true)
    ~ testMatch("(ab)*","abab",true)
    ~ testMatch("(ab)*","a",false)
    ~ testMatch("(ab)*ac","abac",true)
    ~ testMatch("(ab)*ac","ac",true)
    ~ testMatch("(ab)*ac","aca",false)
    ~ testMatch("(ab)*ac","acab",false)
    ~ testMatch("(ab)*|ac","abab",true)
    ~ testMatch("(ab)*|ac","ac",true)
    ~ testMatch("(ab)*|ac","acac",false)
    ~ testMatch("(ab)*|ac","abac",false)
    ~ testMatch("a{2}","a",false)
    ~ testMatch("a{2}","aa",true)
    ~ testMatch("a{2}","aaa",false)
    ~ testMatch("a{2,3}","a",false)
    ~ testMatch("a{2,3}","aa",true)
    ~ testMatch("a{2,3}","aaa",true)
    ~ testMatch("a{2,3}","aaaa",false)
    ~ testMatch("(a+){4}","aaa",false)
    ~ testMatch("(a+){4}","aaaa",true)
    ~ testMatch("(a+){4}","aaaaa",true)
    ~ testMatch("(a+){4}","aaaaaa",true)
    ~ testMatch("[a-z?]","q",true)
    ~ testMatch("[a-z?]","-",false)
    ~ testMatch("[a-z?]","?",true)
    ~ testMatch("[a-z?]*","abcde?",true)
    ~ testMatch("[a-z?]*","ab-cde",false)

    // A few malicious regexes from https://en.wikipedia.org/wiki/ReDoS.
    //
    // The first three below don't finish, due to repeat branching. The
    // branching of "a+" is O(sqrt(2)^n) and "(a+)+" is therefore O(2^n).
    //
    // The first two could still be matched using non-branching repeats, since
    // there is a complete overlap between the tail of one repeat and the next
    // repeat, and repeats past 1 don't matter. The third could also be if the
    // first 30 were expanded at the beginning.
    //
    // Overall, the safer solution would be to avoid repeat branching, and have
    // a disclaimer about matching accuracy with repeated unbounded patterns
    // that overlap themselves.

    // ~ testMatch("(a+)+","aaaaaaaaaaaaaaaaaaaaaaaa!",false)
    // ~ testMatch("(a+)+","aaaaaaaaaaaaaaaaaaaaaaaa!",false)
    // ~ testMatch("([a-zA-Z]+)*","aaaaaaaaaaaaaaaaaaaaaaaa!",false)
    // ~ testMatch("(.*a){30}","aaaaaaaaaaaaaaaaaaaaaaaa!",false)
    ~ testMatch("(a|aa)+","aaaaaaaaaaaaaaaaaaaaaaaa!",false)

    // This causes a ton of branching, but it actually finishes. Given m
    // unbounded groups in a sequence, the branching is O(n^m). Since n^m
    // increases rapidly with n, most sequence branches at any given time will
    // not contain repeat branches within them. The exception is that each of
    // the m groups will be in separate sequence branches from the beginning,
    // *adding* O(sqrt(2)^n) branches. This means the overall complexity is
    // O(n^m)+O(sqrt(2)^n): O(n^m) for smaller n, O(sqrt(2)^n) for larger n.
    ~ testMatch("[ab]*[ac]*[ad]*[ae]*[af]*",
                "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaad",true)
  }

  @type testMatch (String,String,Bool) -> ()
  testMatch (pattern,data,expected) {
    MatcherTemplate<Char> template <- require(CharRegex$parse(pattern))
    ~ LazyStream<Formatted>$new()
        .append("Trying: regex=\"")
        .append(pattern)
        .append("\" data=\"")
        .append(data)
        .append("\" -> ")
        .append(expected)
        .append("...\n")
        .writeTo(SimpleOutput$stderr())
    if (CharRegex$match(template,data) != expected) {
      fail("Incorrect match result!")
    }
  }
}