// Table-driven test data for the regex package.

package regex
import (
"fmt"
"slices"
"testing"
)
var reTests = []struct {
re string
flags []ReFlag
str string
result []Group // Stores all zero-groups in the match
}{
{"a", nil, "abc", []Group{{0, 1}}},
{"a", nil, "bca", []Group{{2, 3}}},
{"l", nil, "ggllgg", []Group{{2, 3}, {3, 4}}},
{"(b|c)", nil, "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}},
{"a+", nil, "brerereraaaaabbbbb", []Group{{8, 13}}},
{"ab+", nil, "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}},
{"(b|c|A)", nil, "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}},
{"ab*", nil, "a", []Group{{0, 1}}},
{"ab*", nil, "abb", []Group{{0, 3}}},
{"a*b", nil, "aaab", []Group{{0, 4}}},
{"a*b", nil, "qwqw", []Group{}},
{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
{"b*a*a", nil, "bba", []Group{{0, 3}}},
{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
{"a(b|c)*d+", nil, "abccdd", []Group{{0, 6}}},
{"a*", nil, "", []Group{{0, 0}}},
{"a|b", nil, "c", []Group{}},
{"(a|b)*c", nil, "aabbc", []Group{{0, 5}}},
{"a(b|b)", nil, "ab", []Group{{0, 2}}},
{"a*", nil, "aaaaaaaa", []Group{{0, 8}, {8, 8}}},
{"ab?", nil, "ab", []Group{{0, 2}}},
{"a?b", nil, "ab", []Group{{0, 2}}},
{"a?", nil, "", []Group{{0, 0}}},
{"a?b?c", nil, "a", []Group{}},
{"a?b?c?", nil, "ab", []Group{{0, 2}, {2, 2}}},
{"a?b?c?", nil, "ac", []Group{{0, 2}, {2, 2}}},
{"a?b?c", nil, "abc", []Group{{0, 3}}},
{"a?b?c", nil, "acb", []Group{{0, 2}}},
{"[abc]", nil, "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}},
{"[ab]c", nil, "ab", []Group{}},
{"g[ab]c", nil, "gac", []Group{{0, 3}}},
{"g[ab]c", nil, "gbc", []Group{{0, 3}}},
{"g[ab]c", nil, "gc", []Group{}},
{"g[ab]c", nil, "gfc", []Group{}},
{"[ab]*", nil, "aabbbabaababab", []Group{{0, 14}, {14, 14}}},
{"[ab]+", nil, "aabbbablaababab", []Group{{0, 7}, {8, 15}}},
{"[Ff]r[Uu]it", nil, "fruit", []Group{{0, 5}}},
{"[Ff]r[Uu]it", nil, "FrUit", []Group{{0, 5}}},
{"[Ff]r[Uu|]it", nil, "Fr|it", []Group{{0, 5}}},
{"[Ff]r([Uu]|[pP])it", nil, "Frpit", []Group{{0, 5}}},
{"[Ff]r[Uu]|[pP]it", nil, "Frpit", []Group{{2, 5}}},
{"[a-zA-Z]+", nil, "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
{".+", nil, "Hello, how is it going?", []Group{{0, 23}}},
{"a.", nil, "a ", []Group{{0, 2}}},
{"a.b", nil, "a/b", []Group{{0, 3}}},
{".", nil, "a ", []Group{{0, 1}, {1, 2}}},
{"a.", nil, "a ", []Group{{0, 2}}},
{".+b", nil, "abc", []Group{{0, 2}}},
{`\d`, nil, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
{`\\`, nil, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
{`\W`, nil, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
{`\w`, nil, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
{`\s`, nil, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
{`\<`, nil, "<HTML><body>", []Group{{0, 1}, {6, 7}}},
{`\(.+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}},
{"[^abc]+", nil, "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
{"[^a]+", nil, "qqqaq", []Group{{0, 3}, {4, 5}}},
{"[^0-9]+", nil, "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
{"[^abc]+", nil, "ababababbababaccacacacaca", []Group{}},
{`\[`, nil, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}},
{`\([^)]+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}},
{"^ab", nil, "ab bab", []Group{{0, 2}}},
{"^aaaa^", nil, "aaaaaaaa", []Group{}},
{"^([bB][Gg])", nil, "bG", []Group{{0, 2}}},
{"b$", nil, "ba", []Group{}},
{"(boy|girl)$", nil, "girlf", []Group{}},
{`\bint\b`, nil, "print int integer", []Group{{6, 9}}},
{`int\b`, nil, "ints", []Group{}},
{`int(\b|a)`, nil, "inta", []Group{{0, 4}}},
{`\b\d+\b`, nil, "511 a3 43", []Group{{0, 3}, {7, 9}}},
{`\Bint\B`, nil, "prints int integer print", []Group{{2, 5}}},
{`^`, nil, "5^3^2", []Group{{0, 0}}},
{`\^`, nil, "5^3^2", []Group{{1, 2}, {3, 4}}},
{`pool$`, nil, "pool carpool", []Group{{8, 12}}},
{`^int$`, nil, "print int integer", []Group{}},
{`^int$`, nil, "int", []Group{{0, 3}}},
{`b*`, nil, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
{"a{4}", nil, "aabaaa", []Group{}},
{"ab{5}", nil, "abbbbbab", []Group{{0, 6}}},
{"(a|b){3,4}", nil, "aba", []Group{{0, 3}}},
{"(a|b){3,4}", nil, "ababaa", []Group{{0, 4}}},
{"(bc){5,}", nil, "bcbcbcbcbcbcbcbc", []Group{{0, 16}}},
{`\d{3,4}`, nil, "1209", []Group{{0, 4}}},
{`\d{3,4}`, nil, "109", []Group{{0, 3}}},
{`\d{3,4}`, nil, "5", []Group{}},
{`\d{3,4}`, nil, "123135", []Group{{0, 4}}},
{`\d{3,4}`, nil, "89a-0", []Group{}},
{`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
{`\bpaint\b`, nil, "paints", []Group{}},
{`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
{`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
{`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
{`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
// Unicode tests
{`.+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
{`a.b`, nil, "a²b", []Group{{0, 3}}},
{`[^a]+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
// Fun experiment - AI-generated tests
{"(abc|def|ghi)", nil, "abcdefg", []Group{{0, 3}, {3, 6}}},
{"a(b|c)d", nil, "abcd", []Group{}},
{"a(b|c)*d", nil, "abcbcd", []Group{{0, 6}}},
{"a(b|c)+d", nil, "abcbcd", []Group{{0, 6}}},
{"a(b|c)?d", nil, "abd", []Group{{0, 3}}},
{".+", nil, "hello world", []Group{{0, 11}}},
{"a.b", nil, "aXb", []Group{{0, 3}}},
{"a.*b", nil, "aXb", []Group{{0, 3}}},
{"a.{2,3}b", nil, "aXXb", []Group{{0, 4}}},
{"a.{2,}b", nil, "aXXXb", []Group{{0, 5}}},
{"a.{0,3}b", nil, "ab", []Group{{0, 2}}},
{"[abc]+", nil, "abcabc", []Group{{0, 6}}},
{"[a-zA-Z]+", nil, "HelloWorld", []Group{{0, 10}}},
{"[^abc]+", nil, "defghi", []Group{{0, 6}}},
{"^hello", nil, "hello world", []Group{{0, 5}}},
{"world$", nil, "hello world", []Group{{6, 11}}},
{`\bhello\b`, nil, "hello world", []Group{{0, 5}}},
{`\Bhello\B`, nil, "hello world", []Group{}},
{"(hello|world)", nil, "hello world", []Group{{0, 5}, {6, 11}}},
{"(hello|world)+", nil, "hello world", []Group{{0, 5}, {6, 11}}},
{"(hello|world)*", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
{"(hello|world)?", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
{"ú.+ï", nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}},
{"(?=hello)", nil, "hello world", []Group{{0, 0}}},
{"(?!hello)", nil, "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
{"(?<=hello)", nil, "hello world", []Group{{5, 5}}},
{"(?<!hello)", nil, "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "40", []Group{{0, 2}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "040", []Group{}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "400", []Group{{0, 3}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "4000", []Group{}},
{"a{1,3}", nil, "aaaaa", []Group{{0, 3}, {3, 5}}},
{`\\[ab\\]`, nil, "a", []Group{}},
{`\\[ab\\]`, nil, `\a`, []Group{{0, 2}}},
2 months ago
// Lookaround tests
{"(?<=bo)y", nil, "boy", []Group{{2, 3}}},
{"bo(?=y)", nil, "boy", []Group{{0, 2}}},
{"(?<=f)f+(?=f)", nil, "fffff", []Group{{1, 4}}},
{"(?<=f)f+(?=f)", nil, "fffffa", []Group{{1, 4}}},
2 months ago
// Some POSIX charclass tests
{"[[:lower:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{0, 26}}},
{"[[:upper:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{26, 52}}},
{"[[:alpha:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{0, 52}}},
{"[[:digit:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{52, 62}}},
{"[[:alnum:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{0, 62}}},
{"[[:punct:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{62, 70}}},
{"[[:ascii:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{0, 70}}},
{"[[:graph:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{0, 70}}},
// Test cases from Python's RE test suite
{`[\1]`, nil, "\x01", []Group{{0, 1}}},
{`\0`, nil, "\x00", []Group{{0, 1}}},
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
{`[a\0]`, nil, "\x00", []Group{{0, 1}}},
{`[^a\0]`, nil, "\x00", []Group{}},
{`\a[\b]\f\n\r\t\v`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
{`[\a][\b][\f][\n][\r][\t][\v]`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
{`\u`, nil, "", nil},
{`\xff`, nil, "ÿ", []Group{{0, 1}}},
{`\x00ffffffffffffff`, nil, "\xff", []Group{}},
{`\x00f`, nil, "\x0f", []Group{}},
{`\x00fe`, nil, "\xfe", []Group{}},
{`^\w+=(\\[\000-\277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
{`a.b`, nil, `acb`, []Group{{0, 3}}},
{`a.b`, nil, "a\nb", []Group{}},
{`a.*b`, nil, "acc\nccb", []Group{}},
{`a.{4,5}b`, nil, "acc\nccb", []Group{}},
{`a.b`, nil, "a\rb", []Group{{0, 3}}},
{`a.b`, []ReFlag{RE_SINGLE_LINE}, "a\nb", []Group{{0, 3}}},
{`a.*b`, []ReFlag{RE_SINGLE_LINE}, "acc\nccb", []Group{{0, 7}}},
{`a.{4,5}b`, []ReFlag{RE_SINGLE_LINE}, "acc\nccb", []Group{{0, 7}}},
{`)`, nil, ``, nil},
{`^$`, nil, ``, []Group{{0, 0}}},
{`abc`, nil, `abc`, []Group{{0, 3}}},
{`abc`, nil, `xbc`, []Group{}},
{`abc`, nil, `axc`, []Group{}},
{`abc`, nil, `abx`, []Group{}},
{`abc`, nil, `xabcy`, []Group{{1, 4}}},
{`abc`, nil, `ababc`, []Group{{2, 5}}},
{`ab*c`, nil, `abc`, []Group{{0, 3}}},
{`ab*bc`, nil, `abc`, []Group{{0, 3}}},
{`ab*bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab*bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab{0,}c`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab+bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab+bc`, nil, `abc`, []Group{}},
{`ab+bc`, nil, `abq`, []Group{}},
{`ab{1,}bc`, nil, `abq`, []Group{}},
{`ab+bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab{1,}bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab{1,3}bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab{3,4}bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab{4,5}bc`, nil, `abbbbc`, []Group{}},
{`ab?bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab?bc`, nil, `abc`, []Group{{0, 3}}},
{`ab{0,1}bc`, nil, `abc`, []Group{{0, 3}}},
{`ab?bc`, nil, `abbbbc`, []Group{}},
{`ab?c`, nil, `abc`, []Group{{0, 3}}},
{`^abc$`, nil, `abc`, []Group{{0, 3}}},
{`^abc$`, nil, `abcc`, []Group{}},
{`^abc`, nil, `abcc`, []Group{{0, 3}}},
{`^abc$`, nil, `aabc`, []Group{}},
{`abc$`, nil, `aabc`, []Group{{1, 4}}},
{`^`, nil, `abc`, []Group{{0, 0}}},
{`$`, nil, `abc`, []Group{{3, 3}}},
{`a.c`, nil, `abc`, []Group{{0, 3}}},
{`a.c`, nil, `axc`, []Group{{0, 3}}},
{`a.*c`, nil, `axyzc`, []Group{{0, 5}}},
{`a.*c`, nil, `axyzd`, []Group{}},
{`a[bc]d`, nil, `abc`, []Group{}},
{`a[bc]d`, nil, `abd`, []Group{{0, 3}}},
{`a[b-d]e`, nil, `abd`, []Group{}},
{`a[b-d]e`, nil, `ace`, []Group{{0, 3}}},
{`a[b-d]`, nil, `aac`, []Group{{1, 3}}},
{`a[-b]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
{`a[\-b]`, nil, `a-`, []Group{{0, 2}}},
{`a[b-]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
{`a[]b`, nil, `-`, nil},
{`a[`, nil, `-`, nil},
{`a\`, nil, `-`, nil},
{`abc)`, nil, `-`, nil},
{`(abc`, nil, `-`, nil},
{`a]`, nil, `a]`, []Group{{0, 2}}},
{`a[]]b`, nil, `a]b`, []Group{{0, 3}}},
{`a[\]]b`, nil, `a]b`, []Group{{0, 3}}},
{`a[^bc]d`, nil, `aed`, []Group{{0, 3}}},
{`a[^bc]d`, nil, `abd`, []Group{}},
{`a[^-b]c`, nil, `adc`, []Group{{0, 3}}},
{`a[^-b]c`, nil, `a-c`, []Group{}},
{`a[^]b]c`, nil, `a]c`, []Group{}},
{`a[^]b]c`, nil, `adc`, []Group{{0, 3}}},
{`\ba\b`, nil, `a-`, []Group{{0, 1}}},
{`\ba\b`, nil, `-a`, []Group{{1, 2}}},
{`\ba\b`, nil, `-a-`, []Group{{1, 2}}},
{`\by\b`, nil, `xy`, []Group{}},
{`\by\b`, nil, `yz`, []Group{}},
{`\by\b`, nil, `xyz`, []Group{}},
{`x\b`, nil, `xyz`, []Group{}},
{`x\B`, nil, `xyz`, []Group{{0, 1}}},
{`\Bz`, nil, `xyz`, []Group{{2, 3}}},
{`z\B`, nil, `xyz`, []Group{}},
{`\Bx`, nil, `xyz`, []Group{}},
{`\Ba\B`, nil, `a-`, []Group{}},
{`\Ba\B`, nil, `-a`, []Group{}},
{`\Ba\B`, nil, `-a-`, []Group{}},
{`\By\B`, nil, `xy`, []Group{}},
{`\By\B`, nil, `yz`, []Group{}},
{`\By\b`, nil, `xy`, []Group{{1, 2}}},
{`\by\B`, nil, `yz`, []Group{{0, 1}}},
{`\By\B`, nil, `xyz`, []Group{{1, 2}}},
{`ab|cd`, nil, `abc`, []Group{{0, 2}}},
{`ab|cd`, nil, `abcd`, []Group{{0, 2}, {2, 4}}},
{`$b`, nil, `b`, []Group{}},
{`a\(b`, nil, `a(b`, []Group{{0, 3}}},
{`a\(*b`, nil, `ab`, []Group{{0, 2}}},
{`a\(*b`, nil, `a((b`, []Group{{0, 4}}},
{`a\\b`, nil, `a\b`, []Group{{0, 3}}},
{`a+b+c`, nil, `aabbabc`, []Group{{4, 7}}},
{`a{1,}b{1,}c`, nil, `aabbabc`, []Group{{4, 7}}},
{`)(`, nil, `-`, nil},
{`[^ab]*`, nil, `cde`, []Group{{0, 3}, {3, 3}}},
{`abc`, nil, ``, []Group{}},
{`a*`, nil, ``, []Group{{0, 0}}},
{`a|b|c|d|e`, nil, `e`, []Group{{0, 1}}},
{`abcd*efg`, nil, `abcdefg`, []Group{{0, 7}}},
{`ab*`, nil, `xabyabbbz`, []Group{{1, 3}, {4, 8}}},
{`ab*`, nil, `xayabbbz`, []Group{{1, 2}, {3, 7}}},
{`[abhgefdc]ij`, nil, `hij`, []Group{{0, 3}}},
{`a[bcd]*dcdcde`, nil, `adcdcde`, []Group{{0, 7}}},
{`a[bcd]+dcdcde`, nil, `adcdcde`, []Group{}},
{`[a-zA-Z_][a-zA-Z0-9_]*`, nil, `alpha`, []Group{{0, 5}}},
{`multiple words of text`, nil, `uh-uh`, []Group{}},
{`multiple words`, nil, `multiple words, yeah`, []Group{{0, 14}}},
{`[k]`, nil, `ab`, []Group{}},
{`a[-]?c`, nil, `ac`, []Group{{0, 2}}},
{`^(.+)?B`, nil, `AB`, []Group{{0, 2}}},
{`\0009`, nil, "\x009", []Group{{0, 2}}},
{`\141`, nil, "a", []Group{{0, 1}}},
// At this point, the python test suite has a bunch
// of backreference tests. Since my engine doesn't
// implement backreferences, I've skipped those tests.
{`*a`, nil, ``, nil},
{`(*)b`, nil, ``, nil},
{`a**`, nil, ``, nil},
{`^`, nil, `abc`, []Group{{0, 0}}},
{`$`, nil, `abc`, []Group{{3, 3}}},
{`a[b-]`, nil, `a-`, []Group{{0, 2}}},
{`a[b-a]`, nil, `a-`, nil},
// Case-insensitive matching tests
{`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `XBC`, []Group{}},
{`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `AXC`, []Group{}},
{`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABX`, []Group{}},
{`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `XABCY`, []Group{{1, 4}}},
{`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABABC`, []Group{{2, 5}}},
{`ab*c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`ab*bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`ab*bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
{`ab*bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
{`ab{0,}c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
{`ab+bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
{`ab+bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{}},
{`ab+bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABQ`, []Group{}},
{`ab{1,}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABQ`, []Group{}},
{`ab+bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
{`ab{1,}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
{`ab{1,3}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
{`ab{3,4}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
{`ab{4,5}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{}},
{`ab?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
{`ab?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`ab{0,1}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`ab?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{}},
{`ab?c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`^abc$`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`^abc$`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCC`, []Group{}},
{`^abc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCC`, []Group{{0, 3}}},
{`^abc$`, []ReFlag{RE_CASE_INSENSITIVE}, `AABC`, []Group{}},
{`abc$`, []ReFlag{RE_CASE_INSENSITIVE}, `AABC`, []Group{{1, 4}}},
{`^`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 0}}},
{`$`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{3, 3}}},
{`a.c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`a.c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXC`, []Group{{0, 3}}},
{`a.*c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXYZC`, []Group{{0, 5}}},
{`a.*c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXYZD`, []Group{}},
{`a[bc]d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{}},
{`a[bc]d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABD`, []Group{{0, 3}}},
{`a[b-d]e`, []ReFlag{RE_CASE_INSENSITIVE}, `ABD`, []Group{}},
{`a[b-d]e`, []ReFlag{RE_CASE_INSENSITIVE}, `ACE`, []Group{{0, 3}}},
{`a[b-d]`, []ReFlag{RE_CASE_INSENSITIVE}, `AAC`, []Group{{1, 3}}},
{`a[-b]`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
{`a[\-b]`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{{0, 2}}},
{`a[b-]`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
{`a[]b`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil},
{`a[`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil},
{`a\`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil},
{`abc)`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil},
{`(abc`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil},
{`a]`, []ReFlag{RE_CASE_INSENSITIVE}, `A]`, []Group{{0, 2}}},
{`a[]]b`, []ReFlag{RE_CASE_INSENSITIVE}, `A]B`, []Group{{0, 3}}},
{`a[\]]b`, []ReFlag{RE_CASE_INSENSITIVE}, `A]B`, []Group{{0, 3}}},
{`a[^bc]d`, []ReFlag{RE_CASE_INSENSITIVE}, `AED`, []Group{{0, 3}}},
{`a[^bc]d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABD`, []Group{}},
{`a[^-b]c`, []ReFlag{RE_CASE_INSENSITIVE}, `ADC`, []Group{{0, 3}}},
{`a[^-b]c`, []ReFlag{RE_CASE_INSENSITIVE}, `A-C`, []Group{}},
{`a[^]b]c`, []ReFlag{RE_CASE_INSENSITIVE}, `A]C`, []Group{}},
{`a[^]b]c`, []ReFlag{RE_CASE_INSENSITIVE}, `ADC`, []Group{{0, 3}}},
{`\ba\b`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{{0, 1}}},
{`\ba\b`, []ReFlag{RE_CASE_INSENSITIVE}, `-A`, []Group{{1, 2}}},
{`\ba\b`, []ReFlag{RE_CASE_INSENSITIVE}, `-A-`, []Group{{1, 2}}},
{`\by\b`, []ReFlag{RE_CASE_INSENSITIVE}, `XY`, []Group{}},
{`\by\b`, []ReFlag{RE_CASE_INSENSITIVE}, `YZ`, []Group{}},
{`\by\b`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{}},
{`x\b`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{}},
{`x\B`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{{0, 1}}},
{`\Bz`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{{2, 3}}},
{`z\B`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{}},
{`\Bx`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{}},
{`\Ba\B`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{}},
{`\Ba\B`, []ReFlag{RE_CASE_INSENSITIVE}, `-A`, []Group{}},
{`\Ba\B`, []ReFlag{RE_CASE_INSENSITIVE}, `-A-`, []Group{}},
{`\By\B`, []ReFlag{RE_CASE_INSENSITIVE}, `XY`, []Group{}},
{`\By\B`, []ReFlag{RE_CASE_INSENSITIVE}, `YZ`, []Group{}},
{`\By\b`, []ReFlag{RE_CASE_INSENSITIVE}, `XY`, []Group{{1, 2}}},
{`\by\B`, []ReFlag{RE_CASE_INSENSITIVE}, `YZ`, []Group{{0, 1}}},
{`\By\B`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{{1, 2}}},
{`ab|cd`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 2}}},
{`ab|cd`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Group{{0, 2}, {2, 4}}},
{`$b`, []ReFlag{RE_CASE_INSENSITIVE}, `B`, []Group{}},
{`a\(b`, []ReFlag{RE_CASE_INSENSITIVE}, `A(B`, []Group{{0, 3}}},
{`a\(*b`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Group{{0, 2}}},
{`a\(*b`, []ReFlag{RE_CASE_INSENSITIVE}, `A((B`, []Group{{0, 4}}},
{`a\\b`, []ReFlag{RE_CASE_INSENSITIVE}, `A\B`, []Group{{0, 3}}},
{`a+b+c`, []ReFlag{RE_CASE_INSENSITIVE}, `AABBABC`, []Group{{4, 7}}},
{`a{1,}b{1,}c`, []ReFlag{RE_CASE_INSENSITIVE}, `AABBABC`, []Group{{4, 7}}},
{`)(`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil},
{`[^ab]*`, []ReFlag{RE_CASE_INSENSITIVE}, `CDE`, []Group{{0, 3}, {3, 3}}},
{`abc`, []ReFlag{RE_CASE_INSENSITIVE}, ``, []Group{}},
{`a*`, []ReFlag{RE_CASE_INSENSITIVE}, ``, []Group{{0, 0}}},
{`a|b|c|d|e`, []ReFlag{RE_CASE_INSENSITIVE}, `E`, []Group{{0, 1}}},
{`abcd*efg`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDEFG`, []Group{{0, 7}}},
{`ab*`, []ReFlag{RE_CASE_INSENSITIVE}, `XABYABBBZ`, []Group{{1, 3}, {4, 8}}},
{`ab*`, []ReFlag{RE_CASE_INSENSITIVE}, `XAYABBBZ`, []Group{{1, 2}, {3, 7}}},
{`[abhgefdc]ij`, []ReFlag{RE_CASE_INSENSITIVE}, `HIJ`, []Group{{0, 3}}},
{`a[bcd]*dcdcde`, []ReFlag{RE_CASE_INSENSITIVE}, `ADCDCDE`, []Group{{0, 7}}},
{`a[bcd]+dcdcde`, []ReFlag{RE_CASE_INSENSITIVE}, `ADCDCDE`, []Group{}},
{`[a-zA-Z_][a-zA-Z0-9_]*`, []ReFlag{RE_CASE_INSENSITIVE}, `ALPHA`, []Group{{0, 5}}},
{`multiple words of text`, []ReFlag{RE_CASE_INSENSITIVE}, `UH-UH`, []Group{}},
{`multiple words`, []ReFlag{RE_CASE_INSENSITIVE}, `MULTIPLE WORDS, YEAH`, []Group{{0, 14}}},
{`[k]`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Group{}},
{`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}},
{`^(.+)?B`, []ReFlag{RE_CASE_INSENSITIVE}, `ab`, []Group{{0, 2}}},
{`\0009`, []ReFlag{RE_CASE_INSENSITIVE}, "\x009", []Group{{0, 2}}},
{`\141`, []ReFlag{RE_CASE_INSENSITIVE}, "A", []Group{{0, 1}}},
{`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}},
{`a(?!b).`, nil, `abad`, []Group{{2, 4}}},
{`a(?=d).`, nil, `abad`, []Group{{2, 4}}},
{`a(?=c|d).`, nil, `abad`, []Group{{2, 4}}},
{`^abc`, nil, "jkl\nabc\nxyz", []Group{}},
{`^abc`, []ReFlag{RE_MULTILINE}, "jkl\nabc\nxyz", []Group{{4, 7}}},
{`abc$`, nil, "jkl\nabc\nxyz", []Group{}},
{`abc$`, []ReFlag{RE_MULTILINE}, "jkl\nabc\nxyz", []Group{{4, 7}}},
{`abc$`, []ReFlag{RE_MULTILINE}, "jkl\n123abc\nxyz", []Group{{7, 10}}},
{`^`, nil, "jkl\n123abc\nxyz", []Group{{0, 0}}},
{`^`, []ReFlag{RE_MULTILINE}, "jkl\n123abc\nxyz", []Group{{0, 0}, {4, 4}, {11, 11}}},
{`\A`, []ReFlag{RE_MULTILINE}, "jkl\n123abc\nxyz", []Group{{0, 0}}},
{`$`, nil, "jkl\n123abc\nxyz", []Group{{14, 14}}},
{`$`, []ReFlag{RE_MULTILINE}, "jkl\n123abc\nxyz", []Group{{3, 3}, {10, 10}, {14, 14}}},
{`\z`, []ReFlag{RE_MULTILINE}, "jkl\n123abc\nxyz", []Group{{14, 14}}},
{`^abc\z`, []ReFlag{RE_MULTILINE}, "abc\nabc\nabc", []Group{{8, 11}}},
{`a.b`, nil, "a\nb", []Group{}},
{`a.b`, []ReFlag{RE_SINGLE_LINE}, "a\nb", []Group{{0, 3}}},
{`\w+`, nil, `--ab_cd0123--`, []Group{{2, 11}}},
{`[\w]+`, nil, `--ab_cd0123--`, []Group{{2, 11}}},
{`\D+`, nil, `1234abc5678`, []Group{{4, 7}}},
{`[\D]+`, nil, `1234abc5678`, []Group{{4, 7}}},
{`[\D5]+`, nil, `1234abc5678`, []Group{{4, 8}}},
{`[\da-fA-F]+`, nil, `123abc`, []Group{{0, 6}}},
{`\xff`, nil, "\u00ff", []Group{{0, 1}}},
{`\xFF`, nil, "\u00ff", []Group{{0, 1}}},
{`\x00ff`, nil, "\u00ff", []Group{}},
{`\x{0000ff}`, nil, "\u00ff", []Group{{0, 1}}},
{`\x{0000FF}`, nil, "\u00ff", []Group{{0, 1}}},
{"\t\n\v\r\f\a", nil, "\t\n\v\r\f\a", []Group{{0, 6}}},
{`\t\n\v\r\f\a`, nil, "\t\n\v\r\f\a", []Group{{0, 6}}},
{`[\t][\n][\v][\r][\f][\b]`, nil, "\t\n\v\r\f\b", []Group{{0, 6}}},
{`.*d`, nil, "abc\nabd", []Group{{4, 7}}},
{`(`, nil, "-", nil},
{`[\41]`, nil, `!`, []Group{{0, 1}}},
{`(?<!abc)(d.f)`, nil, `abcdefdof`, []Group{{6, 9}}},
{`[\w-]+`, nil, `laser_beam`, []Group{{0, 10}}},
{`M+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
{`m+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
{`[M]+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
{`[m]+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
{`^*`, nil, `-`, nil},
{`a[^>]*b`, nil, `a>b`, []Group{}},
{`^a*$`, nil, `foo`, []Group{}},
// Out-of-bounds for character classes
{`[b-e]`, nil, `a`, []Group{}},
{`[b-e]`, nil, `f`, []Group{}},
{`*?`, nil, `-`, nil},
{`a*?`, nil, `-`, nil}, // non-greedy operators are not supported
// Numeric range tests - this is a feature that I added, and doesn't exist
// in any other mainstream regex engine
{`<0-255>`, nil, `0`, []Group{{0, 1}}},
{`<0-255>`, nil, `7`, []Group{{0, 1}}},
{`<0-255>`, nil, `46`, []Group{{0, 2}}},
{`<0-255>`, nil, `90`, []Group{{0, 2}}},
{`<0-255>`, nil, `107`, []Group{{0, 3}}},
{`<0-255>`, nil, `198`, []Group{{0, 3}}},
{`<0-255>`, nil, `254`, []Group{{0, 3}}},
{`<0-255>`, nil, `255`, []Group{{0, 3}}},
{`<0-255>`, nil, `256`, []Group{{0, 2}, {2, 3}}},
{`^<0-255>$`, nil, `256`, []Group{}},
{`^<0-299792458>$`, nil, `299000999`, []Group{{0, 9}}},
{`^<0-299792458>$`, nil, `299792531`, []Group{}},
{`^<3-0>$`, nil, `-`, nil},
{`^<0-0>$`, nil, `0`, []Group{{0, 1}}},
{`2<0-55>`, nil, `231`, []Group{{0, 3}}},
{`2<0-55>`, nil, `271`, []Group{{0, 2}}},
{`^2<0-55>$`, nil, `271`, []Group{}},
{`<389`, nil, `-`, nil},
{`<389>`, nil, `-`, nil},
{`<-389>`, nil, `-`, nil},
{`<389->`, nil, `-`, nil},
{`<389-400`, nil, `-`, nil},
{`<389-400>`, nil, `391`, []Group{{0, 3}}},
{`\b<1-10000>\b`, nil, `America declared independence in 1776.`, []Group{{33, 37}}},
}
var groupTests = []struct {
re string
flags []ReFlag
str string
result []Match
}{
{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
{"(0)", nil, "ab", []Match{[]Group{}}},
{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
{"(a)|(b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}},
{"(a+)(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}},
{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
{"a((b.d){3})", nil, "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}},
// Test cases from Python's RE test suite
{`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, nil, `abcdefghijkl9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}},
{`()ef`, nil, `def`, []Match{[]Group{{1, 3}, {1, 1}}}},
{`(?:)ef`, nil, `def`, []Match{[]Group{{1, 3}}}},
{`(?:)`, nil, `def`, []Match{[]Group{{0, 0}}, []Group{{1, 1}}, []Group{{2, 2}}, []Group{{3, 3}}}},
{`((a))`, nil, `abc`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}}}},
{`(a)b(c)`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 1}, {2, 3}}}},
{`(a+|b)*`, nil, `ab`, []Match{[]Group{{0, 2}, {1, 2}}, []Group{{2, 2}}}},
{`(a+|b){0,}`, nil, `ab`, []Match{[]Group{{0, 2}, {1, 2}}, []Group{{2, 2}}}},
{`(a+|b)+`, nil, `ab`, []Match{[]Group{{0, 2}, {1, 2}}}},
{`(a+|b){1,}`, nil, `ab`, []Match{[]Group{{0, 2}, {1, 2}}}},
{`(a+|b)?`, nil, `ab`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}, []Group{{2, 2}}}},
{`(a+|b){0,1}`, nil, `ab`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}, []Group{{2, 2}}}},
{`(a|b|c|d|e)f`, nil, `ef`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(ab|cd)e`, nil, `abcde`, []Match{[]Group{{2, 5}, {2, 4}}}},
{`^(ab|cd)e`, nil, `abcde`, []Match{}},
{`(abc|)ef`, nil, `abcdef`, []Match{[]Group{{4, 6}, {4, 4}}}},
{`(a|b)c*d`, nil, `abcd`, []Match{[]Group{{1, 4}, {1, 2}}}},
{`(ab|ab*)bc`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`a([bc]*)c*`, nil, `abc`, []Match{[]Group{{0, 3}, {1, 3}}}},
{`a([bc]*)(c*d)`, nil, `abcd`, []Match{[]Group{{0, 4}, {1, 3}, {3, 4}}}},
{`a([bc]+)(c*d)`, nil, `abcd`, []Match{[]Group{{0, 4}, {1, 3}, {3, 4}}}},
{`a([bc]*)(c+d)`, nil, `abcd`, []Match{[]Group{{0, 4}, {1, 2}, {2, 4}}}},
{`(ab|a)b*c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
{`((a)(b)c)(d)`, nil, `abcd`, []Match{[]Group{{0, 4}, {0, 3}, {0, 1}, {1, 2}, {3, 4}}}},
{`^a(bc+|b[eh])g|.h$`, nil, `abh`, []Match{[]Group{{1, 3}}}},
{`(bc+d$|ef*g.|h?i(j|k))`, nil, `effgz`, []Match{[]Group{{0, 5}, {0, 5}}}},
{`(bc+d$|ef*g.|h?i(j|k))`, nil, `ij`, []Match{[]Group{{0, 2}, {0, 2}, {1, 2}}}},
{`(bc+d$|ef*g.|h?i(j|k))`, nil, `effg`, []Match{}},
{`(bc+d$|ef*g.|h?i(j|k))`, nil, `bcdd`, []Match{}},
{`(bc+d$|ef*g.|h?i(j|k))`, nil, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
{`(((((((((a)))))))))`, nil, `a`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(((((((((a)))))))))\41`, nil, `a`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
// At this point, the python test suite has a bunch
// of backreference tests. Since my engine doesn't
// implement backreferences, I've skipped those tests.
{`(a)(b)c|ab`, nil, `ab`, []Match{[]Group{{0, 2}}}},
{`(a)+x`, nil, `aaax`, []Match{[]Group{{0, 4}, {2, 3}}}},
{`([ac])+x`, nil, `aacx`, []Match{[]Group{{0, 4}, {2, 3}}}},
{`([^/]*/)*sub1/`, nil, `d:msgs/tdir/sub1/trial/away.cpp`, []Match{[]Group{{0, 17}, {7, 12}}}},
{`([^.]*)\.([^:]*):[T ]+(.*)`, nil, `track1.title:TBlah blah blah`, []Match{[]Group{{0, 28}, {0, 6}, {7, 12}, {14, 28}}}},
{`([^N]*N)+`, nil, `abNNxyzN`, []Match{[]Group{{0, 8}, {4, 8}}}},
{`([^N]*N)+`, nil, `abNNxyz`, []Match{[]Group{{0, 4}, {3, 4}}}},
{`([abc]*)x`, nil, `abcx`, []Match{[]Group{{0, 4}, {0, 3}}}},
{`([abc]*)x`, nil, `abc`, []Match{}},
{`([xyz]*)x`, nil, `abcx`, []Match{[]Group{{3, 4}, {3, 3}}}},
{`(a)+b|aac`, nil, `aac`, []Match{[]Group{{0, 3}}}},
{`([abc])*d`, nil, `abbbcd`, []Match{[]Group{{0, 6}, {4, 5}}}},
{`([abc])*bcd`, nil, `abcd`, []Match{[]Group{{0, 4}, {0, 1}}}},
{`^(ab|cd)e`, nil, `abcde`, []Match{}},
// Case-insensitive tests
{`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDEFGHIJKL9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}},
{`()ef`, []ReFlag{RE_CASE_INSENSITIVE}, `DEF`, []Match{[]Group{{1, 3}, {1, 1}}}},
{`(?:)ef`, []ReFlag{RE_CASE_INSENSITIVE}, `DEF`, []Match{[]Group{{1, 3}}}},
{`(?:)`, []ReFlag{RE_CASE_INSENSITIVE}, `DEF`, []Match{[]Group{{0, 0}}, []Group{{1, 1}}, []Group{{2, 2}}, []Group{{3, 3}}}},
{`((a))`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}}}},
{`(a)b(c)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 3}, {0, 1}, {2, 3}}}},
{`(a+|b)*`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}, {1, 2}}, []Group{{2, 2}}}},
{`(a+|b){0,}`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}, {1, 2}}, []Group{{2, 2}}}},
{`(a+|b)+`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}, {1, 2}}}},
{`(a+|b){1,}`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}, {1, 2}}}},
{`(a+|b)?`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}, []Group{{2, 2}}}},
{`(a+|b){0,1}`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}, []Group{{2, 2}}}},
{`(a|b|c|d|e)f`, []ReFlag{RE_CASE_INSENSITIVE}, `EF`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(ab|cd)e`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{2, 5}, {2, 4}}}},
{`^(ab|cd)e`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{}},
{`(abc|)ef`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDEF`, []Match{[]Group{{4, 6}, {4, 4}}}},
{`(a|b)c*d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{1, 4}, {1, 2}}}},
{`(ab|ab*)bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`a([bc]*)c*`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 3}, {1, 3}}}},
{`a([bc]*)(c*d)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {1, 3}, {3, 4}}}},
{`a([bc]+)(c*d)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {1, 3}, {3, 4}}}},
{`a([bc]*)(c+d)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {1, 2}, {2, 4}}}},
{`(ab|a)b*c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 3}, {0, 2}}}},
{`((a)(b)c)(d)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {0, 3}, {0, 1}, {1, 2}, {3, 4}}}},
{`^a(bc+|b[eh])g|.h$`, []ReFlag{RE_CASE_INSENSITIVE}, `ABH`, []Match{[]Group{{1, 3}}}},
{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `EFFGZ`, []Match{[]Group{{0, 5}, {0, 5}}}},
{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `IJ`, []Match{[]Group{{0, 2}, {0, 2}, {1, 2}}}},
{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `EFFG`, []Match{}},
{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}},
{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
{`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
{`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
{`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}},
{`(a)+x`, []ReFlag{RE_CASE_INSENSITIVE}, `AAAX`, []Match{[]Group{{0, 4}, {2, 3}}}},
{`([ac])+x`, []ReFlag{RE_CASE_INSENSITIVE}, `AACX`, []Match{[]Group{{0, 4}, {2, 3}}}},
{`([^/]*/)*sub1/`, []ReFlag{RE_CASE_INSENSITIVE}, `D:MSGS/TDIR/SUB1/TRIAL/AWAY.CPP`, []Match{[]Group{{0, 17}, {7, 12}}}},
{`([^.]*)\.([^:]*):[T ]+(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `TRACK1.TITLE:TBLAH BLAH BLAH`, []Match{[]Group{{0, 28}, {0, 6}, {7, 12}, {14, 28}}}},
{`([^N]*N)+`, []ReFlag{RE_CASE_INSENSITIVE}, `ABNNXYZN`, []Match{[]Group{{0, 8}, {4, 8}}}},
{`([^N]*N)+`, []ReFlag{RE_CASE_INSENSITIVE}, `ABNNXYZ`, []Match{[]Group{{0, 4}, {3, 4}}}},
{`([abc]*)x`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCX`, []Match{[]Group{{0, 4}, {0, 3}}}},
{`([abc]*)x`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{}},
{`([xyz]*)x`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCX`, []Match{[]Group{{3, 4}, {3, 3}}}},
{`(a)+b|aac`, []ReFlag{RE_CASE_INSENSITIVE}, `AAC`, []Match{[]Group{{0, 3}}}},
{`([abc])*d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBCD`, []Match{[]Group{{0, 6}, {4, 5}}}},
{`([abc])*bcd`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {0, 1}}}},
{`^(ab|cd)e`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{}},
{`(?:(?:(?:(?:(?:(?:a))))))`, nil, `a`, []Match{[]Group{{0, 1}}}},
{`a(?:b|c|d)(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
{`a(?:b|c|d)*(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
{`a(?:b|c|d)+(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
{`a(?:b|(c|e){1,2}?|d)+(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {1, 2}, {2, 3}}}},
{`(?<!-):(.*)(?<!-):`, nil, `a:bc-:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
{`(?<!\\):(.*)(?<!\\):`, nil, `a:bc\:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
{`(?<!\?)'(.*)(?<!\?)'`, nil, `a'bc?'de'f`, []Match{[]Group{{1, 9}, {2, 8}}}},
{`([\s]*)([\S]*)([\s]*)`, nil, ` testing!1972`, []Match{[]Group{{0, 13}, {0, 1}, {1, 13}, {13, 13}}, []Group{{13, 13}, {13, 13}, {13, 13}, {13, 13}}}},
{`(\s*)(\S*)(\s*)`, nil, ` testing!1972`, []Match{[]Group{{0, 13}, {0, 1}, {1, 13}, {13, 13}}, []Group{{13, 13}, {13, 13}, {13, 13}, {13, 13}}}},
{`(([a-z]+):)?([a-z]+)$`, nil, `smil`, []Match{[]Group{{0, 4}, {-1, -1}, {-1, -1}, {0, 4}}}},
{`(x?)?`, nil, `x`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}}},
{`"(?:\\"|[^"])*"`, nil, `"\""`, []Match{[]Group{{0, 4}}}},
{`^((a)c)?(ab)$`, nil, `ab`, []Match{[]Group{{0, 2}, {-1, -1}, {-1, -1}, {0, 2}}}},
{`^([ab]*)(?=(b)?)c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
{`^([ab]*)(?!(b))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
{`^([ab]*)(?<!(a))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
{`(<389-400>)`, nil, `391`, []Match{[]Group{{0, 3}, {0, 3}}}},
}
// TestFindAllMatches compiles every pattern in reTests and checks that
// FindAll on the test string returns exactly the groups listed in the
// table entry.
//
// A compilation error is tolerated only when the entry's expected result is
// nil; for any other entry it fails that subtest.
func TestFindAllMatches(t *testing.T) {
	for _, test := range reTests {
		t.Run(test.re+" "+test.str, func(t *testing.T) {
			regComp, err := Compile(test.re, test.flags...)
			if err != nil {
				if test.result != nil {
					// Fail only this subtest. A panic here would abort the
					// whole test binary and hide the outcome of every other
					// case in the table.
					t.Fatal(fmt.Errorf("Test Error: %v", err))
				}
				// Nothing to match against when compilation failed.
				return
			}

			matchIndices := regComp.FindAll(test.str)
			if !slices.Equal(test.result, matchIndices) {
				t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
			}
		})
	}
}
// TestFindString compiles every pattern in reTests and checks the string
// returned by FindString against the first expected group of the entry.
//
// When the entry lists no groups, FindString must return the empty string.
// A compilation error is tolerated only when the entry's expected result is
// nil; for any other entry it fails that subtest.
func TestFindString(t *testing.T) {
	for _, test := range reTests {
		t.Run(test.re+" "+test.str, func(t *testing.T) {
			regComp, err := Compile(test.re, test.flags...)
			if err != nil {
				if test.result != nil {
					// Fail only this subtest. A panic here would abort the
					// whole test binary and hide the outcome of every other
					// case in the table.
					t.Fatal(err)
				}
				// Nothing to search with when compilation failed.
				return
			}

			foundString := regComp.FindString(test.str)

			// No expected groups: the search must come back empty.
			if len(test.result) == 0 {
				if foundString != "" {
					t.Errorf("Expected no match got %v\n", foundString)
				}
				return
			}

			// The expected text is the slice of the input covered by the
			// first group in the table entry.
			first := test.result[0]
			expectedString := test.str[first.StartIdx:first.EndIdx]
			if foundString != expectedString {
				t.Errorf("Wanted %v Got %v\n", expectedString, foundString)
			}
		})
	}
}
// TestFindAllStrings compiles every pattern in reTests and checks that
// FindAllString returns one string per expected group, each equal to the
// slice of the input that the group covers.
//
// A compilation error is tolerated only when the entry's expected result is
// nil; for any other entry it fails that subtest.
func TestFindAllStrings(t *testing.T) {
	for _, test := range reTests {
		t.Run(test.re+" "+test.str, func(t *testing.T) {
			regComp, err := Compile(test.re, test.flags...)
			if err != nil {
				if test.result != nil {
					// Fail only this subtest. A panic here would abort the
					// whole test binary and hide the outcome of every other
					// case in the table.
					t.Fatal(err)
				}
				// Nothing to search with when compilation failed.
				return
			}

			foundStrings := regComp.FindAllString(test.str)
			if len(test.result) != len(foundStrings) {
				t.Errorf("Differing number of matches: Wanted %v matches Got %v matches\n", len(test.result), len(foundStrings))
				return
			}

			// Lengths agree, so the two slices can be walked in lockstep.
			for idx, group := range test.result {
				groupStr := test.str[group.StartIdx:group.EndIdx]
				if groupStr != foundStrings[idx] {
					t.Errorf("Wanted %v Got %v\n", groupStr, foundStrings[idx])
				}
			}
		})
	}
}
// TestFindAllGroups compiles every pattern in groupTests and compares each
// valid group returned by FindAllSubmatch with the group at the same
// position in the table entry. Groups for which isValid reports false are
// skipped, so the table may omit them.
//
// A compilation error is tolerated only when the entry's expected result is
// nil; for any other entry it fails that subtest.
//
// NOTE(review): only the matches actually returned are inspected, so an
// expected match that FindAllSubmatch does not return goes unnoticed.
// Consider also comparing len(test.result) with len(matchIndices) once the
// table data has been confirmed to agree with such a check.
func TestFindAllGroups(t *testing.T) {
	for _, test := range groupTests {
		t.Run(test.re+" "+test.str, func(t *testing.T) {
			regComp, err := Compile(test.re, test.flags...)
			if err != nil {
				if test.result != nil {
					// Fail only this subtest. A panic here would abort the
					// whole test binary and hide the outcome of every other
					// case in the table.
					t.Fatal(err)
				}
				// Do not run a search with the result of a failed Compile.
				return
			}

			matchIndices := regComp.FindAllSubmatch(test.str)
			for i := range matchIndices {
				// More matches than expected: report it instead of letting
				// test.result[i] panic with an index-out-of-range error.
				if i >= len(test.result) {
					t.Fatalf("Wanted %v Got %v\n", test.result, matchIndices)
				}
				for j := range matchIndices[i] {
					if !matchIndices[i][j].isValid() {
						continue
					}
					// A valid group with no expected counterpart is a
					// mismatch, just like a group with different indices.
					if j >= len(test.result[i]) || test.result[i][j] != matchIndices[i][j] {
						t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
					}
				}
			}
		})
	}
}