diff --git a/re_test.go b/re_test.go index ba26fe6..06189ce 100644 --- a/re_test.go +++ b/re_test.go @@ -322,6 +322,114 @@ var reTests = []struct { {`a[b-]`, nil, `a-`, []Group{{0, 2}}}, {`a[b-a]`, nil, `a-`, nil}, + // Case-insensitive matching tests + {`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}}, + {`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `XBC`, []Group{}}, + {`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `AXC`, []Group{}}, + {`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABX`, []Group{}}, + {`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `XABCY`, []Group{{1, 4}}}, + {`abc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABABC`, []Group{{2, 5}}}, + {`ab*c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}}, + {`ab*bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}}, + {`ab*bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}}, + {`ab*bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}}, + {`ab{0,}c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}}, + {`ab+bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}}, + {`ab+bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{}}, + {`ab+bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABQ`, []Group{}}, + {`ab{1,}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABQ`, []Group{}}, + {`ab+bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}}, + {`ab{1,}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}}, + {`ab{1,3}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}}, + {`ab{3,4}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}}, + {`ab{4,5}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{}}, + {`ab?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}}, + {`ab?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}}, + {`ab{0,1}bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}}, + {`ab?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{}}, + {`ab?c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}}, + {`^abc$`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, 
[]Group{{0, 3}}}, + {`^abc$`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCC`, []Group{}}, + {`^abc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCC`, []Group{{0, 3}}}, + {`^abc$`, []ReFlag{RE_CASE_INSENSITIVE}, `AABC`, []Group{}}, + {`abc$`, []ReFlag{RE_CASE_INSENSITIVE}, `AABC`, []Group{{1, 4}}}, + {`^`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 0}}}, + {`$`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{3, 3}}}, + {`a.c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}}, + {`a.c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXC`, []Group{{0, 3}}}, + {`a.*c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXYZC`, []Group{{0, 5}}}, + {`a.*c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXYZD`, []Group{}}, + {`a[bc]d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{}}, + {`a[bc]d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABD`, []Group{{0, 3}}}, + {`a[b-d]e`, []ReFlag{RE_CASE_INSENSITIVE}, `ABD`, []Group{}}, + {`a[b-d]e`, []ReFlag{RE_CASE_INSENSITIVE}, `ACE`, []Group{{0, 3}}}, + {`a[b-d]`, []ReFlag{RE_CASE_INSENSITIVE}, `AAC`, []Group{{1, 3}}}, + {`a[-b]`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen + {`a[\-b]`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{{0, 2}}}, + {`a[b-]`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen + + {`a[]b`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil}, + {`a[`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil}, + {`a\`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil}, + {`abc)`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil}, + {`(abc`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil}, + {`a]`, []ReFlag{RE_CASE_INSENSITIVE}, `A]`, []Group{{0, 2}}}, + {`a[]]b`, []ReFlag{RE_CASE_INSENSITIVE}, `A]B`, []Group{{0, 3}}}, + {`a[\]]b`, []ReFlag{RE_CASE_INSENSITIVE}, `A]B`, []Group{{0, 3}}}, + {`a[^bc]d`, []ReFlag{RE_CASE_INSENSITIVE}, `AED`, []Group{{0, 3}}}, + {`a[^bc]d`, []ReFlag{RE_CASE_INSENSITIVE}, 
`ABD`, []Group{}}, + {`a[^-b]c`, []ReFlag{RE_CASE_INSENSITIVE}, `ADC`, []Group{{0, 3}}}, + {`a[^-b]c`, []ReFlag{RE_CASE_INSENSITIVE}, `A-C`, []Group{}}, + {`a[^]b]c`, []ReFlag{RE_CASE_INSENSITIVE}, `A]C`, []Group{}}, + {`a[^]b]c`, []ReFlag{RE_CASE_INSENSITIVE}, `ADC`, []Group{{0, 3}}}, + {`\ba\b`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{{0, 1}}}, + {`\ba\b`, []ReFlag{RE_CASE_INSENSITIVE}, `-A`, []Group{{1, 2}}}, + {`\ba\b`, []ReFlag{RE_CASE_INSENSITIVE}, `-A-`, []Group{{1, 2}}}, + {`\by\b`, []ReFlag{RE_CASE_INSENSITIVE}, `XY`, []Group{}}, + {`\by\b`, []ReFlag{RE_CASE_INSENSITIVE}, `YZ`, []Group{}}, + {`\by\b`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{}}, + {`x\b`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{}}, + {`x\B`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{{0, 1}}}, + {`\Bz`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{{2, 3}}}, + {`z\B`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{}}, + {`\Bx`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{}}, + {`\Ba\B`, []ReFlag{RE_CASE_INSENSITIVE}, `A-`, []Group{}}, + {`\Ba\B`, []ReFlag{RE_CASE_INSENSITIVE}, `-A`, []Group{}}, + {`\Ba\B`, []ReFlag{RE_CASE_INSENSITIVE}, `-A-`, []Group{}}, + {`\By\B`, []ReFlag{RE_CASE_INSENSITIVE}, `XY`, []Group{}}, + {`\By\B`, []ReFlag{RE_CASE_INSENSITIVE}, `YZ`, []Group{}}, + {`\By\b`, []ReFlag{RE_CASE_INSENSITIVE}, `XY`, []Group{{1, 2}}}, + {`\by\B`, []ReFlag{RE_CASE_INSENSITIVE}, `YZ`, []Group{{0, 1}}}, + {`\By\B`, []ReFlag{RE_CASE_INSENSITIVE}, `XYZ`, []Group{{1, 2}}}, + {`ab|cd`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 2}}}, + {`ab|cd`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Group{{0, 2}, {2, 4}}}, + {`$b`, []ReFlag{RE_CASE_INSENSITIVE}, `B`, []Group{}}, + {`a\(b`, []ReFlag{RE_CASE_INSENSITIVE}, `A(B`, []Group{{0, 3}}}, + {`a\(*b`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Group{{0, 2}}}, + {`a\(*b`, []ReFlag{RE_CASE_INSENSITIVE}, `A((B`, []Group{{0, 4}}}, + {`a\\b`, []ReFlag{RE_CASE_INSENSITIVE}, `A\B`, []Group{{0, 3}}}, + {`a+b+c`, 
[]ReFlag{RE_CASE_INSENSITIVE}, `AABBABC`, []Group{{4, 7}}}, + {`a{1,}b{1,}c`, []ReFlag{RE_CASE_INSENSITIVE}, `AABBABC`, []Group{{4, 7}}}, + {`)(`, []ReFlag{RE_CASE_INSENSITIVE}, `-`, nil}, + {`[^ab]*`, []ReFlag{RE_CASE_INSENSITIVE}, `CDE`, []Group{{0, 3}, {3, 3}}}, + {`abc`, []ReFlag{RE_CASE_INSENSITIVE}, ``, []Group{}}, + {`a*`, []ReFlag{RE_CASE_INSENSITIVE}, ``, []Group{{0, 0}}}, + {`a|b|c|d|e`, []ReFlag{RE_CASE_INSENSITIVE}, `E`, []Group{{0, 1}}}, + {`abcd*efg`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDEFG`, []Group{{0, 7}}}, + {`ab*`, []ReFlag{RE_CASE_INSENSITIVE}, `XABYABBBZ`, []Group{{1, 3}, {4, 8}}}, + {`ab*`, []ReFlag{RE_CASE_INSENSITIVE}, `XAYABBBZ`, []Group{{1, 2}, {3, 7}}}, + {`[abhgefdc]ij`, []ReFlag{RE_CASE_INSENSITIVE}, `HIJ`, []Group{{0, 3}}}, + {`a[bcd]*dcdcde`, []ReFlag{RE_CASE_INSENSITIVE}, `ADCDCDE`, []Group{{0, 7}}}, + {`a[bcd]+dcdcde`, []ReFlag{RE_CASE_INSENSITIVE}, `ADCDCDE`, []Group{}}, + {`[a-zA-Z_][a-zA-Z0-9_]*`, []ReFlag{RE_CASE_INSENSITIVE}, `ALPHA`, []Group{{0, 5}}}, + {`multiple words of text`, []ReFlag{RE_CASE_INSENSITIVE}, `UH-UH`, []Group{}}, + {`multiple words`, []ReFlag{RE_CASE_INSENSITIVE}, `MULTIPLE WORDS, YEAH`, []Group{{0, 14}}}, + {`[k]`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Group{}}, + {`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}}, + {`^(.+)?B`, []ReFlag{RE_CASE_INSENSITIVE}, `ab`, []Group{{0, 2}}}, + {`\0009`, []ReFlag{RE_CASE_INSENSITIVE}, "\x009", []Group{{0, 2}}}, + {`\141`, []ReFlag{RE_CASE_INSENSITIVE}, "A", []Group{{0, 1}}}, + // Todo - add numeric range tests } @@ -405,6 +513,56 @@ var groupTests = []struct { {`([abc])*d`, nil, `abbbcd`, []Match{[]Group{{0, 6}, {4, 5}}}}, {`([abc])*bcd`, nil, `abcd`, []Match{[]Group{{0, 4}, {0, 1}}}}, {`^(ab|cd)e`, nil, `abcde`, []Match{}}, + + // Case-insensitive tests + {`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDEFGHIJKL9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, 
{10, 11}, {11, 12}}}}, + {`()ef`, []ReFlag{RE_CASE_INSENSITIVE}, `DEF`, []Match{[]Group{{1, 3}, {1, 1}}}}, + {`(?:)ef`, []ReFlag{RE_CASE_INSENSITIVE}, `DEF`, []Match{[]Group{{1, 3}}}}, + {`(?:)`, []ReFlag{RE_CASE_INSENSITIVE}, `DEF`, []Match{[]Group{{0, 0}}, []Group{{1, 1}}, []Group{{2, 2}}, []Group{{3, 3}}}}, + {`((a))`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}}}}, + {`(a)b(c)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 3}, {0, 1}, {2, 3}}}}, + {`(a+|b)*`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}, {1, 2}}, []Group{{2, 2}}}}, + {`(a+|b){0,}`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}, {1, 2}}, []Group{{2, 2}}}}, + {`(a+|b)+`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}, {1, 2}}}}, + {`(a+|b){1,}`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}, {1, 2}}}}, + {`(a+|b)?`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}, []Group{{2, 2}}}}, + {`(a+|b){0,1}`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}, []Group{{2, 2}}}}, + {`(a|b|c|d|e)f`, []ReFlag{RE_CASE_INSENSITIVE}, `EF`, []Match{[]Group{{0, 2}, {0, 1}}}}, + {`(ab|cd)e`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{2, 5}, {2, 4}}}}, + {`^(ab|cd)e`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{}}, + {`(abc|)ef`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDEF`, []Match{[]Group{{4, 6}, {4, 4}}}}, + {`(a|b)c*d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{1, 4}, {1, 2}}}}, + {`(ab|ab*)bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 3}, {0, 1}}}}, + {`a([bc]*)c*`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 3}, {1, 3}}}}, + {`a([bc]*)(c*d)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {1, 3}, {3, 4}}}}, + {`a([bc]+)(c*d)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {1, 3}, {3, 4}}}}, + {`a([bc]*)(c+d)`, 
[]ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {1, 2}, {2, 4}}}}, + {`(ab|a)b*c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{[]Group{{0, 3}, {0, 2}}}}, + {`((a)(b)c)(d)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {0, 3}, {0, 1}, {1, 2}, {3, 4}}}}, + {`^a(bc+|b[eh])g|.h$`, []ReFlag{RE_CASE_INSENSITIVE}, `ABH`, []Match{[]Group{{1, 3}}}}, + {`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `EFFGZ`, []Match{[]Group{{0, 5}, {0, 5}}}}, + {`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `IJ`, []Match{[]Group{{0, 2}, {0, 2}, {1, 2}}}}, + {`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `EFFG`, []Match{}}, + {`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}}, + {`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}}, + {`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}}, + {`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}}, + {`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}}, + {`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}}, + {`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}}, + {`(a)+x`, []ReFlag{RE_CASE_INSENSITIVE}, `AAAX`, []Match{[]Group{{0, 4}, {2, 3}}}}, + {`([ac])+x`, []ReFlag{RE_CASE_INSENSITIVE}, `AACX`, []Match{[]Group{{0, 4}, {2, 3}}}}, + {`([^/]*/)*sub1/`, []ReFlag{RE_CASE_INSENSITIVE}, `D:MSGS/TDIR/SUB1/TRIAL/AWAY.CPP`, []Match{[]Group{{0, 17}, {7, 12}}}}, + {`([^.]*)\.([^:]*):[T ]+(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `TRACK1.TITLE:TBLAH BLAH BLAH`, []Match{[]Group{{0, 28}, {0, 6}, {7, 12}, {14, 28}}}}, + {`([^N]*N)+`, []ReFlag{RE_CASE_INSENSITIVE}, `ABNNXYZN`, []Match{[]Group{{0, 8}, 
{4, 8}}}}, + {`([^N]*N)+`, []ReFlag{RE_CASE_INSENSITIVE}, `ABNNXYZ`, []Match{[]Group{{0, 4}, {3, 4}}}}, + {`([abc]*)x`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCX`, []Match{[]Group{{0, 4}, {0, 3}}}}, + {`([abc]*)x`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Match{}}, + {`([xyz]*)x`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCX`, []Match{[]Group{{3, 4}, {3, 3}}}}, + {`(a)+b|aac`, []ReFlag{RE_CASE_INSENSITIVE}, `AAC`, []Match{[]Group{{0, 3}}}}, + {`([abc])*d`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBCD`, []Match{[]Group{{0, 6}, {4, 5}}}}, + {`([abc])*bcd`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCD`, []Match{[]Group{{0, 4}, {0, 1}}}}, + {`^(ab|cd)e`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{}}, } func TestFindAllMatches(t *testing.T) {