package main import ( "slices" "testing" ) var reTests = []struct { re string str string result []Group // Stores all zero-groups in the match }{ {"a", "abc", []Group{{0, 1}}}, {"a", "bca", []Group{{2, 3}}}, {"l", "ggllgg", []Group{{2, 3}, {3, 4}}}, {"(b|c)", "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}}, {"a+", "brerereraaaaabbbbb", []Group{{8, 13}}}, {"ab+", "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}}, {"(b|c|A)", "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}}, {"ab*", "a", []Group{{0, 1}}}, {"ab*", "abb", []Group{{0, 3}}}, {"a*b", "aaab", []Group{{0, 4}}}, {"a*b", "qwqw", []Group{}}, {"(abc)*", "abcabcabc", []Group{{0, 9}, {9, 9}}}, {"((abc)|(def))*", "abcdef", []Group{{0, 6}, {6, 6}}}, {"(abc)*|(def)*", "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}}, {"b*a*a", "bba", []Group{{0, 3}}}, {"(ab)+", "abcabddd", []Group{{0, 2}, {3, 5}}}, {"a(b(c|d)*)*", "abccbd", []Group{{0, 6}}}, {"a(b|c)*d+", "abccdd", []Group{{0, 6}}}, {"a*", "", []Group{{0, 0}}}, {"a|b", "c", []Group{}}, {"(a|b)*c", "aabbc", []Group{{0, 5}}}, {"a(b|b)", "ab", []Group{{0, 2}}}, {"a*", "aaaaaaaa", []Group{{0, 8}, {8, 8}}}, {"ab?", "ab", []Group{{0, 2}}}, {"a?b", "ab", []Group{{0, 2}}}, {"a?", "", []Group{{0, 0}}}, {"a?b?c", "a", []Group{}}, {"a?b?c?", "ab", []Group{{0, 2}, {2, 2}}}, {"a?b?c?", "ac", []Group{{0, 2}, {2, 2}}}, {"a?b?c", "abc", []Group{{0, 3}}}, {"a?b?c", "acb", []Group{{0, 2}}}, {"[abc]", "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}}, {"[ab]c", "ab", []Group{}}, {"g[ab]c", "gac", []Group{{0, 3}}}, {"g[ab]c", "gbc", []Group{{0, 3}}}, {"g[ab]c", "gc", []Group{}}, {"g[ab]c", "gfc", []Group{}}, {"[ab]*", "aabbbabaababab", []Group{{0, 14}, {14, 14}}}, {"[ab]+", "aabbbablaababab", []Group{{0, 7}, {8, 15}}}, {"[Ff]r[Uu]it", "fruit", []Group{{0, 5}}}, {"[Ff]r[Uu]it", "FrUit", []Group{{0, 5}}}, {"[Ff]r[Uu|]it", "Fr|it", []Group{{0, 5}}}, {"[Ff]r([Uu]|[pP])it", "Frpit", []Group{{0, 5}}}, {"[Ff]r[Uu]|[pP]it", "Frpit", []Group{{2, 5}}}, {"[a-zA-Z]+", "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}}, {".+", "Hello, how is it going?", []Group{{0, 23}}}, {"a.", "a ", []Group{{0, 2}}}, {"a.b", "a/b", []Group{{0, 3}}}, {".", "a ", []Group{{0, 1}, {1, 2}}}, {"a.", "a ", []Group{{0, 2}}}, {".+b", "abc", []Group{{0, 2}}}, {`\d`, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}}, {`\\`, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}}, {`\W`, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}}, {`\w`, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}}, {`\s`, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}}, {`\<`, "<HTML><body>", []Group{{0, 1}, {6, 7}}}, {`\(.+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}}, {"[^abc]+", "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}}, {"[^a]+", "qqqaq", []Group{{0, 3}, {4, 5}}}, {"[^0-9]+", "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}}, {"[^abc]+", "ababababbababaccacacacaca", []Group{}}, {`\[`, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}}, {`\([^)]+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}}, {"^ab", "ab bab", []Group{{0, 2}}}, {"^aaaa^", "aaaaaaaa", []Group{}}, {"^([bB][Gg])", "bG", []Group{{0, 2}}}, {"b$", "ba", []Group{}}, {"(boy|girl)$", "girlf", []Group{}}, {`\bint\b`, "print int integer", []Group{{6, 9}}}, {`int\b`, "ints", []Group{}}, {`int(\b|a)`, "inta", []Group{{0, 4}}}, {`\b\d+\b`, "511 a3 43", []Group{{0, 3}, {7, 9}}}, {`\Bint\B`, "prints int integer print", []Group{{2, 5}}}, {`^`, "5^3^2", []Group{{0, 0}}}, {`\^`, "5^3^2", []Group{{1, 2}, {3, 4}}}, {`pool$`, "pool carpool", []Group{{8, 12}}}, {`^int$`, "print int integer", []Group{}}, {`^int$`, "int", []Group{{0, 3}}}, {`b*`, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}}, {"a{4}", "aabaaa", []Group{}}, {"ab{5}", "abbbbbab", []Group{{0, 6}}}, {"(a|b){3,4}", "aba", []Group{{0, 3}}}, {"(a|b){3,4}", "ababaa", []Group{{0, 4}}}, {"(bc){5,}", "bcbcbcbcbcbcbcbc", []Group{{0, 16}}}, {`\d{3,4}`, "1209", []Group{{0, 4}}}, {`\d{3,4}`, "109", []Group{{0, 3}}}, {`\d{3,4}`, "5", []Group{}}, {`\d{3,4}`, "123135", []Group{{0, 4}}}, {`\d{3,4}`, "89a-0", []Group{}}, {`\d{3,4}`, "ababab555", []Group{{6, 9}}}, {`\bpaint\b`, "paints", []Group{}}, {`\b\w{5}\b`, "paint", []Group{{0, 5}}}, // Unicode tests {`.+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}}, {`a.b`, "a²b", []Group{{0, 3}}}, {`[^a]+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}}, // Fun experiment - AI-generated tests {"(abc|def|ghi)", "abcdefg", []Group{{0, 3}, {3, 6}}}, {"a(b|c)d", "abcd", []Group{}}, {"a(b|c)*d", "abcbcd", []Group{{0, 6}}}, {"a(b|c)+d", "abcbcd", []Group{{0, 6}}}, {"a(b|c)?d", "abd", []Group{{0, 3}}}, {".+", "hello world", []Group{{0, 11}}}, {"a.b", "aXb", []Group{{0, 3}}}, {"a.*b", "aXb", []Group{{0, 3}}}, {"a.{2,3}b", "aXXb", []Group{{0, 4}}}, {"a.{2,}b", "aXXXb", []Group{{0, 5}}}, {"a.{0,3}b", "ab", []Group{{0, 2}}}, {"[abc]+", "abcabc", []Group{{0, 6}}}, {"[a-zA-Z]+", "HelloWorld", []Group{{0, 10}}}, {"[^abc]+", "defghi", []Group{{0, 6}}}, {"^hello", "hello world", []Group{{0, 5}}}, {"world$", "hello world", []Group{{6, 11}}}, {`\bhello\b`, "hello world", []Group{{0, 5}}}, {`\Bhello\B`, "hello world", []Group{}}, {"(hello|world)", "hello world", []Group{{0, 5}, {6, 11}}}, {"(hello|world)+", "hello world", []Group{{0, 5}, {6, 11}}}, {"(hello|world)*", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}}, {"(hello|world)?", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}}, {"ú.+ï", "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}}, {"(?=hello)", "hello world", []Group{{0, 0}}}, {"(?!hello)", "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}}, {"(?<=hello)", "hello world", []Group{{5, 5}}}, {"(?<!hello)", "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}}, {"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "40", []Group{{0, 2}}}, {"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "040", []Group{}}, {"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "400", []Group{{0, 3}}}, {"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "4000", []Group{}}, // Lookaround tests {"(?<=bo)y", "boy", []Group{{2, 3}}}, {"bo(?=y)", "boy", []Group{{0, 2}}}, {"(?<=f)f+(?=f)", "fffff", []Group{{1, 4}}}, {"(?<=f)f+(?=f)", "fffffa", []Group{{1, 4}}}, // Todo - add numeric range tests } var groupTests = []struct { re string str string result []Match }{ {"(a)(b)", "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}}, {"((a))(b)", "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}}, {"(0)", "ab", []Match{[]Group{}}}, {"(a)b", "ab", []Match{[]Group{{0, 2}, {0, 1}}}}, {"a(b)", "ab", []Match{[]Group{{0, 2}, {1, 2}}}}, {"(a|b)", "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}}, {"(a)|(b)", "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}}, {"(a+)(a)", "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}}, {"(a+)|(a)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}}, {"(a+)(aa)", "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}}, {"(aaaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}}, {"(aaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}}, {"(aaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}}, {"(aaaa)|(aaa)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}}, {"(a)|(aa)", "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}}, {"(a?)a?", "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}}, {"(a?)a?", "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}}, {"(a?)a?", "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}}, {"a((b.d){3})", "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}}, } func TestFindAllMatches(t *testing.T) { for _, test := range reTests { t.Run(test.re+" "+test.str, func(t *testing.T) { re_postfix := shuntingYard(test.re) startState, numGroups := thompson(re_postfix) matchIndices := findAllMatches(startState, []rune(test.str), numGroups) zeroGroups := make([]Group, len(matchIndices)) for i, m := range matchIndices { zeroGroups[i] = m[0] } if !slices.Equal(test.result, zeroGroups) { t.Errorf("Wanted %v Got %v\n", test.result, zeroGroups) } }) } } func TestFindAllGroups(t *testing.T) { for _, test := range groupTests { t.Run(test.re+" "+test.str, func(t *testing.T) { re_postfix := shuntingYard(test.re) startState, numGroups := thompson(re_postfix) matchIndices := findAllMatches(startState, []rune(test.str), numGroups) for i := range matchIndices { for j := range matchIndices[i] { if matchIndices[i][j].isValid() { if test.result[i][j] != matchIndices[i][j] { t.Errorf("Wanted %v Got %v\n", test.result, matchIndices) } } } } }) } }