|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"slices"
|
|
|
|
"testing"
|
|
|
|
)
|
|
|
|
|
|
|
|
var reTests = []struct {
|
|
|
|
re string
|
|
|
|
str string
|
|
|
|
result []MatchIndex
|
|
|
|
}{
|
|
|
|
{"a", "abc", []MatchIndex{{0, 1}}},
|
|
|
|
{"a", "bca", []MatchIndex{{2, 3}}},
|
|
|
|
{"l", "ggllgg", []MatchIndex{{2, 3}, {3, 4}}},
|
|
|
|
{"(b|c)", "abdceb", []MatchIndex{{1, 2}, {3, 4}, {5, 6}}},
|
|
|
|
{"a+", "brerereraaaaabbbbb", []MatchIndex{{8, 13}}},
|
|
|
|
{"ab+", "qweqweqweaqweqweabbbbbr", []MatchIndex{{16, 22}}},
|
|
|
|
{"(b|c|A)", "ooaoobocA", []MatchIndex{{5, 6}, {7, 8}, {8, 9}}},
|
|
|
|
{"ab*", "a", []MatchIndex{{0, 1}}},
|
|
|
|
{"ab*", "abb", []MatchIndex{{0, 3}}},
|
|
|
|
{"a*b", "aaab", []MatchIndex{{0, 4}}},
|
|
|
|
{"a*b", "qwqw", []MatchIndex{}},
|
|
|
|
{"(abc)*", "abcabcabc", []MatchIndex{{0, 9}, {9, 9}}},
|
|
|
|
{"((abc)|(def))*", "abcdef", []MatchIndex{{0, 6}, {6, 6}}},
|
|
|
|
{"(abc)*|(def)*", "abcdef", []MatchIndex{{0, 3}, {3, 6}, {6, 6}}},
|
|
|
|
{"b*a*a", "bba", []MatchIndex{{0, 3}}},
|
|
|
|
{"(ab)+", "abcabddd", []MatchIndex{{0, 2}, {3, 5}}},
|
|
|
|
{"a(b(c|d)*)*", "abccbd", []MatchIndex{{0, 6}}},
|
|
|
|
{"a(b|c)*d+", "abccdd", []MatchIndex{{0, 6}}},
|
|
|
|
{"a*", "", []MatchIndex{{0, 0}}},
|
|
|
|
{"a|b", "c", []MatchIndex{}},
|
|
|
|
{"(a|b)*c", "aabbc", []MatchIndex{{0, 5}}},
|
|
|
|
{"a(b|b)", "ab", []MatchIndex{{0, 2}}},
|
|
|
|
{"a*", "aaaaaaaa", []MatchIndex{{0, 8}, {8, 8}}},
|
|
|
|
|
|
|
|
{"ab?", "ab", []MatchIndex{{0, 2}}},
|
|
|
|
{"a?b", "ab", []MatchIndex{{0, 2}}},
|
|
|
|
{"a?", "", []MatchIndex{{0, 0}}},
|
|
|
|
{"a?b?c", "a", []MatchIndex{}},
|
|
|
|
{"a?b?c?", "ab", []MatchIndex{{0, 2}, {2, 2}}},
|
|
|
|
{"a?b?c?", "ac", []MatchIndex{{0, 2}, {2, 2}}},
|
|
|
|
{"a?b?c", "abc", []MatchIndex{{0, 3}}},
|
|
|
|
{"a?b?c", "acb", []MatchIndex{{0, 2}}},
|
|
|
|
|
|
|
|
{"[abc]", "defadefbdefce", []MatchIndex{{3, 4}, {7, 8}, {11, 12}}},
|
|
|
|
{"[ab]c", "ab", []MatchIndex{}},
|
|
|
|
{"g[ab]c", "gac", []MatchIndex{{0, 3}}},
|
|
|
|
{"g[ab]c", "gbc", []MatchIndex{{0, 3}}},
|
|
|
|
{"g[ab]c", "gc", []MatchIndex{}},
|
|
|
|
{"g[ab]c", "gfc", []MatchIndex{}},
|
|
|
|
{"[ab]*", "aabbbabaababab", []MatchIndex{{0, 14}, {14, 14}}},
|
|
|
|
{"[ab]+", "aabbbablaababab", []MatchIndex{{0, 7}, {8, 15}}},
|
|
|
|
{"[Ff]r[Uu]it", "fruit", []MatchIndex{{0, 5}}},
|
|
|
|
{"[Ff]r[Uu]it", "FrUit", []MatchIndex{{0, 5}}},
|
|
|
|
{"[Ff]r[Uu|]it", "Fr|it", []MatchIndex{{0, 5}}},
|
|
|
|
{"[Ff]r([Uu]|[pP])it", "Frpit", []MatchIndex{{0, 5}}},
|
|
|
|
{"[Ff]r[Uu]|[pP]it", "Frpit", []MatchIndex{{2, 5}}},
|
|
|
|
{"[a-zA-Z]+", "Hello, how is it going?", []MatchIndex{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
|
|
|
|
|
|
|
|
{".+", "Hello, how is it going?", []MatchIndex{{0, 23}}},
|
|
|
|
{"a.", "a ", []MatchIndex{{0, 2}}},
|
|
|
|
{"a.b", "a/b", []MatchIndex{{0, 3}}},
|
|
|
|
{".", "a ", []MatchIndex{{0, 1}, {1, 2}}},
|
|
|
|
{"a.", "a ", []MatchIndex{{0, 2}}},
|
|
|
|
{".+b", "abc", []MatchIndex{{0, 2}}},
|
|
|
|
|
|
|
|
{`\d`, "1a0a3s'''34343s", []MatchIndex{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
|
|
|
|
{`\\`, `a\b\c\qwe\`, []MatchIndex{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
|
|
|
|
{`\W`, `"Hello", he said. How are you doing?`, []MatchIndex{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
|
|
|
|
{`\w`, ";';';';';'qwe12", []MatchIndex{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
|
|
|
|
{`\s`, "a b c d", []MatchIndex{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
|
|
|
{`\<`, "<HTML><body>", []MatchIndex{{0, 1}, {6, 7}}},
|
|
|
|
{`\(.+\)`, "Not (paranthesized), (so) is (this) not", []MatchIndex{{4, 35}}},
|
|
|
|
|
|
|
|
{"[^abc]+", "qarbtopsaplpclkpasdmb prejip0r,p", []MatchIndex{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
|
|
|
|
{"[^a]+", "qqqaq", []MatchIndex{{0, 3}, {4, 5}}},
|
|
|
|
{"[^0-9]+", "a1b2c3dd", []MatchIndex{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
|
|
|
|
{"[^abc]+", "ababababbababaccacacacaca", []MatchIndex{}},
|
|
|
|
{`\[`, "a[b[c[]]]", []MatchIndex{{1, 2}, {3, 4}, {5, 6}}},
|
|
|
|
{`\([^)]+\)`, "Not (paranthesized), (so) is (this) not", []MatchIndex{{4, 19}, {21, 25}, {29, 35}}},
|
|
|
|
|
|
|
|
{"^ab", "ab bab", []MatchIndex{{0, 2}}},
|
|
|
|
{"^aaaa^", "aaaaaaaa", []MatchIndex{}},
|
|
|
|
{"^([bB][Gg])", "bG", []MatchIndex{{0, 2}}},
|
|
|
|
{"b$", "ba", []MatchIndex{}},
|
|
|
|
{"(boy|girl)$", "girlf", []MatchIndex{}},
|
|
|
|
{`\bint\b`, "print int integer", []MatchIndex{{6, 9}}},
|
|
|
|
{`int\b`, "ints", []MatchIndex{}},
|
|
|
|
{`int(\b|a)`, "inta", []MatchIndex{{0, 4}}},
|
|
|
|
{`\b\d+\b`, "511 a3 43", []MatchIndex{{0, 3}, {7, 9}}},
|
|
|
|
{`\Bint\B`, "prints int integer print", []MatchIndex{{2, 5}}},
|
|
|
|
{`^`, "5^3^2", []MatchIndex{{0, 0}}},
|
|
|
|
{`\^`, "5^3^2", []MatchIndex{{1, 2}, {3, 4}}},
|
|
|
|
{`pool$`, "pool carpool", []MatchIndex{{8, 12}}},
|
|
|
|
{`^int$`, "print int integer", []MatchIndex{}},
|
|
|
|
{`^int$`, "int", []MatchIndex{{0, 3}}},
|
|
|
|
{`b*`, "aaaaaaaaaaqweqwe", []MatchIndex{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
|
|
|
|
|
|
|
|
{"a{4}", "aabaaa", []MatchIndex{}},
|
|
|
|
{"ab{5}", "abbbbbab", []MatchIndex{{0, 6}}},
|
|
|
|
{"(a|b){3,4}", "aba", []MatchIndex{{0, 3}}},
|
|
|
|
{"(a|b){3,4}", "ababaa", []MatchIndex{{0, 4}}},
|
|
|
|
{"(bc){5,}", "bcbcbcbcbcbcbcbc", []MatchIndex{{0, 16}}},
|
|
|
|
{`\d{3,4}`, "1209", []MatchIndex{{0, 4}}},
|
|
|
|
{`\d{3,4}`, "109", []MatchIndex{{0, 3}}},
|
|
|
|
{`\d{3,4}`, "5", []MatchIndex{}},
|
|
|
|
{`\d{3,4}`, "123135", []MatchIndex{{0, 4}}},
|
|
|
|
{`\d{3,4}`, "89a-0", []MatchIndex{}},
|
|
|
|
{`\d{3,4}`, "ababab555", []MatchIndex{{6, 9}}},
|
|
|
|
{`\bpaint\b`, "paints", []MatchIndex{}},
|
|
|
|
{`\b\w{5}\b`, "paint", []MatchIndex{{0, 5}}},
|
|
|
|
|
|
|
|
// Unicode tests
|
|
|
|
{`.+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []MatchIndex{{0, 25}}},
|
|
|
|
{`a.b`, "a²b", []MatchIndex{{0, 3}}},
|
|
|
|
{`[^a]+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []MatchIndex{{0, 25}}},
|
|
|
|
|
|
|
|
// Fun experiment - AI-generated tests
|
|
|
|
{"(abc|def|ghi)", "abcdefg", []MatchIndex{{0, 3}, {3, 6}}},
|
|
|
|
{"a(b|c)d", "abcd", []MatchIndex{}},
|
|
|
|
{"a(b|c)*d", "abcbcd", []MatchIndex{{0, 6}}},
|
|
|
|
{"a(b|c)+d", "abcbcd", []MatchIndex{{0, 6}}},
|
|
|
|
{"a(b|c)?d", "abd", []MatchIndex{{0, 3}}},
|
|
|
|
{".+", "hello world", []MatchIndex{{0, 11}}},
|
|
|
|
{"a.b", "aXb", []MatchIndex{{0, 3}}},
|
|
|
|
{"a.*b", "aXb", []MatchIndex{{0, 3}}},
|
|
|
|
{"a.{2,3}b", "aXXb", []MatchIndex{{0, 4}}},
|
|
|
|
{"a.{2,}b", "aXXXb", []MatchIndex{{0, 5}}},
|
|
|
|
{"a.{0,3}b", "ab", []MatchIndex{{0, 2}}},
|
|
|
|
{"[abc]+", "abcabc", []MatchIndex{{0, 6}}},
|
|
|
|
{"[a-zA-Z]+", "HelloWorld", []MatchIndex{{0, 10}}},
|
|
|
|
{"[^abc]+", "defghi", []MatchIndex{{0, 6}}},
|
|
|
|
{"^hello", "hello world", []MatchIndex{{0, 5}}},
|
|
|
|
{"world$", "hello world", []MatchIndex{{6, 11}}},
|
|
|
|
{`\bhello\b`, "hello world", []MatchIndex{{0, 5}}},
|
|
|
|
{`\Bhello\B`, "hello world", []MatchIndex{}},
|
|
|
|
{"(hello|world)", "hello world", []MatchIndex{{0, 5}, {6, 11}}},
|
|
|
|
{"(hello|world)+", "hello world", []MatchIndex{{0, 5}, {6, 11}}},
|
|
|
|
{"(hello|world)*", "hello world", []MatchIndex{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
|
|
|
{"(hello|world)?", "hello world", []MatchIndex{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
|
|
|
{"ú.+ï", "úïäö´«åæïëòöê»éãçâï«úïòíñ", []MatchIndex{{0, 22}}},
|
|
|
|
{"(?=hello)", "hello world", []MatchIndex{{0, 0}}},
|
|
|
|
{"(?!hello)", "hello world", []MatchIndex{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
|
|
|
{"(?<=hello)", "hello world", []MatchIndex{{5, 5}}},
|
|
|
|
{"(?<!hello)", "hello world", []MatchIndex{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
|
|
|
|
|
|
|
// Todo - add lookaround tests
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestFindAllMatches(t *testing.T) {
|
|
|
|
for _, test := range reTests {
|
|
|
|
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
|
|
|
re_postfix := shuntingYard(test.re)
|
|
|
|
startState := thompson(re_postfix)
|
|
|
|
matchIndices := findAllMatches(startState, []rune(test.str))
|
|
|
|
if !slices.Equal(test.result, matchIndices) {
|
|
|
|
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|