4 Commits

3 changed files with 116 additions and 87 deletions

View File

@@ -18,7 +18,7 @@ Single characters:
[^abc] Negated character class - match any character except a, b and c
[^a-z] Negated character range - match any character that is NOT in the range a to z
\[ Match a literal '['. Backslashes can escape any character with special meaning, including another backslash.
\452 Match the character with the octal value 452 (up to 3 digits)
\0452 Match the character with the octal value 452 (up to 4 digits, first digit must be 0)
\xFF Match the character with the hex value FF (exactly 2 hex digits)
\x{0000FF} Match the character with the hex value 0000FF (exactly 6 hex digits)
\n Newline
@@ -93,6 +93,10 @@ Lookarounds:
(?<=x)y Positive lookbehind - Match y if preceded by x
(?<!x)y Negative lookbehind - Match y if NOT preceded by x
Backreferences:
(xy)\1 Match 'xy' followed by the text most recently captured by group 1 (in this case, 'xy')
Numeric ranges:
<x-y> Match any number from x to y (inclusive) (x and y must be positive numbers)
@@ -156,6 +160,7 @@ The following features from [regexp] are (currently) NOT supported:
The following features are not available in [regexp], but are supported in my engine:
1. Lookarounds
2. Numeric ranges
3. Backreferences
I hope to shorten the first list, and expand the second.
*/

View File

@@ -319,7 +319,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
if currentState.contentContains(str, idx, preferLongest) {
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
}
} else if currentState.isBackreference {
} else if currentState.isBackreference && currentState.threadGroups[currentState.referredGroup].IsValid() {
groupLength := currentState.threadGroups[currentState.referredGroup].EndIdx - currentState.threadGroups[currentState.referredGroup].StartIdx
if currentState.threadBackref == groupLength {
currentState.threadBackref = 0

View File

@@ -314,10 +314,6 @@ var reTests = []struct {
{`\0009`, nil, "\x009", []Group{{0, 2}}},
{`\0141`, nil, "a", []Group{{0, 1}}},
// At this point, the python test suite has a bunch
// of backreference tests. Since my engine doesn't
// implement backreferences, I've skipped those tests.
{`*a`, nil, ``, nil},
{`(*)b`, nil, ``, nil},
{`a**`, nil, ``, nil},
@@ -585,9 +581,33 @@ var groupTests = []struct {
{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
// At this point, the python test suite has a bunch
// of backreference tests. Since my engine doesn't
// implement backreferences, I've skipped those tests.
// Backreference tests
{`(abc)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
{`([a-c]+)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
{`([a-c]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
{`^(.+)?B`, nil, `AB`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(a+).\1$`, nil, `aaaaa`, []Match{[]Group{{0, 5}, {0, 2}}}},
{`^(a+).\1$`, nil, `aaaa`, []Match{}},
{`(a)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(a+)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(a+)+\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(a).+\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(a)ba*\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(aa|a)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(a|aa)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(a+)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`([abc]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
{`(a)(?:b)\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(a)(?:b)\1`, nil, `abb`, []Match{}},
{`(?:a)(b)\1`, nil, `aba`, []Match{}},
{`(?:a)(b)\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
{`(?:(cat)|(dog))\2`, nil, `catdog`, []Match{}},
{`(?:a)\1`, nil, `aa`, nil},
{`((cat)|(dog)|(cow)|(bat))\4`, nil, `cowcow`, []Match{[]Group{{0, 6}, {0, 3}, {-1, -1}, {-1, -1}, {0, 3}, {-1, -1}}}},
{`(a|b)*\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
{`(a|b)*\1`, nil, `aba`, []Match{}},
{`(a|b)*\1`, nil, `bab`, []Match{}},
{`(a|b)*\1`, nil, `baa`, []Match{[]Group{{0, 3}, {1, 2}}}},
{`(a)(b)c|ab`, nil, `ab`, []Match{[]Group{{0, 2}}}},
{`(a)+x`, nil, `aaax`, []Match{[]Group{{0, 4}, {2, 3}}}},
@@ -792,7 +812,7 @@ func TestFindSubmatch(t *testing.T) {
if test.result != nil {
panic(err)
}
}
} else {
match, err := regComp.FindSubmatch(test.str)
if err != nil {
if len(test.result) != 0 {
@@ -812,6 +832,7 @@ func TestFindSubmatch(t *testing.T) {
}
}
}
}
})
}
}
@@ -823,7 +844,7 @@ func TestFindStringSubmatch(t *testing.T) {
if test.result != nil {
panic(err)
}
}
} else {
matchStr := regComp.FindStringSubmatch(test.str)
if matchStr == nil {
if len(test.result) != 0 {
@@ -858,6 +879,7 @@ func TestFindStringSubmatch(t *testing.T) {
}
}
}
}
})
}
}
@@ -870,7 +892,7 @@ func TestFindAllStringSubmatch(t *testing.T) {
if test.result != nil {
panic(err)
}
}
} else {
matchStrs := regComp.FindAllStringSubmatch(test.str)
if matchStrs == nil {
if len(test.result) != 0 {
@@ -911,6 +933,7 @@ func TestFindAllStringSubmatch(t *testing.T) {
}
}
}
}
})
}
}
@@ -923,7 +946,7 @@ func TestFindAllSubmatch(t *testing.T) {
if test.result != nil {
panic(err)
}
}
} else {
matchIndices := regComp.FindAllSubmatch(test.str)
for i := range matchIndices {
for j := range matchIndices[i] {
@@ -938,6 +961,7 @@ func TestFindAllSubmatch(t *testing.T) {
}
}
}
}
})
}
}