Compare commits
4 Commits
f466d4a8d5
...
implementB
| Author | SHA1 | Date | |
|---|---|---|---|
| | 375baa1722 | | |
| | 2e47c631bb | | |
| | 81b8b1b11c | | |
| | 2934e7a20f | | |
@@ -18,7 +18,7 @@ Single characters:
|
|||||||
[^abc] Negated character class - match any character except a, b and c
|
[^abc] Negated character class - match any character except a, b and c
|
||||||
[^a-z] Negated character range - do not match any character from a to z
|
[^a-z] Negated character range - do not match any character from a to z
|
||||||
\[ Match a literal '['. Backslashes can escape any character with special meaning, including another backslash.
|
\[ Match a literal '['. Backslashes can escape any character with special meaning, including another backslash.
|
||||||
\452 Match the character with the octal value 452 (up to 3 digits)
|
\0452 Match the character with the octal value 452 (up to 4 digits, first digit must be 0)
|
||||||
\xFF Match the character with the hex value FF (exactly 2 characters)
|
\xFF Match the character with the hex value FF (exactly 2 characters)
|
||||||
\x{0000FF} Match the character with the hex value 0000FF (exactly 6 characters)
|
\x{0000FF} Match the character with the hex value 0000FF (exactly 6 characters)
|
||||||
\n Newline
|
\n Newline
|
||||||
@@ -93,6 +93,10 @@ Lookarounds:
|
|||||||
(?<=x)y Positive lookbehind - Match y if preceded by x
|
(?<=x)y Positive lookbehind - Match y if preceded by x
|
||||||
(?<!x)y Negative lookbehind - Match y if NOT preceded by x
|
(?<!x)y Negative lookbehind - Match y if NOT preceded by x
|
||||||
|
|
||||||
|
Backreferences:
|
||||||
|
|
||||||
|
(xy)\1 Match 'xy' followed by the text most recently captured by group 1 (in this case, 'xy')
|
||||||
|
|
||||||
Numeric ranges:
|
Numeric ranges:
|
||||||
|
|
||||||
<x-y> Match any number from x to y (inclusive) (x and y must be positive numbers)
|
<x-y> Match any number from x to y (inclusive) (x and y must be positive numbers)
|
||||||
@@ -156,6 +160,7 @@ The following features from [regexp] are (currently) NOT supported:
|
|||||||
The following features are not available in [regexp], but are supported in my engine:
|
The following features are not available in [regexp], but are supported in my engine:
|
||||||
1. Lookarounds
|
1. Lookarounds
|
||||||
2. Numeric ranges
|
2. Numeric ranges
|
||||||
|
3. Backreferences
|
||||||
|
|
||||||
I hope to shorten the first list, and expand the second.
|
I hope to shorten the first list, and expand the second.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -319,7 +319,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
if currentState.contentContains(str, idx, preferLongest) {
|
if currentState.contentContains(str, idx, preferLongest) {
|
||||||
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
|
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
|
||||||
}
|
}
|
||||||
} else if currentState.isBackreference {
|
} else if currentState.isBackreference && currentState.threadGroups[currentState.referredGroup].IsValid() {
|
||||||
groupLength := currentState.threadGroups[currentState.referredGroup].EndIdx - currentState.threadGroups[currentState.referredGroup].StartIdx
|
groupLength := currentState.threadGroups[currentState.referredGroup].EndIdx - currentState.threadGroups[currentState.referredGroup].StartIdx
|
||||||
if currentState.threadBackref == groupLength {
|
if currentState.threadBackref == groupLength {
|
||||||
currentState.threadBackref = 0
|
currentState.threadBackref = 0
|
||||||
|
|||||||
194
regex/re_test.go
194
regex/re_test.go
@@ -314,10 +314,6 @@ var reTests = []struct {
|
|||||||
{`\0009`, nil, "\x009", []Group{{0, 2}}},
|
{`\0009`, nil, "\x009", []Group{{0, 2}}},
|
||||||
{`\0141`, nil, "a", []Group{{0, 1}}},
|
{`\0141`, nil, "a", []Group{{0, 1}}},
|
||||||
|
|
||||||
// At this point, the python test suite has a bunch
|
|
||||||
// of backreference tests. Since my engine doesn't
|
|
||||||
// implement backreferences, I've skipped those tests.
|
|
||||||
|
|
||||||
{`*a`, nil, ``, nil},
|
{`*a`, nil, ``, nil},
|
||||||
{`(*)b`, nil, ``, nil},
|
{`(*)b`, nil, ``, nil},
|
||||||
{`a**`, nil, ``, nil},
|
{`a**`, nil, ``, nil},
|
||||||
@@ -585,9 +581,33 @@ var groupTests = []struct {
|
|||||||
{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
|
{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
|
||||||
{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
|
{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
|
||||||
|
|
||||||
// At this point, the python test suite has a bunch
|
// Backreference tests
|
||||||
// of backreference tests. Since my engine doesn't
|
{`(abc)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
|
||||||
// implement backreferences, I've skipped those tests.
|
{`([a-c]+)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
|
||||||
|
{`([a-c]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
|
||||||
|
{`^(.+)?B`, nil, `AB`, []Match{[]Group{{0, 2}, {0, 1}}}},
|
||||||
|
{`(a+).\1$`, nil, `aaaaa`, []Match{[]Group{{0, 5}, {0, 2}}}},
|
||||||
|
{`^(a+).\1$`, nil, `aaaa`, []Match{}},
|
||||||
|
{`(a)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
|
||||||
|
{`(a+)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
|
||||||
|
{`(a+)+\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
|
||||||
|
{`(a).+\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
|
||||||
|
{`(a)ba*\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
|
||||||
|
{`(aa|a)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
|
||||||
|
{`(a|aa)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
|
||||||
|
{`(a+)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
|
||||||
|
{`([abc]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
|
||||||
|
{`(a)(?:b)\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
|
||||||
|
{`(a)(?:b)\1`, nil, `abb`, []Match{}},
|
||||||
|
{`(?:a)(b)\1`, nil, `aba`, []Match{}},
|
||||||
|
{`(?:a)(b)\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
|
||||||
|
{`(?:(cat)|(dog))\2`, nil, `catdog`, []Match{}},
|
||||||
|
{`(?:a)\1`, nil, `aa`, nil},
|
||||||
|
{`((cat)|(dog)|(cow)|(bat))\4`, nil, `cowcow`, []Match{[]Group{{0, 6}, {0, 3}, {-1, -1}, {-1, -1}, {0, 3}, {-1, -1}}}},
|
||||||
|
{`(a|b)*\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
|
||||||
|
{`(a|b)*\1`, nil, `aba`, []Match{}},
|
||||||
|
{`(a|b)*\1`, nil, `bab`, []Match{}},
|
||||||
|
{`(a|b)*\1`, nil, `baa`, []Match{[]Group{{0, 3}, {1, 2}}}},
|
||||||
|
|
||||||
{`(a)(b)c|ab`, nil, `ab`, []Match{[]Group{{0, 2}}}},
|
{`(a)(b)c|ab`, nil, `ab`, []Match{[]Group{{0, 2}}}},
|
||||||
{`(a)+x`, nil, `aaax`, []Match{[]Group{{0, 4}, {2, 3}}}},
|
{`(a)+x`, nil, `aaax`, []Match{[]Group{{0, 4}, {2, 3}}}},
|
||||||
@@ -792,23 +812,24 @@ func TestFindSubmatch(t *testing.T) {
|
|||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
match, err := regComp.FindSubmatch(test.str)
|
match, err := regComp.FindSubmatch(test.str)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if len(test.result) != 0 {
|
if len(test.result) != 0 {
|
||||||
t.Errorf("Wanted %v got no match\n", test.result[0])
|
t.Errorf("Wanted %v got no match\n", test.result[0])
|
||||||
}
|
|
||||||
} else if len(test.result) == 0 {
|
|
||||||
t.Errorf("Wanted no match got %v\n", match)
|
|
||||||
}
|
|
||||||
for i := range match {
|
|
||||||
if match[i].IsValid() {
|
|
||||||
if test.result[0][i] != match[i] {
|
|
||||||
t.Errorf("Wanted %v Got %v\n", test.result[0], match)
|
|
||||||
}
|
}
|
||||||
} else {
|
} else if len(test.result) == 0 {
|
||||||
if i < len(test.result) && test.result[0][i].IsValid() {
|
t.Errorf("Wanted no match got %v\n", match)
|
||||||
t.Errorf("Wanted %v Got %v\n", test.result[0], match)
|
}
|
||||||
|
for i := range match {
|
||||||
|
if match[i].IsValid() {
|
||||||
|
if test.result[0][i] != match[i] {
|
||||||
|
t.Errorf("Wanted %v Got %v\n", test.result[0], match)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if i < len(test.result) && test.result[0][i].IsValid() {
|
||||||
|
t.Errorf("Wanted %v Got %v\n", test.result[0], match)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -823,10 +844,22 @@ func TestFindStringSubmatch(t *testing.T) {
|
|||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
matchStr := regComp.FindStringSubmatch(test.str)
|
matchStr := regComp.FindStringSubmatch(test.str)
|
||||||
if matchStr == nil {
|
if matchStr == nil {
|
||||||
if len(test.result) != 0 {
|
if len(test.result) != 0 {
|
||||||
|
expectedStr := funcMap(test.result[0], func(g Group) string {
|
||||||
|
if g.IsValid() {
|
||||||
|
return test.str[g.StartIdx:g.EndIdx]
|
||||||
|
} else {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
})
|
||||||
|
t.Errorf("Wanted %v got no match\n", expectedStr)
|
||||||
|
}
|
||||||
|
} else if len(test.result) == 0 {
|
||||||
|
t.Errorf("Wanted no match got %v\n", matchStr)
|
||||||
|
} else {
|
||||||
expectedStr := funcMap(test.result[0], func(g Group) string {
|
expectedStr := funcMap(test.result[0], func(g Group) string {
|
||||||
if g.IsValid() {
|
if g.IsValid() {
|
||||||
return test.str[g.StartIdx:g.EndIdx]
|
return test.str[g.StartIdx:g.EndIdx]
|
||||||
@@ -834,26 +867,15 @@ func TestFindStringSubmatch(t *testing.T) {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Errorf("Wanted %v got no match\n", expectedStr)
|
for i, groupStr := range matchStr {
|
||||||
}
|
if groupStr == "" {
|
||||||
} else if len(test.result) == 0 {
|
if i < len(expectedStr) && expectedStr[i] != "" {
|
||||||
t.Errorf("Wanted no match got %v\n", matchStr)
|
t.Errorf("Wanted %v Got %v\n", expectedStr, matchStr)
|
||||||
} else {
|
}
|
||||||
expectedStr := funcMap(test.result[0], func(g Group) string {
|
} else {
|
||||||
if g.IsValid() {
|
if expectedStr[i] != groupStr {
|
||||||
return test.str[g.StartIdx:g.EndIdx]
|
t.Errorf("Wanted %v Got %v\n", expectedStr, matchStr)
|
||||||
} else {
|
}
|
||||||
return ""
|
|
||||||
}
|
|
||||||
})
|
|
||||||
for i, groupStr := range matchStr {
|
|
||||||
if groupStr == "" {
|
|
||||||
if i < len(expectedStr) && expectedStr[i] != "" {
|
|
||||||
t.Errorf("Wanted %v Got %v\n", expectedStr, matchStr)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if expectedStr[i] != groupStr {
|
|
||||||
t.Errorf("Wanted %v Got %v\n", expectedStr, matchStr)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -870,10 +892,24 @@ func TestFindAllStringSubmatch(t *testing.T) {
|
|||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
matchStrs := regComp.FindAllStringSubmatch(test.str)
|
matchStrs := regComp.FindAllStringSubmatch(test.str)
|
||||||
if matchStrs == nil {
|
if matchStrs == nil {
|
||||||
if len(test.result) != 0 {
|
if len(test.result) != 0 {
|
||||||
|
expectedStrs := funcMap(test.result, func(m Match) []string {
|
||||||
|
return funcMap(m, func(g Group) string {
|
||||||
|
if g.IsValid() {
|
||||||
|
return test.str[g.StartIdx:g.EndIdx]
|
||||||
|
} else {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
t.Errorf("Wanted %v got no match\n", expectedStrs)
|
||||||
|
}
|
||||||
|
} else if len(test.result) == 0 {
|
||||||
|
t.Errorf("Wanted no match got %v\n", matchStrs)
|
||||||
|
} else {
|
||||||
expectedStrs := funcMap(test.result, func(m Match) []string {
|
expectedStrs := funcMap(test.result, func(m Match) []string {
|
||||||
return funcMap(m, func(g Group) string {
|
return funcMap(m, func(g Group) string {
|
||||||
if g.IsValid() {
|
if g.IsValid() {
|
||||||
@@ -883,29 +919,16 @@ func TestFindAllStringSubmatch(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
t.Errorf("Wanted %v got no match\n", expectedStrs)
|
for i, matchStr := range matchStrs {
|
||||||
}
|
for j, groupStr := range matchStr {
|
||||||
} else if len(test.result) == 0 {
|
if groupStr == "" {
|
||||||
t.Errorf("Wanted no match got %v\n", matchStrs)
|
if j < len(expectedStrs[i]) && expectedStrs[i][j] != "" {
|
||||||
} else {
|
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
|
||||||
expectedStrs := funcMap(test.result, func(m Match) []string {
|
}
|
||||||
return funcMap(m, func(g Group) string {
|
} else {
|
||||||
if g.IsValid() {
|
if expectedStrs[i][j] != groupStr {
|
||||||
return test.str[g.StartIdx:g.EndIdx]
|
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
|
||||||
} else {
|
}
|
||||||
return ""
|
|
||||||
}
|
|
||||||
})
|
|
||||||
})
|
|
||||||
for i, matchStr := range matchStrs {
|
|
||||||
for j, groupStr := range matchStr {
|
|
||||||
if groupStr == "" {
|
|
||||||
if j < len(expectedStrs[i]) && expectedStrs[i][j] != "" {
|
|
||||||
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if expectedStrs[i][j] != groupStr {
|
|
||||||
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -923,17 +946,18 @@ func TestFindAllSubmatch(t *testing.T) {
|
|||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
matchIndices := regComp.FindAllSubmatch(test.str)
|
matchIndices := regComp.FindAllSubmatch(test.str)
|
||||||
for i := range matchIndices {
|
for i := range matchIndices {
|
||||||
for j := range matchIndices[i] {
|
for j := range matchIndices[i] {
|
||||||
if matchIndices[i][j].IsValid() {
|
if matchIndices[i][j].IsValid() {
|
||||||
if test.result[i][j] != matchIndices[i][j] {
|
if test.result[i][j] != matchIndices[i][j] {
|
||||||
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
|
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
|
if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
|
||||||
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
|
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user