Compare commits
13 Commits
v0.5.0
...
435588274c
Author | SHA1 | Date | |
---|---|---|---|
|
435588274c | ||
|
a347ebacc4 | ||
|
ccb82f781b | ||
|
09bbf8d3f1 | ||
|
d5b4450e50 | ||
|
45827b5dd3 | ||
|
c26edcb0c4 | ||
|
110298b6a6 | ||
|
eff4c5a5df | ||
0bd7a87797 | |||
9cf1c66653 | |||
9edc99d73c | |||
|
6850396bf9 |
79
compile.go
79
compile.go
@@ -82,6 +82,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
||||||
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
||||||
//
|
//
|
||||||
|
// Another check is made for unescaped brackets - opening brackets are replaced with LBRACKET and closing brackets are replaced with RBRACKET.
|
||||||
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
|
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
|
||||||
// these will be converted back. This avoids confusion in detecting whether a character is escaped eg. detecting
|
// these will be converted back. This avoids confusion in detecting whether a character is escaped eg. detecting
|
||||||
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
||||||
@@ -122,6 +123,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
|
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
|
||||||
re_runes = append(re_runes, ESC_BACKSLASH)
|
re_runes = append(re_runes, ESC_BACKSLASH)
|
||||||
i++
|
i++
|
||||||
|
} else if c == '[' && (i == 0 || re_runes_orig[i-1] != '\\')
|
||||||
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
re_runes = append(re_runes, c)
|
re_runes = append(re_runes, c)
|
||||||
}
|
}
|
||||||
@@ -153,23 +157,15 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
|
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
|
||||||
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
|
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
|
||||||
// TODO: Check for escaped characters
|
// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
|
||||||
|
if i >= len(re_runes) {
|
||||||
// Check ahead for character range
|
return nil, fmt.Errorf("Opening bracket without closing bracket.")
|
||||||
if i < len(re_runes)-2 && re_runes[i+1] == '-' {
|
|
||||||
rangeStart := re_runes[i]
|
|
||||||
rangeEnd := re_runes[i+2]
|
|
||||||
if int(rangeEnd) < int(rangeStart) {
|
|
||||||
return nil, fmt.Errorf("Range is out of order.")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := rangeStart; i <= rangeEnd; i++ {
|
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != ']') { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
|
||||||
toAppend = append(toAppend, i)
|
re_runes[i] = CHAR_RANGE
|
||||||
}
|
}
|
||||||
|
|
||||||
i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
toAppend = append(toAppend, re_runes[i])
|
toAppend = append(toAppend, re_runes[i])
|
||||||
}
|
}
|
||||||
// Replace the last character (which should have been ']') with RBRACKET
|
// Replace the last character (which should have been ']') with RBRACKET
|
||||||
@@ -280,7 +276,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
|
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
|
||||||
*/
|
*/
|
||||||
c := re_postfix[i]
|
c := re_postfix[i]
|
||||||
if isNormalChar(c) {
|
if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) {
|
||||||
if caseInsensitive {
|
if caseInsensitive {
|
||||||
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
|
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
|
||||||
} else {
|
} else {
|
||||||
@@ -288,7 +284,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// Escape character
|
|
||||||
if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
|
if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
|
||||||
if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
|
if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
|
||||||
return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
|
return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
|
||||||
@@ -420,6 +416,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if c == LBRACKET { // Used for character classes
|
if c == LBRACKET { // Used for character classes
|
||||||
|
firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
|
||||||
|
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
|
||||||
i++ // Step forward so we can look at the character class
|
i++ // Step forward so we can look at the character class
|
||||||
var invertMatch bool
|
var invertMatch bool
|
||||||
if re_postfix[i] == '^' {
|
if re_postfix[i] == '^' {
|
||||||
@@ -428,9 +426,14 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
|
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
|
||||||
for i < len(re_postfix) {
|
for i < len(re_postfix) {
|
||||||
if re_postfix[i] == RBRACKET {
|
if firstCharAdded && re_postfix[i] == RBRACKET {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
if re_postfix[i] == CHAR_RANGE {
|
||||||
|
endOfRange = true
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
if re_postfix[i] == '\\' { // Backslash indicates a character to be escaped
|
if re_postfix[i] == '\\' { // Backslash indicates a character to be escaped
|
||||||
if i == len(re_postfix)-1 {
|
if i == len(re_postfix)-1 {
|
||||||
return nil, fmt.Errorf("Stray backslash in character class.")
|
return nil, fmt.Errorf("Stray backslash in character class.")
|
||||||
@@ -483,13 +486,54 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket.
|
||||||
|
switch re_postfix[i] {
|
||||||
|
case LBRACKET:
|
||||||
|
chars = append(chars, newPostfixCharNode('['))
|
||||||
|
case RBRACKET:
|
||||||
|
chars = append(chars, newPostfixCharNode(']'))
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("Error parsing high-range unicode value in character class.")
|
||||||
|
}
|
||||||
|
}
|
||||||
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
firstCharAdded = true
|
||||||
|
|
||||||
|
if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range
|
||||||
|
// Things to note:
|
||||||
|
// 1. In PCRE and Go's regex engine, a letter _can_ be surrounded by hyphens in a character class.
|
||||||
|
// Eg. [a-b-c]
|
||||||
|
// While you might think this leads to a syntax error (I thought so), the engine picks 'a-b' as a range,
|
||||||
|
// then treats the second '-' and 'c' as regular characters in the character class.
|
||||||
|
// So this regex becomes "Match a character from 'a' to 'b', a literal hyphen, or 'c' ".
|
||||||
|
// 2. To account for this, the following logic is followed:
|
||||||
|
// a. If the second-to-last postfixNode ie. the start of the range has only one element, then we are in a range.
|
||||||
|
// i. If it has more than one element, then we are actually looking at a literal hyphen, and we will treat it as such.
|
||||||
|
// ii. If either the start or end of the range don't exist in 'chars' ie. something like [-a] or [a-], then too will we treat it as a literal hyphen.
|
||||||
|
// b. The last postfixNode added to 'chars' _must_ only have one character (because it's the end of the range).
|
||||||
|
endRangePostfixNode, err1 := pop(&chars)
|
||||||
|
startRangePostfixNode, err2 := pop(&chars)
|
||||||
|
|
||||||
|
if (err1 != nil || err2 != nil) || len(startRangePostfixNode.contents) != 1 { // Treat it as a regular hyphen
|
||||||
|
chars = append(chars, startRangePostfixNode, newPostfixCharNode('-'), endRangePostfixNode)
|
||||||
|
} else if len(endRangePostfixNode.contents) != 1 { // I don't even know what this would look like, this is just a sanity check
|
||||||
|
return nil, fmt.Errorf("Error parsing character range.")
|
||||||
|
} else {
|
||||||
|
// We have established that they both have a length of 1
|
||||||
|
startRangeRune := startRangePostfixNode.contents[0]
|
||||||
|
endRangeRune := endRangePostfixNode.contents[0]
|
||||||
|
chars = append(chars, newPostfixCharNode(genRange(startRangeRune, endRangeRune+1)...))
|
||||||
|
}
|
||||||
|
|
||||||
|
endOfRange = false // Reset the flag
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing bracket. Panic.
|
if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing bracket. Panic.
|
||||||
return nil, fmt.Errorf("Opening bracket without closing bracket.")
|
return nil, fmt.Errorf("Opening bracket without closing bracket.")
|
||||||
}
|
}
|
||||||
|
|
||||||
outQueue = append(outQueue, newCharClassNode(chars, invertMatch))
|
outQueue = append(outQueue, newCharClassNode(chars, invertMatch))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -681,8 +725,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
|
|
||||||
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
|
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
|
||||||
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
|
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
|
||||||
// Uncommenting this seems to make one of the test cases fail. Why?
|
replaceByValue(state.except, ESC_BACKSLASH, '\\')
|
||||||
// replaceByValue(state.except, ESC_BACKSLASH, '\\')
|
|
||||||
|
|
||||||
nfa = append(nfa, &state)
|
nfa = append(nfa, &state)
|
||||||
}
|
}
|
||||||
|
24
misc.go
24
misc.go
@@ -15,6 +15,14 @@ var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with thi
|
|||||||
var RPAREN_CHAR rune = 0xF0005
|
var RPAREN_CHAR rune = 0xF0005
|
||||||
var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
|
var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
|
||||||
var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash
|
var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash
|
||||||
|
var CHAR_RANGE rune = 0xF0008 // Represents a character range
|
||||||
|
|
||||||
|
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', '~', '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
|
||||||
|
|
||||||
|
// An interface for int and rune, which are identical
|
||||||
|
type character interface {
|
||||||
|
int | rune
|
||||||
|
}
|
||||||
|
|
||||||
// Returns true if str[idx] and str[idx-1] are separated by a word boundary.
|
// Returns true if str[idx] and str[idx-1] are separated by a word boundary.
|
||||||
func isWordBoundary(str []rune, idx int) bool {
|
func isWordBoundary(str []rune, idx int) bool {
|
||||||
@@ -26,9 +34,17 @@ func isWordBoundary(str []rune, idx int) bool {
|
|||||||
return wbounded
|
return wbounded
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isSpecialChar(c rune) bool {
|
||||||
|
return slices.Contains(specialChars, c)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some special characters have metacharacter replacements. These characters, when encountered in their literal form, can be treated as regular characters.
|
||||||
|
func isSpecialCharWithMetacharReplacement(c rune) bool {
|
||||||
|
return slices.Contains([]rune{'[', ']'}, c)
|
||||||
|
}
|
||||||
|
|
||||||
func isNormalChar(c rune) bool {
|
func isNormalChar(c rune) bool {
|
||||||
specialChars := []rune(`?*\^${}()+|[].~<>`)
|
|
||||||
specialChars = append(specialChars, LBRACKET, RBRACKET, NONCAPLPAREN_CHAR)
|
|
||||||
return !slices.Contains(specialChars, c)
|
return !slices.Contains(specialChars, c)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,8 +125,8 @@ func Reduce[T any](slc []T, fn func(T, T) T) T {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate numbers in a range - start (inclusive) to end (exclusive)
|
// Generate numbers in a range - start (inclusive) to end (exclusive)
|
||||||
func genRange(start, end int) []int {
|
func genRange[T character](start, end T) []T {
|
||||||
toRet := make([]int, end-start)
|
toRet := make([]T, end-start)
|
||||||
for i := start; i < end; i++ {
|
for i := start; i < end; i++ {
|
||||||
toRet[i-start] = i
|
toRet[i-start] = i
|
||||||
}
|
}
|
||||||
|
@@ -116,6 +116,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
|||||||
case 'v': // Vertical tab
|
case 'v': // Vertical tab
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = CHARACTER
|
||||||
toReturn.contents = append(toReturn.contents, rune(11))
|
toReturn.contents = append(toReturn.contents, rune(11))
|
||||||
|
case '-': // Literal hyphen - only in character class
|
||||||
|
if inCharClass {
|
||||||
|
toReturn.nodetype = CHARACTER
|
||||||
|
toReturn.contents = append(toReturn.contents, '-')
|
||||||
|
} else {
|
||||||
|
return postfixNode{}, fmt.Errorf("Invalid escape character.")
|
||||||
|
}
|
||||||
default: // None of the above - append it as a regular character
|
default: // None of the above - append it as a regular character
|
||||||
if isNormalChar(c) { // Normal characters cannot be escaped
|
if isNormalChar(c) { // Normal characters cannot be escaped
|
||||||
return postfixNode{}, fmt.Errorf("Invalid escape character.")
|
return postfixNode{}, fmt.Errorf("Invalid escape character.")
|
||||||
|
410
re_test.go
410
re_test.go
@@ -1,221 +1,281 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"slices"
|
"slices"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
var reTests = []struct {
|
var reTests = []struct {
|
||||||
re string
|
re string
|
||||||
|
flags []ReFlag
|
||||||
str string
|
str string
|
||||||
result []Group // Stores all zero-groups in the match
|
result []Group // Stores all zero-groups in the match
|
||||||
}{
|
}{
|
||||||
{"a", "abc", []Group{{0, 1}}},
|
{"a", nil, "abc", []Group{{0, 1}}},
|
||||||
{"a", "bca", []Group{{2, 3}}},
|
{"a", nil, "bca", []Group{{2, 3}}},
|
||||||
{"l", "ggllgg", []Group{{2, 3}, {3, 4}}},
|
{"l", nil, "ggllgg", []Group{{2, 3}, {3, 4}}},
|
||||||
{"(b|c)", "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}},
|
{"(b|c)", nil, "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}},
|
||||||
{"a+", "brerereraaaaabbbbb", []Group{{8, 13}}},
|
{"a+", nil, "brerereraaaaabbbbb", []Group{{8, 13}}},
|
||||||
{"ab+", "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}},
|
{"ab+", nil, "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}},
|
||||||
{"(b|c|A)", "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}},
|
{"(b|c|A)", nil, "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}},
|
||||||
{"ab*", "a", []Group{{0, 1}}},
|
{"ab*", nil, "a", []Group{{0, 1}}},
|
||||||
{"ab*", "abb", []Group{{0, 3}}},
|
{"ab*", nil, "abb", []Group{{0, 3}}},
|
||||||
{"a*b", "aaab", []Group{{0, 4}}},
|
{"a*b", nil, "aaab", []Group{{0, 4}}},
|
||||||
{"a*b", "qwqw", []Group{}},
|
{"a*b", nil, "qwqw", []Group{}},
|
||||||
{"(abc)*", "abcabcabc", []Group{{0, 9}, {9, 9}}},
|
{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
|
||||||
{"((abc)|(def))*", "abcdef", []Group{{0, 6}, {6, 6}}},
|
{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
|
||||||
{"(abc)*|(def)*", "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
|
{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
|
||||||
{"b*a*a", "bba", []Group{{0, 3}}},
|
{"b*a*a", nil, "bba", []Group{{0, 3}}},
|
||||||
{"(ab)+", "abcabddd", []Group{{0, 2}, {3, 5}}},
|
{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
|
||||||
{"a(b(c|d)*)*", "abccbd", []Group{{0, 6}}},
|
{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
|
||||||
{"a(b|c)*d+", "abccdd", []Group{{0, 6}}},
|
{"a(b|c)*d+", nil, "abccdd", []Group{{0, 6}}},
|
||||||
{"a*", "", []Group{{0, 0}}},
|
{"a*", nil, "", []Group{{0, 0}}},
|
||||||
{"a|b", "c", []Group{}},
|
{"a|b", nil, "c", []Group{}},
|
||||||
{"(a|b)*c", "aabbc", []Group{{0, 5}}},
|
{"(a|b)*c", nil, "aabbc", []Group{{0, 5}}},
|
||||||
{"a(b|b)", "ab", []Group{{0, 2}}},
|
{"a(b|b)", nil, "ab", []Group{{0, 2}}},
|
||||||
{"a*", "aaaaaaaa", []Group{{0, 8}, {8, 8}}},
|
{"a*", nil, "aaaaaaaa", []Group{{0, 8}, {8, 8}}},
|
||||||
|
|
||||||
{"ab?", "ab", []Group{{0, 2}}},
|
{"ab?", nil, "ab", []Group{{0, 2}}},
|
||||||
{"a?b", "ab", []Group{{0, 2}}},
|
{"a?b", nil, "ab", []Group{{0, 2}}},
|
||||||
{"a?", "", []Group{{0, 0}}},
|
{"a?", nil, "", []Group{{0, 0}}},
|
||||||
{"a?b?c", "a", []Group{}},
|
{"a?b?c", nil, "a", []Group{}},
|
||||||
{"a?b?c?", "ab", []Group{{0, 2}, {2, 2}}},
|
{"a?b?c?", nil, "ab", []Group{{0, 2}, {2, 2}}},
|
||||||
{"a?b?c?", "ac", []Group{{0, 2}, {2, 2}}},
|
{"a?b?c?", nil, "ac", []Group{{0, 2}, {2, 2}}},
|
||||||
{"a?b?c", "abc", []Group{{0, 3}}},
|
{"a?b?c", nil, "abc", []Group{{0, 3}}},
|
||||||
{"a?b?c", "acb", []Group{{0, 2}}},
|
{"a?b?c", nil, "acb", []Group{{0, 2}}},
|
||||||
|
|
||||||
{"[abc]", "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}},
|
{"[abc]", nil, "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}},
|
||||||
{"[ab]c", "ab", []Group{}},
|
{"[ab]c", nil, "ab", []Group{}},
|
||||||
{"g[ab]c", "gac", []Group{{0, 3}}},
|
{"g[ab]c", nil, "gac", []Group{{0, 3}}},
|
||||||
{"g[ab]c", "gbc", []Group{{0, 3}}},
|
{"g[ab]c", nil, "gbc", []Group{{0, 3}}},
|
||||||
{"g[ab]c", "gc", []Group{}},
|
{"g[ab]c", nil, "gc", []Group{}},
|
||||||
{"g[ab]c", "gfc", []Group{}},
|
{"g[ab]c", nil, "gfc", []Group{}},
|
||||||
{"[ab]*", "aabbbabaababab", []Group{{0, 14}, {14, 14}}},
|
{"[ab]*", nil, "aabbbabaababab", []Group{{0, 14}, {14, 14}}},
|
||||||
{"[ab]+", "aabbbablaababab", []Group{{0, 7}, {8, 15}}},
|
{"[ab]+", nil, "aabbbablaababab", []Group{{0, 7}, {8, 15}}},
|
||||||
{"[Ff]r[Uu]it", "fruit", []Group{{0, 5}}},
|
{"[Ff]r[Uu]it", nil, "fruit", []Group{{0, 5}}},
|
||||||
{"[Ff]r[Uu]it", "FrUit", []Group{{0, 5}}},
|
{"[Ff]r[Uu]it", nil, "FrUit", []Group{{0, 5}}},
|
||||||
{"[Ff]r[Uu|]it", "Fr|it", []Group{{0, 5}}},
|
{"[Ff]r[Uu|]it", nil, "Fr|it", []Group{{0, 5}}},
|
||||||
{"[Ff]r([Uu]|[pP])it", "Frpit", []Group{{0, 5}}},
|
{"[Ff]r([Uu]|[pP])it", nil, "Frpit", []Group{{0, 5}}},
|
||||||
{"[Ff]r[Uu]|[pP]it", "Frpit", []Group{{2, 5}}},
|
{"[Ff]r[Uu]|[pP]it", nil, "Frpit", []Group{{2, 5}}},
|
||||||
{"[a-zA-Z]+", "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
|
{"[a-zA-Z]+", nil, "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
|
||||||
|
|
||||||
{".+", "Hello, how is it going?", []Group{{0, 23}}},
|
{".+", nil, "Hello, how is it going?", []Group{{0, 23}}},
|
||||||
{"a.", "a ", []Group{{0, 2}}},
|
{"a.", nil, "a ", []Group{{0, 2}}},
|
||||||
{"a.b", "a/b", []Group{{0, 3}}},
|
{"a.b", nil, "a/b", []Group{{0, 3}}},
|
||||||
{".", "a ", []Group{{0, 1}, {1, 2}}},
|
{".", nil, "a ", []Group{{0, 1}, {1, 2}}},
|
||||||
{"a.", "a ", []Group{{0, 2}}},
|
{"a.", nil, "a ", []Group{{0, 2}}},
|
||||||
{".+b", "abc", []Group{{0, 2}}},
|
{".+b", nil, "abc", []Group{{0, 2}}},
|
||||||
|
|
||||||
{`\d`, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
|
{`\d`, nil, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
|
||||||
{`\\`, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
|
{`\\`, nil, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
|
||||||
{`\W`, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
|
{`\W`, nil, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
|
||||||
{`\w`, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
|
{`\w`, nil, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
|
||||||
{`\s`, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
{`\s`, nil, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
||||||
{`\<`, "<HTML><body>", []Group{{0, 1}, {6, 7}}},
|
{`\<`, nil, "<HTML><body>", []Group{{0, 1}, {6, 7}}},
|
||||||
{`\(.+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}},
|
{`\(.+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}},
|
||||||
|
|
||||||
{"[^abc]+", "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
|
{"[^abc]+", nil, "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
|
||||||
{"[^a]+", "qqqaq", []Group{{0, 3}, {4, 5}}},
|
{"[^a]+", nil, "qqqaq", []Group{{0, 3}, {4, 5}}},
|
||||||
{"[^0-9]+", "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
|
{"[^0-9]+", nil, "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
|
||||||
{"[^abc]+", "ababababbababaccacacacaca", []Group{}},
|
{"[^abc]+", nil, "ababababbababaccacacacaca", []Group{}},
|
||||||
{`\[`, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}},
|
{`\[`, nil, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}},
|
||||||
{`\([^)]+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}},
|
{`\([^)]+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}},
|
||||||
|
|
||||||
{"^ab", "ab bab", []Group{{0, 2}}},
|
{"^ab", nil, "ab bab", []Group{{0, 2}}},
|
||||||
{"^aaaa^", "aaaaaaaa", []Group{}},
|
{"^aaaa^", nil, "aaaaaaaa", []Group{}},
|
||||||
{"^([bB][Gg])", "bG", []Group{{0, 2}}},
|
{"^([bB][Gg])", nil, "bG", []Group{{0, 2}}},
|
||||||
{"b$", "ba", []Group{}},
|
{"b$", nil, "ba", []Group{}},
|
||||||
{"(boy|girl)$", "girlf", []Group{}},
|
{"(boy|girl)$", nil, "girlf", []Group{}},
|
||||||
{`\bint\b`, "print int integer", []Group{{6, 9}}},
|
{`\bint\b`, nil, "print int integer", []Group{{6, 9}}},
|
||||||
{`int\b`, "ints", []Group{}},
|
{`int\b`, nil, "ints", []Group{}},
|
||||||
{`int(\b|a)`, "inta", []Group{{0, 4}}},
|
{`int(\b|a)`, nil, "inta", []Group{{0, 4}}},
|
||||||
{`\b\d+\b`, "511 a3 43", []Group{{0, 3}, {7, 9}}},
|
{`\b\d+\b`, nil, "511 a3 43", []Group{{0, 3}, {7, 9}}},
|
||||||
{`\Bint\B`, "prints int integer print", []Group{{2, 5}}},
|
{`\Bint\B`, nil, "prints int integer print", []Group{{2, 5}}},
|
||||||
{`^`, "5^3^2", []Group{{0, 0}}},
|
{`^`, nil, "5^3^2", []Group{{0, 0}}},
|
||||||
{`\^`, "5^3^2", []Group{{1, 2}, {3, 4}}},
|
{`\^`, nil, "5^3^2", []Group{{1, 2}, {3, 4}}},
|
||||||
{`pool$`, "pool carpool", []Group{{8, 12}}},
|
{`pool$`, nil, "pool carpool", []Group{{8, 12}}},
|
||||||
{`^int$`, "print int integer", []Group{}},
|
{`^int$`, nil, "print int integer", []Group{}},
|
||||||
{`^int$`, "int", []Group{{0, 3}}},
|
{`^int$`, nil, "int", []Group{{0, 3}}},
|
||||||
{`b*`, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
|
{`b*`, nil, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
|
||||||
|
|
||||||
{"a{4}", "aabaaa", []Group{}},
|
{"a{4}", nil, "aabaaa", []Group{}},
|
||||||
{"ab{5}", "abbbbbab", []Group{{0, 6}}},
|
{"ab{5}", nil, "abbbbbab", []Group{{0, 6}}},
|
||||||
{"(a|b){3,4}", "aba", []Group{{0, 3}}},
|
{"(a|b){3,4}", nil, "aba", []Group{{0, 3}}},
|
||||||
{"(a|b){3,4}", "ababaa", []Group{{0, 4}}},
|
{"(a|b){3,4}", nil, "ababaa", []Group{{0, 4}}},
|
||||||
{"(bc){5,}", "bcbcbcbcbcbcbcbc", []Group{{0, 16}}},
|
{"(bc){5,}", nil, "bcbcbcbcbcbcbcbc", []Group{{0, 16}}},
|
||||||
{`\d{3,4}`, "1209", []Group{{0, 4}}},
|
{`\d{3,4}`, nil, "1209", []Group{{0, 4}}},
|
||||||
{`\d{3,4}`, "109", []Group{{0, 3}}},
|
{`\d{3,4}`, nil, "109", []Group{{0, 3}}},
|
||||||
{`\d{3,4}`, "5", []Group{}},
|
{`\d{3,4}`, nil, "5", []Group{}},
|
||||||
{`\d{3,4}`, "123135", []Group{{0, 4}}},
|
{`\d{3,4}`, nil, "123135", []Group{{0, 4}}},
|
||||||
{`\d{3,4}`, "89a-0", []Group{}},
|
{`\d{3,4}`, nil, "89a-0", []Group{}},
|
||||||
{`\d{3,4}`, "ababab555", []Group{{6, 9}}},
|
{`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
|
||||||
{`\bpaint\b`, "paints", []Group{}},
|
{`\bpaint\b`, nil, "paints", []Group{}},
|
||||||
{`\b\w{5}\b`, "paint", []Group{{0, 5}}},
|
{`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
|
||||||
{`[^\w]`, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
|
{`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
|
||||||
{`[^\W]`, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
|
{`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
|
||||||
{`[\[\]]`, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
{`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
||||||
|
|
||||||
// Unicode tests
|
// Unicode tests
|
||||||
{`.+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
|
{`.+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
|
||||||
{`a.b`, "a²b", []Group{{0, 3}}},
|
{`a.b`, nil, "a²b", []Group{{0, 3}}},
|
||||||
{`[^a]+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
|
{`[^a]+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
|
||||||
|
|
||||||
// Fun experiment - AI-generated tests
|
// Fun experiment - AI-generated tests
|
||||||
{"(abc|def|ghi)", "abcdefg", []Group{{0, 3}, {3, 6}}},
|
{"(abc|def|ghi)", nil, "abcdefg", []Group{{0, 3}, {3, 6}}},
|
||||||
{"a(b|c)d", "abcd", []Group{}},
|
{"a(b|c)d", nil, "abcd", []Group{}},
|
||||||
{"a(b|c)*d", "abcbcd", []Group{{0, 6}}},
|
{"a(b|c)*d", nil, "abcbcd", []Group{{0, 6}}},
|
||||||
{"a(b|c)+d", "abcbcd", []Group{{0, 6}}},
|
{"a(b|c)+d", nil, "abcbcd", []Group{{0, 6}}},
|
||||||
{"a(b|c)?d", "abd", []Group{{0, 3}}},
|
{"a(b|c)?d", nil, "abd", []Group{{0, 3}}},
|
||||||
{".+", "hello world", []Group{{0, 11}}},
|
{".+", nil, "hello world", []Group{{0, 11}}},
|
||||||
{"a.b", "aXb", []Group{{0, 3}}},
|
{"a.b", nil, "aXb", []Group{{0, 3}}},
|
||||||
{"a.*b", "aXb", []Group{{0, 3}}},
|
{"a.*b", nil, "aXb", []Group{{0, 3}}},
|
||||||
{"a.{2,3}b", "aXXb", []Group{{0, 4}}},
|
{"a.{2,3}b", nil, "aXXb", []Group{{0, 4}}},
|
||||||
{"a.{2,}b", "aXXXb", []Group{{0, 5}}},
|
{"a.{2,}b", nil, "aXXXb", []Group{{0, 5}}},
|
||||||
{"a.{0,3}b", "ab", []Group{{0, 2}}},
|
{"a.{0,3}b", nil, "ab", []Group{{0, 2}}},
|
||||||
{"[abc]+", "abcabc", []Group{{0, 6}}},
|
{"[abc]+", nil, "abcabc", []Group{{0, 6}}},
|
||||||
{"[a-zA-Z]+", "HelloWorld", []Group{{0, 10}}},
|
{"[a-zA-Z]+", nil, "HelloWorld", []Group{{0, 10}}},
|
||||||
{"[^abc]+", "defghi", []Group{{0, 6}}},
|
{"[^abc]+", nil, "defghi", []Group{{0, 6}}},
|
||||||
{"^hello", "hello world", []Group{{0, 5}}},
|
{"^hello", nil, "hello world", []Group{{0, 5}}},
|
||||||
{"world$", "hello world", []Group{{6, 11}}},
|
{"world$", nil, "hello world", []Group{{6, 11}}},
|
||||||
{`\bhello\b`, "hello world", []Group{{0, 5}}},
|
{`\bhello\b`, nil, "hello world", []Group{{0, 5}}},
|
||||||
{`\Bhello\B`, "hello world", []Group{}},
|
{`\Bhello\B`, nil, "hello world", []Group{}},
|
||||||
{"(hello|world)", "hello world", []Group{{0, 5}, {6, 11}}},
|
{"(hello|world)", nil, "hello world", []Group{{0, 5}, {6, 11}}},
|
||||||
{"(hello|world)+", "hello world", []Group{{0, 5}, {6, 11}}},
|
{"(hello|world)+", nil, "hello world", []Group{{0, 5}, {6, 11}}},
|
||||||
{"(hello|world)*", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
{"(hello|world)*", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
||||||
{"(hello|world)?", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
{"(hello|world)?", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
||||||
{"ú.+ï", "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}},
|
{"ú.+ï", nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}},
|
||||||
{"(?=hello)", "hello world", []Group{{0, 0}}},
|
{"(?=hello)", nil, "hello world", []Group{{0, 0}}},
|
||||||
{"(?!hello)", "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
{"(?!hello)", nil, "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
||||||
{"(?<=hello)", "hello world", []Group{{5, 5}}},
|
{"(?<=hello)", nil, "hello world", []Group{{5, 5}}},
|
||||||
{"(?<!hello)", "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
{"(?<!hello)", nil, "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
||||||
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "40", []Group{{0, 2}}},
|
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "40", []Group{{0, 2}}},
|
||||||
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "040", []Group{}},
|
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "040", []Group{}},
|
||||||
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "400", []Group{{0, 3}}},
|
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "400", []Group{{0, 3}}},
|
||||||
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "4000", []Group{}},
|
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "4000", []Group{}},
|
||||||
{"a{1,3}", "aaaaa", []Group{{0, 3}, {3, 5}}},
|
{"a{1,3}", nil, "aaaaa", []Group{{0, 3}, {3, 5}}},
|
||||||
{`\\[ab\\]`, "a", []Group{}},
|
{`\\[ab\\]`, nil, "a", []Group{}},
|
||||||
{`\\[ab\\]`, `\a`, []Group{{0, 2}}},
|
{`\\[ab\\]`, nil, `\a`, []Group{{0, 2}}},
|
||||||
|
|
||||||
// Lookaround tests
|
// Lookaround tests
|
||||||
{"(?<=bo)y", "boy", []Group{{2, 3}}},
|
{"(?<=bo)y", nil, "boy", []Group{{2, 3}}},
|
||||||
{"bo(?=y)", "boy", []Group{{0, 2}}},
|
{"bo(?=y)", nil, "boy", []Group{{0, 2}}},
|
||||||
{"(?<=f)f+(?=f)", "fffff", []Group{{1, 4}}},
|
{"(?<=f)f+(?=f)", nil, "fffff", []Group{{1, 4}}},
|
||||||
{"(?<=f)f+(?=f)", "fffffa", []Group{{1, 4}}},
|
{"(?<=f)f+(?=f)", nil, "fffffa", []Group{{1, 4}}},
|
||||||
|
|
||||||
// Test cases from Python's RE test suite
|
// Test cases from Python's RE test suite
|
||||||
{`[\1]`, "\x01", []Group{{0, 1}}},
|
{`[\1]`, nil, "\x01", []Group{{0, 1}}},
|
||||||
|
|
||||||
{`\0`, "\x00", []Group{{0, 1}}},
|
{`\0`, nil, "\x00", []Group{{0, 1}}},
|
||||||
{`[\0a]`, "\x00", []Group{{0, 1}}},
|
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
|
||||||
{`[\0a]`, "\x00", []Group{{0, 1}}},
|
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
|
||||||
{`[a\0]`, "\x00", []Group{{0, 1}}},
|
{`[a\0]`, nil, "\x00", []Group{{0, 1}}},
|
||||||
{`[^a\0]`, "\x00", []Group{}},
|
{`[^a\0]`, nil, "\x00", []Group{}},
|
||||||
|
|
||||||
{`\a[\b]\f\n\r\t\v`, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
|
{`\a[\b]\f\n\r\t\v`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
|
||||||
{`[\a][\b][\f][\n][\r][\t][\v]`, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
|
{`[\a][\b][\f][\n][\r][\t][\v]`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
|
||||||
{`\u`, "", nil},
|
{`\u`, nil, "", nil},
|
||||||
{`\xff`, "ÿ", []Group{{0, 1}}},
|
{`\xff`, nil, "ÿ", []Group{{0, 1}}},
|
||||||
{`\x00ffffffffffffff`, "\xff", []Group{}},
|
{`\x00ffffffffffffff`, nil, "\xff", []Group{}},
|
||||||
{`\x00f`, "\x0f", []Group{}},
|
{`\x00f`, nil, "\x0f", []Group{}},
|
||||||
{`\x00fe`, "\xfe", []Group{}},
|
{`\x00fe`, nil, "\xfe", []Group{}},
|
||||||
{`^\w+=(\\[\000-\277]|[^\n\\])*`, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
|
{`^\w+=(\\[\000-\277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
|
||||||
|
|
||||||
|
{`a.b`, nil, `acb`, []Group{{0, 3}}},
|
||||||
|
{`a.b`, nil, "a\nb", []Group{}},
|
||||||
|
{`a.*b`, nil, "acc\nccb", []Group{}},
|
||||||
|
{`a.{4,5}b`, nil, "acc\nccb", []Group{}},
|
||||||
|
{`a.b`, nil, "a\rb", []Group{{0, 3}}},
|
||||||
|
{`a.b`, []ReFlag{RE_MULTILINE}, "a\nb", []Group{{0, 3}}},
|
||||||
|
{`a.*b`, []ReFlag{RE_MULTILINE}, "acc\nccb", []Group{{0, 7}}},
|
||||||
|
{`a.{4,5}b`, []ReFlag{RE_MULTILINE}, "acc\nccb", []Group{{0, 7}}},
|
||||||
|
|
||||||
|
{`)`, nil, ``, nil},
|
||||||
|
{`^$`, nil, ``, []Group{{0, 0}}},
|
||||||
|
{`abc`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`abc`, nil, `xbc`, []Group{}},
|
||||||
|
{`abc`, nil, `axc`, []Group{}},
|
||||||
|
{`abc`, nil, `abx`, []Group{}},
|
||||||
|
{`abc`, nil, `xabcy`, []Group{{1, 4}}},
|
||||||
|
{`abc`, nil, `ababc`, []Group{{2, 5}}},
|
||||||
|
{`ab*c`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`ab*bc`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`ab*bc`, nil, `abbc`, []Group{{0, 4}}},
|
||||||
|
{`ab*bc`, nil, `abbbbc`, []Group{{0, 6}}},
|
||||||
|
{`ab+bc`, nil, `abbc`, []Group{{0, 4}}},
|
||||||
|
{`ab+bc`, nil, `abc`, []Group{}},
|
||||||
|
{`ab+bc`, nil, `abq`, []Group{}},
|
||||||
|
{`ab+bc`, nil, `abbbbc`, []Group{{0, 6}}},
|
||||||
|
{`ab?bc`, nil, `abbc`, []Group{{0, 4}}},
|
||||||
|
{`ab?bc`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`ab?bc`, nil, `abbbbc`, []Group{}},
|
||||||
|
{`ab?c`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`^abc$`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`^abc$`, nil, `abcc`, []Group{}},
|
||||||
|
{`^abc`, nil, `abcc`, []Group{{0, 3}}},
|
||||||
|
{`^abc$`, nil, `aabc`, []Group{}},
|
||||||
|
{`abc$`, nil, `aabc`, []Group{{1, 4}}},
|
||||||
|
{`^`, nil, `abc`, []Group{{0, 0}}},
|
||||||
|
{`$`, nil, `abc`, []Group{{3, 3}}},
|
||||||
|
{`a.c`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`a.c`, nil, `axc`, []Group{{0, 3}}},
|
||||||
|
{`a.*c`, nil, `axyzc`, []Group{{0, 5}}},
|
||||||
|
{`a.*c`, nil, `axyzd`, []Group{}},
|
||||||
|
{`a[bc]d`, nil, `abc`, []Group{}},
|
||||||
|
{`a[bc]d`, nil, `abd`, []Group{{0, 3}}},
|
||||||
|
{`a[b-d]e`, nil, `abd`, []Group{}},
|
||||||
|
{`a[b-d]e`, nil, `ace`, []Group{{0, 3}}},
|
||||||
|
{`a[b-d]`, nil, `aac`, []Group{{1, 3}}},
|
||||||
|
{`a[-b]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
|
||||||
|
{`a[\-b]`, nil, `a-`, []Group{{0, 2}}},
|
||||||
|
{`a[b-]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
|
||||||
|
|
||||||
|
{`a[]b`, nil, `-`, nil},
|
||||||
|
{`a[`, nil, `-`, nil},
|
||||||
|
{`a\`, nil, `-`, nil},
|
||||||
|
{`abc)`, nil, `-`, nil},
|
||||||
|
{`(abc`, nil, `-`, nil},
|
||||||
|
{`a]`, nil, `a]`, []Group{{0, 2}}},
|
||||||
|
{`a[]]b`, nil, `a]b`, []Group{{0, 3}}},
|
||||||
|
|
||||||
// Todo - add numeric range tests
|
// Todo - add numeric range tests
|
||||||
}
|
}
|
||||||
|
|
||||||
var groupTests = []struct {
|
var groupTests = []struct {
|
||||||
re string
|
re string
|
||||||
|
flags []ReFlag
|
||||||
str string
|
str string
|
||||||
result []Match
|
result []Match
|
||||||
}{
|
}{
|
||||||
{"(a)(b)", "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
|
{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
|
||||||
{"((a))(b)", "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
|
{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
|
||||||
{"(0)", "ab", []Match{[]Group{}}},
|
{"(0)", nil, "ab", []Match{[]Group{}}},
|
||||||
{"(a)b", "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
|
{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
|
||||||
{"a(b)", "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
|
{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
|
||||||
{"(a|b)", "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
|
{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
|
||||||
{"(a)|(b)", "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}},
|
{"(a)|(b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}},
|
||||||
{"(a+)(a)", "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}},
|
{"(a+)(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}},
|
||||||
{"(a+)|(a)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
||||||
{"(a+)(aa)", "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
|
{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
|
||||||
{"(aaaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
||||||
{"(aaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
|
{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
|
||||||
{"(aaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
|
{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
|
||||||
{"(aaaa)|(aaa)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
||||||
{"(a)|(aa)", "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
|
{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
|
||||||
{"(a?)a?", "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
|
{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
|
||||||
{"(a?)a?", "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
|
{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
|
||||||
{"(a?)a?", "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
|
{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
|
||||||
{"a((b.d){3})", "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}},
|
{"a((b.d){3})", nil, "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}},
|
||||||
{`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, `abcdefghijkl9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}},
|
{`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, nil, `abcdefghijkl9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFindAllMatches(t *testing.T) {
|
func TestFindAllMatches(t *testing.T) {
|
||||||
for _, test := range reTests {
|
for _, test := range reTests {
|
||||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||||
regComp, err := Compile(test.re)
|
regComp, err := Compile(test.re, test.flags...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(fmt.Errorf("Test Error: %v", err))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
matchIndices := FindAllMatches(regComp, test.str)
|
matchIndices := FindAllMatches(regComp, test.str)
|
||||||
@@ -234,7 +294,7 @@ func TestFindAllMatches(t *testing.T) {
|
|||||||
func TestFindString(t *testing.T) {
|
func TestFindString(t *testing.T) {
|
||||||
for _, test := range reTests {
|
for _, test := range reTests {
|
||||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||||
regComp, err := Compile(test.re)
|
regComp, err := Compile(test.re, test.flags...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
@@ -259,7 +319,7 @@ func TestFindString(t *testing.T) {
|
|||||||
func TestFindAllGroups(t *testing.T) {
|
func TestFindAllGroups(t *testing.T) {
|
||||||
for _, test := range groupTests {
|
for _, test := range groupTests {
|
||||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||||
regComp, err := Compile(test.re)
|
regComp, err := Compile(test.re, test.flags...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
|
Reference in New Issue
Block a user