21 Commits

Author SHA1 Message Date
4c96cfa06c Loosened restrictions for concatenation - It's okay if one of the
elements is missing
2025-01-25 13:09:47 -05:00
bd56c9c7b5 Updated test case 2025-01-25 13:09:29 -05:00
6cf523b7ea Added new assertion that always evaluates to true 2025-01-25 13:04:51 -05:00
ed2671849d Allowed creation of empty non-capturing groups 2025-01-25 13:04:36 -05:00
2309d35d30 Added more test cases 2025-01-25 13:04:08 -05:00
5afb7dd04a Added detection of empty parentheses, as zero-length matches 2025-01-25 12:44:40 -05:00
d5007a3fd5 Added more test cases 2025-01-25 12:23:15 -05:00
5c4d979d7e Fixed bug in character class implementation 2025-01-24 19:48:53 -05:00
Aadhavan Srinivasan
435588274c WIP - fixing character classes 2025-01-24 17:06:19 -05:00
Aadhavan Srinivasan
a347ebacc4 Added more test cases 2025-01-24 17:06:00 -05:00
Aadhavan Srinivasan
ccb82f781b Enforce the rule that character classes must have at least one character; interpret literal closing brackets as regular characters 2025-01-24 15:50:36 -05:00
Aadhavan Srinivasan
09bbf8d3f1 Refactored isNormalChar(), wrote function to get special characters that have metachar replacements 2025-01-24 15:49:33 -05:00
Aadhavan Srinivasan
d5b4450e50 Added more test cases (1 failing) 2025-01-24 14:58:18 -05:00
Aadhavan Srinivasan
45827b5dd3 Allow hyphen to be escaped inside character class 2025-01-24 14:58:07 -05:00
Aadhavan Srinivasan
c26edcb0c4 Fixed edge cases with character ranges and character classes 2025-01-24 14:57:47 -05:00
Aadhavan Srinivasan
110298b6a6 Added 'flags' field to test struct for all-group tests 2025-01-24 11:11:48 -05:00
Aadhavan Srinivasan
eff4c5a5df Added 'flags' field to test struct for 0-group tests 2025-01-24 11:10:01 -05:00
0bd7a87797 Removed old comment 2025-01-22 20:27:35 -05:00
9cf1c66653 Implemented character range detection later in the code, using a metacharacter 2025-01-22 20:26:58 -05:00
9edc99d73c Modified genRange() so that it can work on ints and runes 2025-01-22 20:25:49 -05:00
Aadhavan Srinivasan
6850396bf9 Removed character range creation from the first part of shuntingYard() (the part that adds concatenation operators), because octal and hex values haven't yet been deciphered at this point in the code 2025-01-22 16:51:00 -05:00
5 changed files with 445 additions and 226 deletions

View File

@@ -82,6 +82,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
//
// Another check is made for unescaped brackets - opening brackets are replaced with LBRACKET and closing brackets are replaced with RBRACKET.
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
// these will be converted back. This avoids confusion in detecting whether a character is escaped eg. detecting
// whether '\\[a]' has an escaped opening bracket (it doesn't).
@@ -122,6 +123,12 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
re_runes = append(re_runes, ESC_BACKSLASH)
i++
} else if c == '[' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, LBRACKET)
continue
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, RBRACKET)
continue
} else {
re_runes = append(re_runes, c)
}
@@ -141,39 +148,28 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i := 0
for i < len(re_runes) {
re_postfix = append(re_postfix, re_runes[i])
if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped. Inside this block, the only task is to expand character ranges into their constituent characters.
re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class
toAppend := make([]rune, 0) // Holds all the runes in the current character class
if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets
re_postfix = append(re_postfix, '^')
i++ // Skip opening bracket and caret
if re_runes[i] == LBRACKET && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped.
toAppend := make([]rune, 0) // Holds all the runes in the current character class
i++ // Skip past LBRACKET, because it was already added
if i >= len(re_runes) { // Sanity check before we start
return nil, fmt.Errorf("Opening bracket without closing bracket.")
}
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
return nil, fmt.Errorf("Empty character class.")
}
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// TODO: Check for escaped characters
// Check ahead for character range
if i < len(re_runes)-2 && re_runes[i+1] == '-' {
rangeStart := re_runes[i]
rangeEnd := re_runes[i+2]
if int(rangeEnd) < int(rangeStart) {
return nil, fmt.Errorf("Range is out of order.")
}
for re_runes[i] != RBRACKET || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
if i >= len(re_runes) {
return nil, fmt.Errorf("Opening bracket without closing bracket.")
}
for i := rangeStart; i <= rangeEnd; i++ {
toAppend = append(toAppend, i)
}
i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)
continue
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != RBRACKET) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
re_runes[i] = CHAR_RANGE
}
toAppend = append(toAppend, re_runes[i])
i++
}
// Replace the last character (which should have been ']') with RBRACKET
toAppend[len(toAppend)-1] = RBRACKET
// Add in the RBRACKET
toAppend = append(toAppend, RBRACKET)
re_postfix = append(re_postfix, toAppend...)
}
if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
@@ -280,7 +276,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
*/
c := re_postfix[i]
if isNormalChar(c) {
if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) {
if caseInsensitive {
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
} else {
@@ -288,7 +284,18 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
continue
}
// Escape character
// Since every unescaped bracket is replaced by a LBRACKET / RBRACKET, there may
// have been false positives. For example, the regex ']' has a closing bracket, but it
// isn't denoting a character class; it's just a regular character. Since it's not escaped,
// though, I would have converted this into an RBRACKET.
// To deal with this, I make the following assertion:
// If at any point I see an RBRACKET 'in the wild' (not in a character class), then it must be
// a regular character, with no special significance.
if c == RBRACKET {
outQueue = append(outQueue, newPostfixCharNode(']'))
continue
}
if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
@@ -420,7 +427,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
}
if c == LBRACKET { // Used for character classes
i++ // Step forward so we can look at the character class
firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
i++ // Step forward so we can look at the character class
// Oops, there's nothing there to look at
if i >= len(re_postfix) {
return nil, fmt.Errorf("Opening bracket with no closing bracket.")
}
var invertMatch bool
if re_postfix[i] == '^' {
invertMatch = true
@@ -428,9 +441,14 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
for i < len(re_postfix) {
if re_postfix[i] == RBRACKET {
if firstCharAdded && re_postfix[i] == RBRACKET {
break
}
if re_postfix[i] == CHAR_RANGE {
endOfRange = true
i++
continue
}
if re_postfix[i] == '\\' { // Backslash indicates a character to be escaped
if i == len(re_postfix)-1 {
return nil, fmt.Errorf("Stray backslash in character class.")
@@ -483,13 +501,54 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i++
}
} else {
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket.
switch re_postfix[i] {
case LBRACKET:
chars = append(chars, newPostfixCharNode('['))
case RBRACKET:
chars = append(chars, newPostfixCharNode(']'))
default:
return nil, fmt.Errorf("Error parsing high-range unicode value in character class.")
}
}
chars = append(chars, newPostfixCharNode(re_postfix[i]))
i++
}
firstCharAdded = true
if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range
// Things to note:
// 1. In PCRE and Go's regex engine, a letter _can_ be surrounded by hyphens in a character class.
// Eg. [a-b-c]
// While you might think this leads to a syntax error (I thought so), the engine picks 'a-b' as a range,
// then treats the second '-' and 'c' as regular characters in the character class.
// So this regex becomes "Match a character from 'a' to 'b', a literal hyphen, or 'c' ".
// 2. To account for this, the following logic is followed:
// a. If the second-to-last postfixNode ie. the start of the range has only one element, then we are in a range.
// i. If it has more than one element, then we are actually looking at a literal hyphen, and we will treat it as such.
// ii. If either the start or end of the range don't exist in 'chars' ie. something like [-a] or [a-], then too will we treat it as a literal hyphen.
// b. The last postfixNode added to 'chars' _must_ only have one character (because it's the end of the range).
endRangePostfixNode, err1 := pop(&chars)
startRangePostfixNode, err2 := pop(&chars)
if (err1 != nil || err2 != nil) || len(startRangePostfixNode.contents) != 1 { // Treat it as a regular hyphen
chars = append(chars, startRangePostfixNode, newPostfixCharNode('-'), endRangePostfixNode)
} else if len(endRangePostfixNode.contents) != 1 { // I don't even know what this would look like, this is just a sanity check
return nil, fmt.Errorf("Error parsing character range.")
} else {
// We have established that they both have a length of 1
startRangeRune := startRangePostfixNode.contents[0]
endRangeRune := endRangePostfixNode.contents[0]
chars = append(chars, newPostfixCharNode(genRange(startRangeRune, endRangeRune+1)...))
}
endOfRange = false // Reset the flag
}
}
if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing brakcet. Panic.
return nil, fmt.Errorf("Opening bracket without closing bracket.")
}
outQueue = append(outQueue, newCharClassNode(chars, invertMatch))
continue
}
@@ -599,6 +658,21 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
func thompson(re []postfixNode) (Reg, error) {
nfa := make([]*State, 0) // Stack of states
numGroups := 0 // Number of capturing groups
// If thompson() receives an empty regex, then whatever was given to shuntingYard()
// was parsed away. This doesn't mean that the regex itself is empty.
// For example, it could have been '(?:)'. This is an empty non-capturing group. Since
// shuntingYard() doesn't include non-capturing groups in its output (and the group contains
// nothing), the output of shuntingYard() (and the input to thompson()) ends up being empty.
// In these cases, we will return an NFA with 1 state, with an assertion that is always true.
if len(re) == 0 {
start := newState()
start.content = newContents(EPSILON)
start.isEmpty = true
start.assert = ALWAYS_TRUE
nfa = append(nfa, &start)
}
for _, c := range re {
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
state := State{}
@@ -681,8 +755,7 @@ func thompson(re []postfixNode) (Reg, error) {
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
// Uncommenting this seems to make one of the test cases fail. Why?
// replaceByValue(state.except, ESC_BACKSLASH, '\\')
replaceByValue(state.except, ESC_BACKSLASH, '\\')
nfa = append(nfa, &state)
}
@@ -706,15 +779,36 @@ func thompson(re []postfixNode) (Reg, error) {
// and then some other node.
// These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated
// and added back in.
// If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN
// and RPAREN nodes.
// If neither node exists, that's a problem so I return an error.
if c.nodetype == RPAREN {
s.groupEnd = true
middleNode := mustPop(&nfa)
lparenNode := mustPop(&nfa)
s.groupNum = lparenNode.groupNum
tmp := concatenate(lparenNode, middleNode)
to_add := concatenate(tmp, s)
nfa = append(nfa, to_add)
middleNode, err1 := pop(&nfa)
lparenNode, err2 := pop(&nfa)
if err1 != nil && err2 != nil {
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
} else if err2 != nil { // There was no third node. ie. something like '()'
lparenNode = middleNode
if lparenNode.groupBegin != true { // There are only two nodes, but the first one isn't an LPAREN.
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
}
s.groupNum = lparenNode.groupNum
to_add := concatenate(lparenNode, s)
nfa = append(nfa, to_add)
} else {
// At this point, we assume all three nodes are valid ('lparenNode', 'middleNode' and 's')
if lparenNode.groupBegin {
s.groupNum = lparenNode.groupNum
} else if middleNode.groupBegin { // Something like 'a()'
s.groupNum = middleNode.groupNum
} else { // A middleNode and lparenNode exist, but neither is actually an LPAREN.
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
}
tmp := concatenate(lparenNode, middleNode)
to_add := concatenate(tmp, s)
nfa = append(nfa, to_add)
}
}
}
if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated
@@ -734,9 +828,16 @@ func thompson(re []postfixNode) (Reg, error) {
switch c.nodetype {
case CONCATENATE:
s2 := mustPop(&nfa)
s1 := mustPop(&nfa)
s1 = concatenate(s1, s2)
nfa = append(nfa, s1)
// Relax the requirements for concatenation a little bit - If
// the second element is not found ie. the postfixNodes look
// like 'a~', then that's fine, we just skip the concatenation.
s1, err := pop(&nfa)
if err != nil {
nfa = append(nfa, s2)
} else {
s1 = concatenate(s1, s2)
nfa = append(nfa, s1)
}
case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
s1 := mustPop(&nfa)
stateToAdd := kleene(*s1)

24
misc.go
View File

@@ -15,6 +15,14 @@ var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with thi
var RPAREN_CHAR rune = 0xF0005
var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash
var CHAR_RANGE rune = 0xF0008 // Represents a character range
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', '~', '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
// character is a type constraint satisfied by int and rune. The two are
// distinct types (rune is an alias for int32, not int), so this constraint is
// what lets a generic function such as genRange accept either of them.
type character interface {
int | rune
}
// Returns true if str[idx] and str[idx-1] are separated by a word boundary.
func isWordBoundary(str []rune, idx int) bool {
@@ -26,9 +34,17 @@ func isWordBoundary(str []rune, idx int) bool {
return wbounded
}
// isSpecialChar reports whether c is one of the runes listed in the
// package-level specialChars slice.
func isSpecialChar(c rune) bool {
	pos := slices.Index(specialChars, c)
	return pos >= 0
}
// isSpecialCharWithMetacharReplacement reports whether c is a literal '[' or
// ']'. These special characters have metacharacter replacements, so when they
// are encountered in their literal form they can be treated as regular
// characters.
func isSpecialCharWithMetacharReplacement(c rune) bool {
	replaceable := []rune{'[', ']'}
	return slices.Index(replaceable, c) >= 0
}
// isNormalChar reports whether c is an ordinary character: it returns false
// for every rune in the literal list below and for the LBRACKET, RBRACKET and
// NONCAPLPAREN_CHAR metacharacters, and true for everything else.
func isNormalChar(c rune) bool {
	reserved := append([]rune(`?*\^${}()+|[].~<>`), LBRACKET, RBRACKET, NONCAPLPAREN_CHAR)
	return slices.Index(reserved, c) < 0
}
@@ -109,8 +125,8 @@ func Reduce[T any](slc []T, fn func(T, T) T) T {
}
// Generate numbers in a range - start (inclusive) to end (exclusive)
func genRange(start, end int) []int {
toRet := make([]int, end-start)
func genRange[T character](start, end T) []T {
toRet := make([]T, end-start)
for i := start; i < end; i++ {
toRet[i-start] = i
}

12
nfa.go
View File

@@ -14,10 +14,11 @@ const (
EOS
WBOUND
NONWBOUND
PLA // Positive lookahead
NLA // Negative lookahead
PLB // Positive lookbehind
NLB // Negative lookbehind
PLA // Positive lookahead
NLA // Negative lookahead
PLB // Positive lookbehind
NLB // Negative lookbehind
ALWAYS_TRUE // An assertion that is always true
)
type State struct {
@@ -103,6 +104,9 @@ func cloneStateHelper(state *State, cloneMap map[*State]*State) *State {
// Checks if the given state's assertion is true. Returns true if the given
// state doesn't have an assertion.
func (s State) checkAssertion(str []rune, idx int) bool {
if s.assert == ALWAYS_TRUE {
return true
}
if s.assert == SOS {
return idx == 0
}

View File

@@ -116,6 +116,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
case 'v': // Vertical tab
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, rune(11))
case '-': // Literal hyphen - only in character class
if inCharClass {
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, '-')
} else {
return postfixNode{}, fmt.Errorf("Invalid escape character.")
}
default: // None of the above - append it as a regular character
if isNormalChar(c) { // Normal characters cannot be escaped
return postfixNode{}, fmt.Errorf("Invalid escape character.")

View File

@@ -1,221 +1,312 @@
package main
import (
"fmt"
"slices"
"testing"
)
var reTests = []struct {
re string
flags []ReFlag
str string
result []Group // Stores all zero-groups in the match
}{
{"a", "abc", []Group{{0, 1}}},
{"a", "bca", []Group{{2, 3}}},
{"l", "ggllgg", []Group{{2, 3}, {3, 4}}},
{"(b|c)", "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}},
{"a+", "brerereraaaaabbbbb", []Group{{8, 13}}},
{"ab+", "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}},
{"(b|c|A)", "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}},
{"ab*", "a", []Group{{0, 1}}},
{"ab*", "abb", []Group{{0, 3}}},
{"a*b", "aaab", []Group{{0, 4}}},
{"a*b", "qwqw", []Group{}},
{"(abc)*", "abcabcabc", []Group{{0, 9}, {9, 9}}},
{"((abc)|(def))*", "abcdef", []Group{{0, 6}, {6, 6}}},
{"(abc)*|(def)*", "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
{"b*a*a", "bba", []Group{{0, 3}}},
{"(ab)+", "abcabddd", []Group{{0, 2}, {3, 5}}},
{"a(b(c|d)*)*", "abccbd", []Group{{0, 6}}},
{"a(b|c)*d+", "abccdd", []Group{{0, 6}}},
{"a*", "", []Group{{0, 0}}},
{"a|b", "c", []Group{}},
{"(a|b)*c", "aabbc", []Group{{0, 5}}},
{"a(b|b)", "ab", []Group{{0, 2}}},
{"a*", "aaaaaaaa", []Group{{0, 8}, {8, 8}}},
{"a", nil, "abc", []Group{{0, 1}}},
{"a", nil, "bca", []Group{{2, 3}}},
{"l", nil, "ggllgg", []Group{{2, 3}, {3, 4}}},
{"(b|c)", nil, "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}},
{"a+", nil, "brerereraaaaabbbbb", []Group{{8, 13}}},
{"ab+", nil, "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}},
{"(b|c|A)", nil, "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}},
{"ab*", nil, "a", []Group{{0, 1}}},
{"ab*", nil, "abb", []Group{{0, 3}}},
{"a*b", nil, "aaab", []Group{{0, 4}}},
{"a*b", nil, "qwqw", []Group{}},
{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
{"b*a*a", nil, "bba", []Group{{0, 3}}},
{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
{"a(b|c)*d+", nil, "abccdd", []Group{{0, 6}}},
{"a*", nil, "", []Group{{0, 0}}},
{"a|b", nil, "c", []Group{}},
{"(a|b)*c", nil, "aabbc", []Group{{0, 5}}},
{"a(b|b)", nil, "ab", []Group{{0, 2}}},
{"a*", nil, "aaaaaaaa", []Group{{0, 8}, {8, 8}}},
{"ab?", "ab", []Group{{0, 2}}},
{"a?b", "ab", []Group{{0, 2}}},
{"a?", "", []Group{{0, 0}}},
{"a?b?c", "a", []Group{}},
{"a?b?c?", "ab", []Group{{0, 2}, {2, 2}}},
{"a?b?c?", "ac", []Group{{0, 2}, {2, 2}}},
{"a?b?c", "abc", []Group{{0, 3}}},
{"a?b?c", "acb", []Group{{0, 2}}},
{"ab?", nil, "ab", []Group{{0, 2}}},
{"a?b", nil, "ab", []Group{{0, 2}}},
{"a?", nil, "", []Group{{0, 0}}},
{"a?b?c", nil, "a", []Group{}},
{"a?b?c?", nil, "ab", []Group{{0, 2}, {2, 2}}},
{"a?b?c?", nil, "ac", []Group{{0, 2}, {2, 2}}},
{"a?b?c", nil, "abc", []Group{{0, 3}}},
{"a?b?c", nil, "acb", []Group{{0, 2}}},
{"[abc]", "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}},
{"[ab]c", "ab", []Group{}},
{"g[ab]c", "gac", []Group{{0, 3}}},
{"g[ab]c", "gbc", []Group{{0, 3}}},
{"g[ab]c", "gc", []Group{}},
{"g[ab]c", "gfc", []Group{}},
{"[ab]*", "aabbbabaababab", []Group{{0, 14}, {14, 14}}},
{"[ab]+", "aabbbablaababab", []Group{{0, 7}, {8, 15}}},
{"[Ff]r[Uu]it", "fruit", []Group{{0, 5}}},
{"[Ff]r[Uu]it", "FrUit", []Group{{0, 5}}},
{"[Ff]r[Uu|]it", "Fr|it", []Group{{0, 5}}},
{"[Ff]r([Uu]|[pP])it", "Frpit", []Group{{0, 5}}},
{"[Ff]r[Uu]|[pP]it", "Frpit", []Group{{2, 5}}},
{"[a-zA-Z]+", "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
{"[abc]", nil, "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}},
{"[ab]c", nil, "ab", []Group{}},
{"g[ab]c", nil, "gac", []Group{{0, 3}}},
{"g[ab]c", nil, "gbc", []Group{{0, 3}}},
{"g[ab]c", nil, "gc", []Group{}},
{"g[ab]c", nil, "gfc", []Group{}},
{"[ab]*", nil, "aabbbabaababab", []Group{{0, 14}, {14, 14}}},
{"[ab]+", nil, "aabbbablaababab", []Group{{0, 7}, {8, 15}}},
{"[Ff]r[Uu]it", nil, "fruit", []Group{{0, 5}}},
{"[Ff]r[Uu]it", nil, "FrUit", []Group{{0, 5}}},
{"[Ff]r[Uu|]it", nil, "Fr|it", []Group{{0, 5}}},
{"[Ff]r([Uu]|[pP])it", nil, "Frpit", []Group{{0, 5}}},
{"[Ff]r[Uu]|[pP]it", nil, "Frpit", []Group{{2, 5}}},
{"[a-zA-Z]+", nil, "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
{".+", "Hello, how is it going?", []Group{{0, 23}}},
{"a.", "a ", []Group{{0, 2}}},
{"a.b", "a/b", []Group{{0, 3}}},
{".", "a ", []Group{{0, 1}, {1, 2}}},
{"a.", "a ", []Group{{0, 2}}},
{".+b", "abc", []Group{{0, 2}}},
{".+", nil, "Hello, how is it going?", []Group{{0, 23}}},
{"a.", nil, "a ", []Group{{0, 2}}},
{"a.b", nil, "a/b", []Group{{0, 3}}},
{".", nil, "a ", []Group{{0, 1}, {1, 2}}},
{"a.", nil, "a ", []Group{{0, 2}}},
{".+b", nil, "abc", []Group{{0, 2}}},
{`\d`, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
{`\\`, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
{`\W`, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
{`\w`, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
{`\s`, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
{`\<`, "<HTML><body>", []Group{{0, 1}, {6, 7}}},
{`\(.+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}},
{`\d`, nil, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
{`\\`, nil, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
{`\W`, nil, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
{`\w`, nil, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
{`\s`, nil, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
{`\<`, nil, "<HTML><body>", []Group{{0, 1}, {6, 7}}},
{`\(.+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}},
{"[^abc]+", "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
{"[^a]+", "qqqaq", []Group{{0, 3}, {4, 5}}},
{"[^0-9]+", "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
{"[^abc]+", "ababababbababaccacacacaca", []Group{}},
{`\[`, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}},
{`\([^)]+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}},
{"[^abc]+", nil, "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
{"[^a]+", nil, "qqqaq", []Group{{0, 3}, {4, 5}}},
{"[^0-9]+", nil, "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
{"[^abc]+", nil, "ababababbababaccacacacaca", []Group{}},
{`\[`, nil, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}},
{`\([^)]+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}},
{"^ab", "ab bab", []Group{{0, 2}}},
{"^aaaa^", "aaaaaaaa", []Group{}},
{"^([bB][Gg])", "bG", []Group{{0, 2}}},
{"b$", "ba", []Group{}},
{"(boy|girl)$", "girlf", []Group{}},
{`\bint\b`, "print int integer", []Group{{6, 9}}},
{`int\b`, "ints", []Group{}},
{`int(\b|a)`, "inta", []Group{{0, 4}}},
{`\b\d+\b`, "511 a3 43", []Group{{0, 3}, {7, 9}}},
{`\Bint\B`, "prints int integer print", []Group{{2, 5}}},
{`^`, "5^3^2", []Group{{0, 0}}},
{`\^`, "5^3^2", []Group{{1, 2}, {3, 4}}},
{`pool$`, "pool carpool", []Group{{8, 12}}},
{`^int$`, "print int integer", []Group{}},
{`^int$`, "int", []Group{{0, 3}}},
{`b*`, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
{"^ab", nil, "ab bab", []Group{{0, 2}}},
{"^aaaa^", nil, "aaaaaaaa", []Group{}},
{"^([bB][Gg])", nil, "bG", []Group{{0, 2}}},
{"b$", nil, "ba", []Group{}},
{"(boy|girl)$", nil, "girlf", []Group{}},
{`\bint\b`, nil, "print int integer", []Group{{6, 9}}},
{`int\b`, nil, "ints", []Group{}},
{`int(\b|a)`, nil, "inta", []Group{{0, 4}}},
{`\b\d+\b`, nil, "511 a3 43", []Group{{0, 3}, {7, 9}}},
{`\Bint\B`, nil, "prints int integer print", []Group{{2, 5}}},
{`^`, nil, "5^3^2", []Group{{0, 0}}},
{`\^`, nil, "5^3^2", []Group{{1, 2}, {3, 4}}},
{`pool$`, nil, "pool carpool", []Group{{8, 12}}},
{`^int$`, nil, "print int integer", []Group{}},
{`^int$`, nil, "int", []Group{{0, 3}}},
{`b*`, nil, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
{"a{4}", "aabaaa", []Group{}},
{"ab{5}", "abbbbbab", []Group{{0, 6}}},
{"(a|b){3,4}", "aba", []Group{{0, 3}}},
{"(a|b){3,4}", "ababaa", []Group{{0, 4}}},
{"(bc){5,}", "bcbcbcbcbcbcbcbc", []Group{{0, 16}}},
{`\d{3,4}`, "1209", []Group{{0, 4}}},
{`\d{3,4}`, "109", []Group{{0, 3}}},
{`\d{3,4}`, "5", []Group{}},
{`\d{3,4}`, "123135", []Group{{0, 4}}},
{`\d{3,4}`, "89a-0", []Group{}},
{`\d{3,4}`, "ababab555", []Group{{6, 9}}},
{`\bpaint\b`, "paints", []Group{}},
{`\b\w{5}\b`, "paint", []Group{{0, 5}}},
{`[^\w]`, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
{`[^\W]`, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
{`[\[\]]`, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
{"a{4}", nil, "aabaaa", []Group{}},
{"ab{5}", nil, "abbbbbab", []Group{{0, 6}}},
{"(a|b){3,4}", nil, "aba", []Group{{0, 3}}},
{"(a|b){3,4}", nil, "ababaa", []Group{{0, 4}}},
{"(bc){5,}", nil, "bcbcbcbcbcbcbcbc", []Group{{0, 16}}},
{`\d{3,4}`, nil, "1209", []Group{{0, 4}}},
{`\d{3,4}`, nil, "109", []Group{{0, 3}}},
{`\d{3,4}`, nil, "5", []Group{}},
{`\d{3,4}`, nil, "123135", []Group{{0, 4}}},
{`\d{3,4}`, nil, "89a-0", []Group{}},
{`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
{`\bpaint\b`, nil, "paints", []Group{}},
{`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
{`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
{`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
{`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
// Unicode tests
{`.+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
{`a.b`, "a²b", []Group{{0, 3}}},
{`[^a]+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
{`.+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
{`a.b`, nil, "a²b", []Group{{0, 3}}},
{`[^a]+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
// Fun experiment - AI-generated tests
{"(abc|def|ghi)", "abcdefg", []Group{{0, 3}, {3, 6}}},
{"a(b|c)d", "abcd", []Group{}},
{"a(b|c)*d", "abcbcd", []Group{{0, 6}}},
{"a(b|c)+d", "abcbcd", []Group{{0, 6}}},
{"a(b|c)?d", "abd", []Group{{0, 3}}},
{".+", "hello world", []Group{{0, 11}}},
{"a.b", "aXb", []Group{{0, 3}}},
{"a.*b", "aXb", []Group{{0, 3}}},
{"a.{2,3}b", "aXXb", []Group{{0, 4}}},
{"a.{2,}b", "aXXXb", []Group{{0, 5}}},
{"a.{0,3}b", "ab", []Group{{0, 2}}},
{"[abc]+", "abcabc", []Group{{0, 6}}},
{"[a-zA-Z]+", "HelloWorld", []Group{{0, 10}}},
{"[^abc]+", "defghi", []Group{{0, 6}}},
{"^hello", "hello world", []Group{{0, 5}}},
{"world$", "hello world", []Group{{6, 11}}},
{`\bhello\b`, "hello world", []Group{{0, 5}}},
{`\Bhello\B`, "hello world", []Group{}},
{"(hello|world)", "hello world", []Group{{0, 5}, {6, 11}}},
{"(hello|world)+", "hello world", []Group{{0, 5}, {6, 11}}},
{"(hello|world)*", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
{"(hello|world)?", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
{"ú.+ï", "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}},
{"(?=hello)", "hello world", []Group{{0, 0}}},
{"(?!hello)", "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
{"(?<=hello)", "hello world", []Group{{5, 5}}},
{"(?<!hello)", "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "40", []Group{{0, 2}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "040", []Group{}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "400", []Group{{0, 3}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "4000", []Group{}},
{"a{1,3}", "aaaaa", []Group{{0, 3}, {3, 5}}},
{`\\[ab\\]`, "a", []Group{}},
{`\\[ab\\]`, `\a`, []Group{{0, 2}}},
{"(abc|def|ghi)", nil, "abcdefg", []Group{{0, 3}, {3, 6}}},
{"a(b|c)d", nil, "abcd", []Group{}},
{"a(b|c)*d", nil, "abcbcd", []Group{{0, 6}}},
{"a(b|c)+d", nil, "abcbcd", []Group{{0, 6}}},
{"a(b|c)?d", nil, "abd", []Group{{0, 3}}},
{".+", nil, "hello world", []Group{{0, 11}}},
{"a.b", nil, "aXb", []Group{{0, 3}}},
{"a.*b", nil, "aXb", []Group{{0, 3}}},
{"a.{2,3}b", nil, "aXXb", []Group{{0, 4}}},
{"a.{2,}b", nil, "aXXXb", []Group{{0, 5}}},
{"a.{0,3}b", nil, "ab", []Group{{0, 2}}},
{"[abc]+", nil, "abcabc", []Group{{0, 6}}},
{"[a-zA-Z]+", nil, "HelloWorld", []Group{{0, 10}}},
{"[^abc]+", nil, "defghi", []Group{{0, 6}}},
{"^hello", nil, "hello world", []Group{{0, 5}}},
{"world$", nil, "hello world", []Group{{6, 11}}},
{`\bhello\b`, nil, "hello world", []Group{{0, 5}}},
{`\Bhello\B`, nil, "hello world", []Group{}},
{"(hello|world)", nil, "hello world", []Group{{0, 5}, {6, 11}}},
{"(hello|world)+", nil, "hello world", []Group{{0, 5}, {6, 11}}},
{"(hello|world)*", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
{"(hello|world)?", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
{"ú.+ï", nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}},
{"(?=hello)", nil, "hello world", []Group{{0, 0}}},
{"(?!hello)", nil, "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
{"(?<=hello)", nil, "hello world", []Group{{5, 5}}},
{"(?<!hello)", nil, "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "40", []Group{{0, 2}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "040", []Group{}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "400", []Group{{0, 3}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "4000", []Group{}},
{"a{1,3}", nil, "aaaaa", []Group{{0, 3}, {3, 5}}},
{`\\[ab\\]`, nil, "a", []Group{}},
{`\\[ab\\]`, nil, `\a`, []Group{{0, 2}}},
// Lookaround tests
{"(?<=bo)y", "boy", []Group{{2, 3}}},
{"bo(?=y)", "boy", []Group{{0, 2}}},
{"(?<=f)f+(?=f)", "fffff", []Group{{1, 4}}},
{"(?<=f)f+(?=f)", "fffffa", []Group{{1, 4}}},
{"(?<=bo)y", nil, "boy", []Group{{2, 3}}},
{"bo(?=y)", nil, "boy", []Group{{0, 2}}},
{"(?<=f)f+(?=f)", nil, "fffff", []Group{{1, 4}}},
{"(?<=f)f+(?=f)", nil, "fffffa", []Group{{1, 4}}},
// Test cases from Python's RE test suite
{`[\1]`, "\x01", []Group{{0, 1}}},
{`[\1]`, nil, "\x01", []Group{{0, 1}}},
{`\0`, "\x00", []Group{{0, 1}}},
{`[\0a]`, "\x00", []Group{{0, 1}}},
{`[\0a]`, "\x00", []Group{{0, 1}}},
{`[a\0]`, "\x00", []Group{{0, 1}}},
{`[^a\0]`, "\x00", []Group{}},
{`\0`, nil, "\x00", []Group{{0, 1}}},
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
{`[a\0]`, nil, "\x00", []Group{{0, 1}}},
{`[^a\0]`, nil, "\x00", []Group{}},
{`\a[\b]\f\n\r\t\v`, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
{`[\a][\b][\f][\n][\r][\t][\v]`, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
{`\u`, "", nil},
{`\xff`, "ÿ", []Group{{0, 1}}},
{`\x00ffffffffffffff`, "\xff", []Group{}},
{`\x00f`, "\x0f", []Group{}},
{`\x00fe`, "\xfe", []Group{}},
{`^\w+=(\\[\000-\277]|[^\n\\])*`, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
{`\a[\b]\f\n\r\t\v`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
{`[\a][\b][\f][\n][\r][\t][\v]`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
{`\u`, nil, "", nil},
{`\xff`, nil, "ÿ", []Group{{0, 1}}},
{`\x00ffffffffffffff`, nil, "\xff", []Group{}},
{`\x00f`, nil, "\x0f", []Group{}},
{`\x00fe`, nil, "\xfe", []Group{}},
{`^\w+=(\\[\000-\277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
{`a.b`, nil, `acb`, []Group{{0, 3}}},
{`a.b`, nil, "a\nb", []Group{}},
{`a.*b`, nil, "acc\nccb", []Group{}},
{`a.{4,5}b`, nil, "acc\nccb", []Group{}},
{`a.b`, nil, "a\rb", []Group{{0, 3}}},
{`a.b`, []ReFlag{RE_MULTILINE}, "a\nb", []Group{{0, 3}}},
{`a.*b`, []ReFlag{RE_MULTILINE}, "acc\nccb", []Group{{0, 7}}},
{`a.{4,5}b`, []ReFlag{RE_MULTILINE}, "acc\nccb", []Group{{0, 7}}},
{`)`, nil, ``, nil},
{`^$`, nil, ``, []Group{{0, 0}}},
{`abc`, nil, `abc`, []Group{{0, 3}}},
{`abc`, nil, `xbc`, []Group{}},
{`abc`, nil, `axc`, []Group{}},
{`abc`, nil, `abx`, []Group{}},
{`abc`, nil, `xabcy`, []Group{{1, 4}}},
{`abc`, nil, `ababc`, []Group{{2, 5}}},
{`ab*c`, nil, `abc`, []Group{{0, 3}}},
{`ab*bc`, nil, `abc`, []Group{{0, 3}}},
{`ab*bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab*bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab+bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab+bc`, nil, `abc`, []Group{}},
{`ab+bc`, nil, `abq`, []Group{}},
{`ab+bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab?bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab?bc`, nil, `abc`, []Group{{0, 3}}},
{`ab?bc`, nil, `abbbbc`, []Group{}},
{`ab?c`, nil, `abc`, []Group{{0, 3}}},
{`^abc$`, nil, `abc`, []Group{{0, 3}}},
{`^abc$`, nil, `abcc`, []Group{}},
{`^abc`, nil, `abcc`, []Group{{0, 3}}},
{`^abc$`, nil, `aabc`, []Group{}},
{`abc$`, nil, `aabc`, []Group{{1, 4}}},
{`^`, nil, `abc`, []Group{{0, 0}}},
{`$`, nil, `abc`, []Group{{3, 3}}},
{`a.c`, nil, `abc`, []Group{{0, 3}}},
{`a.c`, nil, `axc`, []Group{{0, 3}}},
{`a.*c`, nil, `axyzc`, []Group{{0, 5}}},
{`a.*c`, nil, `axyzd`, []Group{}},
{`a[bc]d`, nil, `abc`, []Group{}},
{`a[bc]d`, nil, `abd`, []Group{{0, 3}}},
{`a[b-d]e`, nil, `abd`, []Group{}},
{`a[b-d]e`, nil, `ace`, []Group{{0, 3}}},
{`a[b-d]`, nil, `aac`, []Group{{1, 3}}},
{`a[-b]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
{`a[\-b]`, nil, `a-`, []Group{{0, 2}}},
{`a[b-]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
{`a[]b`, nil, `-`, nil},
{`a[`, nil, `-`, nil},
{`a\`, nil, `-`, nil},
{`abc)`, nil, `-`, nil},
{`(abc`, nil, `-`, nil},
{`a]`, nil, `a]`, []Group{{0, 2}}},
{`a[]]b`, nil, `a]b`, []Group{{0, 3}}},
{`a[\]]b`, nil, `a]b`, []Group{{0, 3}}},
{`a[^bc]d`, nil, `aed`, []Group{{0, 3}}},
{`a[^bc]d`, nil, `abd`, []Group{}},
{`a[^-b]c`, nil, `adc`, []Group{{0, 3}}},
{`a[^-b]c`, nil, `a-c`, []Group{}},
{`a[^]b]c`, nil, `a]c`, []Group{}},
{`a[^]b]c`, nil, `adc`, []Group{{0, 3}}},
{`\ba\b`, nil, `a-`, []Group{{0, 1}}},
{`\ba\b`, nil, `-a`, []Group{{1, 2}}},
{`\ba\b`, nil, `-a-`, []Group{{1, 2}}},
{`\by\b`, nil, `xy`, []Group{}},
{`\by\b`, nil, `yz`, []Group{}},
{`\by\b`, nil, `xyz`, []Group{}},
{`x\b`, nil, `xyz`, []Group{}},
{`x\B`, nil, `xyz`, []Group{{0, 1}}},
{`\Bz`, nil, `xyz`, []Group{{2, 3}}},
{`z\B`, nil, `xyz`, []Group{}},
{`\Bx`, nil, `xyz`, []Group{}},
{`\Ba\B`, nil, `a-`, []Group{}},
{`\Ba\B`, nil, `-a`, []Group{}},
{`\Ba\B`, nil, `-a-`, []Group{}},
{`\By\B`, nil, `xy`, []Group{}},
{`\By\B`, nil, `yz`, []Group{}},
{`\By\b`, nil, `xy`, []Group{{1, 2}}},
{`\by\B`, nil, `yz`, []Group{{0, 1}}},
{`\By\B`, nil, `xyz`, []Group{{1, 2}}},
{`ab|cd`, nil, `abc`, []Group{{0, 2}}},
{`ab|cd`, nil, `abcd`, []Group{{0, 2}, {2, 4}}},
// Todo - add numeric range tests
}
// groupTests is the table of capture-group test cases: each row pairs a
// pattern and optional compile flags with an input string and the matches
// expected from it.
//
// In every row below, each expected Match starts with the span of the whole
// match and is followed by one Group per capturing group, in order of the
// opening parenthesis. A Group of {-1, -1} appears where a capturing group
// sits in an alternative that was not taken (presumably the engine's
// "group did not participate" marker; confirm against the matcher).
//
// Every row must supply all four fields. Rows written for the earlier
// three-field layout (without flags) do not compile against this struct and
// were exact duplicates of rows already present, so they are not included.
var groupTests = []struct {
	re     string   // pattern to compile
	flags  []ReFlag // flags passed to Compile; nil means no flags
	str    string   // input string to search
	result []Match  // expected matches, in order of occurrence
}{
	{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
	{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
	{"(0)", nil, "ab", []Match{[]Group{}}},
	{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
	{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
	{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
	{"(a)|(b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}},
	{"(a+)(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}},
	{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
	{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
	{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
	{"a((b.d){3})", nil, "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}},
	{`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, nil, `abcdefghijkl9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}},
	// Empty groups produce zero-length spans; a non-capturing group adds no
	// entry of its own to the Match.
	{`()ef`, nil, `def`, []Match{[]Group{{1, 3}, {1, 1}}}},
	{`(?:)ef`, nil, `def`, []Match{[]Group{{1, 3}}}},
	{`(?:)`, nil, `def`, []Match{[]Group{{0, 0}}, []Group{{1, 1}}, []Group{{2, 2}}, []Group{{3, 3}}}},
}
func TestFindAllMatches(t *testing.T) {
for _, test := range reTests {
t.Run(test.re+" "+test.str, func(t *testing.T) {
regComp, err := Compile(test.re)
regComp, err := Compile(test.re, test.flags...)
if err != nil {
if test.result != nil {
panic(err)
panic(fmt.Errorf("Test Error: %v", err))
}
} else {
matchIndices := FindAllMatches(regComp, test.str)
@@ -234,7 +325,7 @@ func TestFindAllMatches(t *testing.T) {
func TestFindString(t *testing.T) {
for _, test := range reTests {
t.Run(test.re+" "+test.str, func(t *testing.T) {
regComp, err := Compile(test.re)
regComp, err := Compile(test.re, test.flags...)
if err != nil {
if test.result != nil {
panic(err)
@@ -259,7 +350,7 @@ func TestFindString(t *testing.T) {
func TestFindAllGroups(t *testing.T) {
for _, test := range groupTests {
t.Run(test.re+" "+test.str, func(t *testing.T) {
regComp, err := Compile(test.re)
regComp, err := Compile(test.re, test.flags...)
if err != nil {
if test.result != nil {
panic(err)