21 Commits

Author SHA1 Message Date
08e01a1c81 Loosened restrictions for concatenation - It's okay if one of the
elements is missing
2025-01-25 13:09:47 -05:00
5c2869ff81 Updated test case 2025-01-25 13:09:29 -05:00
4dfc77900f Added new assertion that always evaluates to true 2025-01-25 13:04:51 -05:00
93903fc557 Allowed creation of empty non-capturing groups 2025-01-25 13:04:36 -05:00
036e625a15 Added more test cases 2025-01-25 13:04:08 -05:00
4966a222f9 Added detection of empty parentheses, as zero-length matches 2025-01-25 12:44:40 -05:00
263619c50c Added more test cases 2025-01-25 12:23:15 -05:00
d7c9c181e1 Fixed bug in character class implementation 2025-01-24 19:48:53 -05:00
5a085907cf WIP - fixing character classes 2025-01-24 17:06:19 -05:00
65e5b4e2af Added more test cases 2025-01-24 17:06:00 -05:00
1520edad55 Enforce the rule that character classes must have at least one character; interpret literal closing brackets as regular characters 2025-01-24 15:50:36 -05:00
6fb266e0d2 Refactored isNormalChar(), wrote function to get special characters that have metachar replacements 2025-01-24 15:49:33 -05:00
423fcc9b54 Added more test cases (1 failing) 2025-01-24 14:58:18 -05:00
cf4d305b31 Allow hyphen to be escaped inside character class 2025-01-24 14:58:07 -05:00
9d3c228ace Fixed edge cases with character ranges and character classes 2025-01-24 14:57:47 -05:00
5e12fe1c42 Added 'flags' field to test struct for all-group tests 2025-01-24 11:11:48 -05:00
f87458ee99 Added 'flags' field to test struct for 0-group tests 2025-01-24 11:10:01 -05:00
2937f2d917 Removed old comment 2025-01-22 20:27:35 -05:00
efab70f9dc Implemented character range detection later in the code, using a metacharacter 2025-01-22 20:26:58 -05:00
cf964e41db Modified genRange() so that it can work on ints and runes 2025-01-22 20:25:49 -05:00
649485f01d Removed character range creation from the first part of shuntingYard() (the part that adds concatenation operators), because octal and hex values haven't yet been deciphered at this point in the code 2025-01-22 16:51:00 -05:00
5 changed files with 445 additions and 226 deletions

View File

@@ -82,6 +82,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:' // Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
// I take this out, and put in a special character - NONCAPLPAREN_CHAR. // I take this out, and put in a special character - NONCAPLPAREN_CHAR.
// //
// Another check is made for unescaped brackets - opening brackets are replaced with LBRACKET and closing brackets are replaced with RBRACKET.
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(), // Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
// these will be converted back. This avoids confusion in detecting whether a character is escaped eg. detecting // these will be converted back. This avoids confusion in detecting whether a character is escaped eg. detecting
// whether '\\[a]' has an escaped opening bracket (it doesn't). // whether '\\[a]' has an escaped opening bracket (it doesn't).
@@ -122,6 +123,12 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash } else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
re_runes = append(re_runes, ESC_BACKSLASH) re_runes = append(re_runes, ESC_BACKSLASH)
i++ i++
} else if c == '[' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, LBRACKET)
continue
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, RBRACKET)
continue
} else { } else {
re_runes = append(re_runes, c) re_runes = append(re_runes, c)
} }
@@ -141,39 +148,28 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i := 0 i := 0
for i < len(re_runes) { for i < len(re_runes) {
re_postfix = append(re_postfix, re_runes[i]) re_postfix = append(re_postfix, re_runes[i])
if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped. Inside this block, the only task is to expand character ranges into their constituent characters. if re_runes[i] == LBRACKET && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped.
re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class toAppend := make([]rune, 0) // Holds all the runes in the current character class
toAppend := make([]rune, 0) // Holds all the runes in the current character class
if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets i++ // Skip past LBRACKET, because it was already added
re_postfix = append(re_postfix, '^') if i >= len(re_runes) { // Sanity check before we start
i++ // Skip opening bracket and caret return nil, fmt.Errorf("Opening bracket without closing bracket.")
} }
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
return nil, fmt.Errorf("Empty character class.")
}
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// TODO: Check for escaped characters
// Check ahead for character range for re_runes[i] != RBRACKET || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
if i < len(re_runes)-2 && re_runes[i+1] == '-' { // Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
rangeStart := re_runes[i] if i >= len(re_runes) {
rangeEnd := re_runes[i+2] return nil, fmt.Errorf("Opening bracket without closing bracket.")
if int(rangeEnd) < int(rangeStart) { }
return nil, fmt.Errorf("Range is out of order.")
}
for i := rangeStart; i <= rangeEnd; i++ { if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != RBRACKET) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
toAppend = append(toAppend, i) re_runes[i] = CHAR_RANGE
}
i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)
continue
} }
toAppend = append(toAppend, re_runes[i]) toAppend = append(toAppend, re_runes[i])
i++
} }
// Replace the last character (which should have been ']', with RBRACKET // Add in the RBRACKET
toAppend[len(toAppend)-1] = RBRACKET toAppend = append(toAppend, RBRACKET)
re_postfix = append(re_postfix, toAppend...) re_postfix = append(re_postfix, toAppend...)
} }
if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
@@ -280,7 +276,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue. 6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
*/ */
c := re_postfix[i] c := re_postfix[i]
if isNormalChar(c) { if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) {
if caseInsensitive { if caseInsensitive {
outQueue = append(outQueue, newPostfixNode(allCases(c)...)) outQueue = append(outQueue, newPostfixNode(allCases(c)...))
} else { } else {
@@ -288,7 +284,18 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
continue continue
} }
// Escape character // Since every unescaped bracket is replaced by a LBRACKET / RBRACKET, there may
// have been false positives. For example, the regex ']' has a closing bracket, but it
// isn't denoting a character class; it's just a regular character. Since it's not escaped,
// though, I would have converted this into an RBRACKET.
// To deal with this, I make the following assertion:
// If at any point I see an RBRACKET 'in the wild' (not in a character class), then it must be
// a regular character, with no special significance.
if c == RBRACKET {
outQueue = append(outQueue, newPostfixCharNode(']'))
continue
}
if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it) if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
return nil, fmt.Errorf("ERROR: Backslash with no escape character.") return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
@@ -420,7 +427,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
} }
if c == LBRACKET { // Used for character classes if c == LBRACKET { // Used for character classes
i++ // Step forward so we can look at the character class firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
i++ // Step forward so we can look at the character class
// Oops, there's nothing there to look at
if i >= len(re_postfix) {
return nil, fmt.Errorf("Opening bracket with no closing bracket.")
}
var invertMatch bool var invertMatch bool
if re_postfix[i] == '^' { if re_postfix[i] == '^' {
invertMatch = true invertMatch = true
@@ -428,9 +441,14 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
chars := make([]postfixNode, 0) // List of nodes - used only for character classes chars := make([]postfixNode, 0) // List of nodes - used only for character classes
for i < len(re_postfix) { for i < len(re_postfix) {
if re_postfix[i] == RBRACKET { if firstCharAdded && re_postfix[i] == RBRACKET {
break break
} }
if re_postfix[i] == CHAR_RANGE {
endOfRange = true
i++
continue
}
if re_postfix[i] == '\\' { // Backslash indicates a character to be escaped if re_postfix[i] == '\\' { // Backslash indicates a character to be escaped
if i == len(re_postfix)-1 { if i == len(re_postfix)-1 {
return nil, fmt.Errorf("Stray backslash in character class.") return nil, fmt.Errorf("Stray backslash in character class.")
@@ -483,13 +501,54 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i++ i++
} }
} else { } else {
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket.
switch re_postfix[i] {
case LBRACKET:
chars = append(chars, newPostfixCharNode('['))
case RBRACKET:
chars = append(chars, newPostfixCharNode(']'))
default:
return nil, fmt.Errorf("Error parsing high-range unicode value in character class.")
}
}
chars = append(chars, newPostfixCharNode(re_postfix[i])) chars = append(chars, newPostfixCharNode(re_postfix[i]))
i++ i++
} }
firstCharAdded = true
if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range
// Things to note:
// 1. In PCRE and Go's regex engine, a letter _can_ be surrounded by hyphens in a character class.
// Eg. [a-b-c]
// While you might think this leads to a syntax error (I thought so), the engine picks 'a-b' as a range,
// then treats the second '-' and 'c' as regular characters in the character class.
// So this regex becomes "Match a character from 'a' to 'b', a literal hyphen, or 'c' ".
// 2. To account for this, the following logic is followed:
// a. If the second-to-last postfixNode ie. the start of the range has only one element, then we are in a range.
// i. If it has more than one element, then we are actually looking at a literal hyphen, and we will treat it as such.
// ii. If either the start or end of the range don't exist in 'chars' ie. something like [-a] or [a-], then too will we treat it as a literal hyphen.
// b. The last postfixNode added to 'chars' _must_ only have one character (because it's the end of the range).
endRangePostfixNode, err1 := pop(&chars)
startRangePostfixNode, err2 := pop(&chars)
if (err1 != nil || err2 != nil) || len(startRangePostfixNode.contents) != 1 { // Treat it as a regular hyphen
chars = append(chars, startRangePostfixNode, newPostfixCharNode('-'), endRangePostfixNode)
} else if len(endRangePostfixNode.contents) != 1 { // I don't even know what this would look like, this is just a sanity check
return nil, fmt.Errorf("Error parsing character range.")
} else {
// We have established that they both have a length of 1
startRangeRune := startRangePostfixNode.contents[0]
endRangeRune := endRangePostfixNode.contents[0]
chars = append(chars, newPostfixCharNode(genRange(startRangeRune, endRangeRune+1)...))
}
endOfRange = false // Reset the flag
}
} }
if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing brakcet. Panic. if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing brakcet. Panic.
return nil, fmt.Errorf("Opening bracket without closing bracket.") return nil, fmt.Errorf("Opening bracket without closing bracket.")
} }
outQueue = append(outQueue, newCharClassNode(chars, invertMatch)) outQueue = append(outQueue, newCharClassNode(chars, invertMatch))
continue continue
} }
@@ -599,6 +658,21 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
func thompson(re []postfixNode) (Reg, error) { func thompson(re []postfixNode) (Reg, error) {
nfa := make([]*State, 0) // Stack of states nfa := make([]*State, 0) // Stack of states
numGroups := 0 // Number of capturing groups numGroups := 0 // Number of capturing groups
// If thompson() receives an empty regex, then whatever was given to shuntingYard()
// was parsed away. This doesn't mean that the regex itself is empty.
// For example, it could have been '(?:)'. This is an empty non-capturing group. Since
// shuntingYard() doesn't include non-capturing groups in its output (and the group contains
// nothing), the output of shuntingYard() (and the input to thompson()) ends up being empty.
// In these cases, we will return an NFA with 1 state, with an assertion that is always true.
if len(re) == 0 {
start := newState()
start.content = newContents(EPSILON)
start.isEmpty = true
start.assert = ALWAYS_TRUE
nfa = append(nfa, &start)
}
for _, c := range re { for _, c := range re {
if c.nodetype == CHARACTER || c.nodetype == ASSERTION { if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
state := State{} state := State{}
@@ -681,8 +755,7 @@ func thompson(re []postfixNode) (Reg, error) {
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it // Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\') replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
// Uncommenting this seems to make one of the test cases fail. Why? replaceByValue(state.except, ESC_BACKSLASH, '\\')
// replaceByValue(state.except, ESC_BACKSLASH, '\\')
nfa = append(nfa, &state) nfa = append(nfa, &state)
} }
@@ -706,15 +779,36 @@ func thompson(re []postfixNode) (Reg, error) {
// and then some other node. // and then some other node.
// These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated // These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated
// and added back in. // and added back in.
// If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN
// and RPAREN nodes.
// If neither node exists, that's a problem so I return an error.
if c.nodetype == RPAREN { if c.nodetype == RPAREN {
s.groupEnd = true s.groupEnd = true
middleNode := mustPop(&nfa) middleNode, err1 := pop(&nfa)
lparenNode := mustPop(&nfa) lparenNode, err2 := pop(&nfa)
s.groupNum = lparenNode.groupNum if err1 != nil && err2 != nil {
tmp := concatenate(lparenNode, middleNode) return Reg{}, fmt.Errorf("Imbalanced parentheses.")
to_add := concatenate(tmp, s) } else if err2 != nil { // There was no third node. ie. something like '()'
nfa = append(nfa, to_add) lparenNode = middleNode
if lparenNode.groupBegin != true { // There are only two nodes, but the first one isn't an LPAREN.
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
}
s.groupNum = lparenNode.groupNum
to_add := concatenate(lparenNode, s)
nfa = append(nfa, to_add)
} else {
// At this point, we assume all three nodes are valid ('lparenNode', 'middleNode' and 's')
if lparenNode.groupBegin {
s.groupNum = lparenNode.groupNum
} else if middleNode.groupBegin { // Something like 'a()'
s.groupNum = middleNode.groupNum
} else { // A middleNode and lparenNode exist, but neither is actually an LPAREN.
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
}
tmp := concatenate(lparenNode, middleNode)
to_add := concatenate(tmp, s)
nfa = append(nfa, to_add)
}
} }
} }
if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated
@@ -734,9 +828,16 @@ func thompson(re []postfixNode) (Reg, error) {
switch c.nodetype { switch c.nodetype {
case CONCATENATE: case CONCATENATE:
s2 := mustPop(&nfa) s2 := mustPop(&nfa)
s1 := mustPop(&nfa) // Relax the requirements for concatenation a little bit - If
s1 = concatenate(s1, s2) // the second element is not found ie. the postfixNodes look
nfa = append(nfa, s1) // like 'a~', then that's fine, we just skip the concatenation.
s1, err := pop(&nfa)
if err != nil {
nfa = append(nfa, s2)
} else {
s1 = concatenate(s1, s2)
nfa = append(nfa, s1)
}
case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
s1 := mustPop(&nfa) s1 := mustPop(&nfa)
stateToAdd := kleene(*s1) stateToAdd := kleene(*s1)

24
misc.go
View File

@@ -15,6 +15,14 @@ var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with thi
var RPAREN_CHAR rune = 0xF0005 var RPAREN_CHAR rune = 0xF0005
var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash
var CHAR_RANGE rune = 0xF0008 // Represents a character range
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', '~', '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
// character is a type constraint satisfied by int and rune.
// Note that the two are distinct types (rune is an alias for int32, not int);
// the constraint exists so that generic code can accept either integer type.
type character interface {
int | rune
}
// Returns true if str[idx] and str[idx-1] are separated by a word boundary. // Returns true if str[idx] and str[idx-1] are separated by a word boundary.
func isWordBoundary(str []rune, idx int) bool { func isWordBoundary(str []rune, idx int) bool {
@@ -26,9 +34,17 @@ func isWordBoundary(str []rune, idx int) bool {
return wbounded return wbounded
} }
// isSpecialChar reports whether c is one of the runes listed in the
// package-level specialChars slice.
func isSpecialChar(c rune) bool {
	return slices.Index(specialChars, c) >= 0
}
// isSpecialCharWithMetacharReplacement reports whether c is a literal '[' or ']'.
// These special characters have metacharacter replacements, so when they are
// encountered in their literal form they can be treated as regular characters.
func isSpecialCharWithMetacharReplacement(c rune) bool {
	replaceable := []rune{'[', ']'}
	return slices.Index(replaceable, c) != -1
}
func isNormalChar(c rune) bool { func isNormalChar(c rune) bool {
specialChars := []rune(`?*\^${}()+|[].~<>`)
specialChars = append(specialChars, LBRACKET, RBRACKET, NONCAPLPAREN_CHAR)
return !slices.Contains(specialChars, c) return !slices.Contains(specialChars, c)
} }
@@ -109,8 +125,8 @@ func Reduce[T any](slc []T, fn func(T, T) T) T {
} }
// Generate numbers in a range - start (inclusive) to end (exclusive) // Generate numbers in a range - start (inclusive) to end (exclusive)
func genRange(start, end int) []int { func genRange[T character](start, end T) []T {
toRet := make([]int, end-start) toRet := make([]T, end-start)
for i := start; i < end; i++ { for i := start; i < end; i++ {
toRet[i-start] = i toRet[i-start] = i
} }

12
nfa.go
View File

@@ -14,10 +14,11 @@ const (
EOS EOS
WBOUND WBOUND
NONWBOUND NONWBOUND
PLA // Positive lookahead PLA // Positive lookahead
NLA // Negative lookahead NLA // Negative lookahead
PLB // Positive lookbehind PLB // Positive lookbehind
NLB // Negative lookbehind NLB // Negative lookbehind
ALWAYS_TRUE // An assertion that is always true
) )
type State struct { type State struct {
@@ -103,6 +104,9 @@ func cloneStateHelper(state *State, cloneMap map[*State]*State) *State {
// Checks if the given state's assertion is true. Returns true if the given // Checks if the given state's assertion is true. Returns true if the given
// state doesn't have an assertion. // state doesn't have an assertion.
func (s State) checkAssertion(str []rune, idx int) bool { func (s State) checkAssertion(str []rune, idx int) bool {
if s.assert == ALWAYS_TRUE {
return true
}
if s.assert == SOS { if s.assert == SOS {
return idx == 0 return idx == 0
} }

View File

@@ -116,6 +116,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
case 'v': // Vertical tab case 'v': // Vertical tab
toReturn.nodetype = CHARACTER toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, rune(11)) toReturn.contents = append(toReturn.contents, rune(11))
case '-': // Literal hyphen - only in character class
if inCharClass {
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, '-')
} else {
return postfixNode{}, fmt.Errorf("Invalid escape character.")
}
default: // None of the above - append it as a regular character default: // None of the above - append it as a regular character
if isNormalChar(c) { // Normal characters cannot be escaped if isNormalChar(c) { // Normal characters cannot be escaped
return postfixNode{}, fmt.Errorf("Invalid escape character.") return postfixNode{}, fmt.Errorf("Invalid escape character.")

View File

@@ -1,221 +1,312 @@
package main package main
import ( import (
"fmt"
"slices" "slices"
"testing" "testing"
) )
var reTests = []struct { var reTests = []struct {
re string re string
flags []ReFlag
str string str string
result []Group // Stores all zero-groups in the match result []Group // Stores all zero-groups in the match
}{ }{
{"a", "abc", []Group{{0, 1}}}, {"a", nil, "abc", []Group{{0, 1}}},
{"a", "bca", []Group{{2, 3}}}, {"a", nil, "bca", []Group{{2, 3}}},
{"l", "ggllgg", []Group{{2, 3}, {3, 4}}}, {"l", nil, "ggllgg", []Group{{2, 3}, {3, 4}}},
{"(b|c)", "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}}, {"(b|c)", nil, "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}},
{"a+", "brerereraaaaabbbbb", []Group{{8, 13}}}, {"a+", nil, "brerereraaaaabbbbb", []Group{{8, 13}}},
{"ab+", "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}}, {"ab+", nil, "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}},
{"(b|c|A)", "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}}, {"(b|c|A)", nil, "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}},
{"ab*", "a", []Group{{0, 1}}}, {"ab*", nil, "a", []Group{{0, 1}}},
{"ab*", "abb", []Group{{0, 3}}}, {"ab*", nil, "abb", []Group{{0, 3}}},
{"a*b", "aaab", []Group{{0, 4}}}, {"a*b", nil, "aaab", []Group{{0, 4}}},
{"a*b", "qwqw", []Group{}}, {"a*b", nil, "qwqw", []Group{}},
{"(abc)*", "abcabcabc", []Group{{0, 9}, {9, 9}}}, {"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
{"((abc)|(def))*", "abcdef", []Group{{0, 6}, {6, 6}}}, {"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
{"(abc)*|(def)*", "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}}, {"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
{"b*a*a", "bba", []Group{{0, 3}}}, {"b*a*a", nil, "bba", []Group{{0, 3}}},
{"(ab)+", "abcabddd", []Group{{0, 2}, {3, 5}}}, {"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
{"a(b(c|d)*)*", "abccbd", []Group{{0, 6}}}, {"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
{"a(b|c)*d+", "abccdd", []Group{{0, 6}}}, {"a(b|c)*d+", nil, "abccdd", []Group{{0, 6}}},
{"a*", "", []Group{{0, 0}}}, {"a*", nil, "", []Group{{0, 0}}},
{"a|b", "c", []Group{}}, {"a|b", nil, "c", []Group{}},
{"(a|b)*c", "aabbc", []Group{{0, 5}}}, {"(a|b)*c", nil, "aabbc", []Group{{0, 5}}},
{"a(b|b)", "ab", []Group{{0, 2}}}, {"a(b|b)", nil, "ab", []Group{{0, 2}}},
{"a*", "aaaaaaaa", []Group{{0, 8}, {8, 8}}}, {"a*", nil, "aaaaaaaa", []Group{{0, 8}, {8, 8}}},
{"ab?", "ab", []Group{{0, 2}}}, {"ab?", nil, "ab", []Group{{0, 2}}},
{"a?b", "ab", []Group{{0, 2}}}, {"a?b", nil, "ab", []Group{{0, 2}}},
{"a?", "", []Group{{0, 0}}}, {"a?", nil, "", []Group{{0, 0}}},
{"a?b?c", "a", []Group{}}, {"a?b?c", nil, "a", []Group{}},
{"a?b?c?", "ab", []Group{{0, 2}, {2, 2}}}, {"a?b?c?", nil, "ab", []Group{{0, 2}, {2, 2}}},
{"a?b?c?", "ac", []Group{{0, 2}, {2, 2}}}, {"a?b?c?", nil, "ac", []Group{{0, 2}, {2, 2}}},
{"a?b?c", "abc", []Group{{0, 3}}}, {"a?b?c", nil, "abc", []Group{{0, 3}}},
{"a?b?c", "acb", []Group{{0, 2}}}, {"a?b?c", nil, "acb", []Group{{0, 2}}},
{"[abc]", "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}}, {"[abc]", nil, "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}},
{"[ab]c", "ab", []Group{}}, {"[ab]c", nil, "ab", []Group{}},
{"g[ab]c", "gac", []Group{{0, 3}}}, {"g[ab]c", nil, "gac", []Group{{0, 3}}},
{"g[ab]c", "gbc", []Group{{0, 3}}}, {"g[ab]c", nil, "gbc", []Group{{0, 3}}},
{"g[ab]c", "gc", []Group{}}, {"g[ab]c", nil, "gc", []Group{}},
{"g[ab]c", "gfc", []Group{}}, {"g[ab]c", nil, "gfc", []Group{}},
{"[ab]*", "aabbbabaababab", []Group{{0, 14}, {14, 14}}}, {"[ab]*", nil, "aabbbabaababab", []Group{{0, 14}, {14, 14}}},
{"[ab]+", "aabbbablaababab", []Group{{0, 7}, {8, 15}}}, {"[ab]+", nil, "aabbbablaababab", []Group{{0, 7}, {8, 15}}},
{"[Ff]r[Uu]it", "fruit", []Group{{0, 5}}}, {"[Ff]r[Uu]it", nil, "fruit", []Group{{0, 5}}},
{"[Ff]r[Uu]it", "FrUit", []Group{{0, 5}}}, {"[Ff]r[Uu]it", nil, "FrUit", []Group{{0, 5}}},
{"[Ff]r[Uu|]it", "Fr|it", []Group{{0, 5}}}, {"[Ff]r[Uu|]it", nil, "Fr|it", []Group{{0, 5}}},
{"[Ff]r([Uu]|[pP])it", "Frpit", []Group{{0, 5}}}, {"[Ff]r([Uu]|[pP])it", nil, "Frpit", []Group{{0, 5}}},
{"[Ff]r[Uu]|[pP]it", "Frpit", []Group{{2, 5}}}, {"[Ff]r[Uu]|[pP]it", nil, "Frpit", []Group{{2, 5}}},
{"[a-zA-Z]+", "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}}, {"[a-zA-Z]+", nil, "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
{".+", "Hello, how is it going?", []Group{{0, 23}}}, {".+", nil, "Hello, how is it going?", []Group{{0, 23}}},
{"a.", "a ", []Group{{0, 2}}}, {"a.", nil, "a ", []Group{{0, 2}}},
{"a.b", "a/b", []Group{{0, 3}}}, {"a.b", nil, "a/b", []Group{{0, 3}}},
{".", "a ", []Group{{0, 1}, {1, 2}}}, {".", nil, "a ", []Group{{0, 1}, {1, 2}}},
{"a.", "a ", []Group{{0, 2}}}, {"a.", nil, "a ", []Group{{0, 2}}},
{".+b", "abc", []Group{{0, 2}}}, {".+b", nil, "abc", []Group{{0, 2}}},
{`\d`, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}}, {`\d`, nil, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
{`\\`, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}}, {`\\`, nil, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
{`\W`, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}}, {`\W`, nil, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
{`\w`, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}}, {`\w`, nil, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
{`\s`, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}}, {`\s`, nil, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
{`\<`, "<HTML><body>", []Group{{0, 1}, {6, 7}}}, {`\<`, nil, "<HTML><body>", []Group{{0, 1}, {6, 7}}},
{`\(.+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}}, {`\(.+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}},
{"[^abc]+", "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}}, {"[^abc]+", nil, "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
{"[^a]+", "qqqaq", []Group{{0, 3}, {4, 5}}}, {"[^a]+", nil, "qqqaq", []Group{{0, 3}, {4, 5}}},
{"[^0-9]+", "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}}, {"[^0-9]+", nil, "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
{"[^abc]+", "ababababbababaccacacacaca", []Group{}}, {"[^abc]+", nil, "ababababbababaccacacacaca", []Group{}},
{`\[`, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}}, {`\[`, nil, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}},
{`\([^)]+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}}, {`\([^)]+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}},
{"^ab", "ab bab", []Group{{0, 2}}}, {"^ab", nil, "ab bab", []Group{{0, 2}}},
{"^aaaa^", "aaaaaaaa", []Group{}}, {"^aaaa^", nil, "aaaaaaaa", []Group{}},
{"^([bB][Gg])", "bG", []Group{{0, 2}}}, {"^([bB][Gg])", nil, "bG", []Group{{0, 2}}},
{"b$", "ba", []Group{}}, {"b$", nil, "ba", []Group{}},
{"(boy|girl)$", "girlf", []Group{}}, {"(boy|girl)$", nil, "girlf", []Group{}},
{`\bint\b`, "print int integer", []Group{{6, 9}}}, {`\bint\b`, nil, "print int integer", []Group{{6, 9}}},
{`int\b`, "ints", []Group{}}, {`int\b`, nil, "ints", []Group{}},
{`int(\b|a)`, "inta", []Group{{0, 4}}}, {`int(\b|a)`, nil, "inta", []Group{{0, 4}}},
{`\b\d+\b`, "511 a3 43", []Group{{0, 3}, {7, 9}}}, {`\b\d+\b`, nil, "511 a3 43", []Group{{0, 3}, {7, 9}}},
{`\Bint\B`, "prints int integer print", []Group{{2, 5}}}, {`\Bint\B`, nil, "prints int integer print", []Group{{2, 5}}},
{`^`, "5^3^2", []Group{{0, 0}}}, {`^`, nil, "5^3^2", []Group{{0, 0}}},
{`\^`, "5^3^2", []Group{{1, 2}, {3, 4}}}, {`\^`, nil, "5^3^2", []Group{{1, 2}, {3, 4}}},
{`pool$`, "pool carpool", []Group{{8, 12}}}, {`pool$`, nil, "pool carpool", []Group{{8, 12}}},
{`^int$`, "print int integer", []Group{}}, {`^int$`, nil, "print int integer", []Group{}},
{`^int$`, "int", []Group{{0, 3}}}, {`^int$`, nil, "int", []Group{{0, 3}}},
{`b*`, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}}, {`b*`, nil, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
{"a{4}", "aabaaa", []Group{}}, {"a{4}", nil, "aabaaa", []Group{}},
{"ab{5}", "abbbbbab", []Group{{0, 6}}}, {"ab{5}", nil, "abbbbbab", []Group{{0, 6}}},
{"(a|b){3,4}", "aba", []Group{{0, 3}}}, {"(a|b){3,4}", nil, "aba", []Group{{0, 3}}},
{"(a|b){3,4}", "ababaa", []Group{{0, 4}}}, {"(a|b){3,4}", nil, "ababaa", []Group{{0, 4}}},
{"(bc){5,}", "bcbcbcbcbcbcbcbc", []Group{{0, 16}}}, {"(bc){5,}", nil, "bcbcbcbcbcbcbcbc", []Group{{0, 16}}},
{`\d{3,4}`, "1209", []Group{{0, 4}}}, {`\d{3,4}`, nil, "1209", []Group{{0, 4}}},
{`\d{3,4}`, "109", []Group{{0, 3}}}, {`\d{3,4}`, nil, "109", []Group{{0, 3}}},
{`\d{3,4}`, "5", []Group{}}, {`\d{3,4}`, nil, "5", []Group{}},
{`\d{3,4}`, "123135", []Group{{0, 4}}}, {`\d{3,4}`, nil, "123135", []Group{{0, 4}}},
{`\d{3,4}`, "89a-0", []Group{}}, {`\d{3,4}`, nil, "89a-0", []Group{}},
{`\d{3,4}`, "ababab555", []Group{{6, 9}}}, {`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
{`\bpaint\b`, "paints", []Group{}}, {`\bpaint\b`, nil, "paints", []Group{}},
{`\b\w{5}\b`, "paint", []Group{{0, 5}}}, {`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
{`[^\w]`, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}}, {`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
{`[^\W]`, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}}, {`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
{`[\[\]]`, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}}, {`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
// Unicode tests // Unicode tests
{`.+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}}, {`.+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
{`a.b`, "a²b", []Group{{0, 3}}}, {`a.b`, nil, "a²b", []Group{{0, 3}}},
{`[^a]+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}}, {`[^a]+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
// Fun experiment - AI-generated tests // Fun experiment - AI-generated tests
{"(abc|def|ghi)", "abcdefg", []Group{{0, 3}, {3, 6}}}, {"(abc|def|ghi)", nil, "abcdefg", []Group{{0, 3}, {3, 6}}},
{"a(b|c)d", "abcd", []Group{}}, {"a(b|c)d", nil, "abcd", []Group{}},
{"a(b|c)*d", "abcbcd", []Group{{0, 6}}}, {"a(b|c)*d", nil, "abcbcd", []Group{{0, 6}}},
{"a(b|c)+d", "abcbcd", []Group{{0, 6}}}, {"a(b|c)+d", nil, "abcbcd", []Group{{0, 6}}},
{"a(b|c)?d", "abd", []Group{{0, 3}}}, {"a(b|c)?d", nil, "abd", []Group{{0, 3}}},
{".+", "hello world", []Group{{0, 11}}}, {".+", nil, "hello world", []Group{{0, 11}}},
{"a.b", "aXb", []Group{{0, 3}}}, {"a.b", nil, "aXb", []Group{{0, 3}}},
{"a.*b", "aXb", []Group{{0, 3}}}, {"a.*b", nil, "aXb", []Group{{0, 3}}},
{"a.{2,3}b", "aXXb", []Group{{0, 4}}}, {"a.{2,3}b", nil, "aXXb", []Group{{0, 4}}},
{"a.{2,}b", "aXXXb", []Group{{0, 5}}}, {"a.{2,}b", nil, "aXXXb", []Group{{0, 5}}},
{"a.{0,3}b", "ab", []Group{{0, 2}}}, {"a.{0,3}b", nil, "ab", []Group{{0, 2}}},
{"[abc]+", "abcabc", []Group{{0, 6}}}, {"[abc]+", nil, "abcabc", []Group{{0, 6}}},
{"[a-zA-Z]+", "HelloWorld", []Group{{0, 10}}}, {"[a-zA-Z]+", nil, "HelloWorld", []Group{{0, 10}}},
{"[^abc]+", "defghi", []Group{{0, 6}}}, {"[^abc]+", nil, "defghi", []Group{{0, 6}}},
{"^hello", "hello world", []Group{{0, 5}}}, {"^hello", nil, "hello world", []Group{{0, 5}}},
{"world$", "hello world", []Group{{6, 11}}}, {"world$", nil, "hello world", []Group{{6, 11}}},
{`\bhello\b`, "hello world", []Group{{0, 5}}}, {`\bhello\b`, nil, "hello world", []Group{{0, 5}}},
{`\Bhello\B`, "hello world", []Group{}}, {`\Bhello\B`, nil, "hello world", []Group{}},
{"(hello|world)", "hello world", []Group{{0, 5}, {6, 11}}}, {"(hello|world)", nil, "hello world", []Group{{0, 5}, {6, 11}}},
{"(hello|world)+", "hello world", []Group{{0, 5}, {6, 11}}}, {"(hello|world)+", nil, "hello world", []Group{{0, 5}, {6, 11}}},
{"(hello|world)*", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}}, {"(hello|world)*", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
{"(hello|world)?", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}}, {"(hello|world)?", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
{"ú.+ï", "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}}, {"ú.+ï", nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}},
{"(?=hello)", "hello world", []Group{{0, 0}}}, {"(?=hello)", nil, "hello world", []Group{{0, 0}}},
{"(?!hello)", "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}}, {"(?!hello)", nil, "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
{"(?<=hello)", "hello world", []Group{{5, 5}}}, {"(?<=hello)", nil, "hello world", []Group{{5, 5}}},
{"(?<!hello)", "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}}, {"(?<!hello)", nil, "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "40", []Group{{0, 2}}}, {"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "40", []Group{{0, 2}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "040", []Group{}}, {"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "040", []Group{}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "400", []Group{{0, 3}}}, {"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "400", []Group{{0, 3}}},
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "4000", []Group{}}, {"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "4000", []Group{}},
{"a{1,3}", "aaaaa", []Group{{0, 3}, {3, 5}}}, {"a{1,3}", nil, "aaaaa", []Group{{0, 3}, {3, 5}}},
{`\\[ab\\]`, "a", []Group{}}, {`\\[ab\\]`, nil, "a", []Group{}},
{`\\[ab\\]`, `\a`, []Group{{0, 2}}}, {`\\[ab\\]`, nil, `\a`, []Group{{0, 2}}},
// Lookaround tests // Lookaround tests
{"(?<=bo)y", "boy", []Group{{2, 3}}}, {"(?<=bo)y", nil, "boy", []Group{{2, 3}}},
{"bo(?=y)", "boy", []Group{{0, 2}}}, {"bo(?=y)", nil, "boy", []Group{{0, 2}}},
{"(?<=f)f+(?=f)", "fffff", []Group{{1, 4}}}, {"(?<=f)f+(?=f)", nil, "fffff", []Group{{1, 4}}},
{"(?<=f)f+(?=f)", "fffffa", []Group{{1, 4}}}, {"(?<=f)f+(?=f)", nil, "fffffa", []Group{{1, 4}}},
// Test cases from Python's RE test suite // Test cases from Python's RE test suite
{`[\1]`, "\x01", []Group{{0, 1}}}, {`[\1]`, nil, "\x01", []Group{{0, 1}}},
{`\0`, "\x00", []Group{{0, 1}}}, {`\0`, nil, "\x00", []Group{{0, 1}}},
{`[\0a]`, "\x00", []Group{{0, 1}}}, {`[\0a]`, nil, "\x00", []Group{{0, 1}}},
{`[\0a]`, "\x00", []Group{{0, 1}}}, {`[\0a]`, nil, "\x00", []Group{{0, 1}}},
{`[a\0]`, "\x00", []Group{{0, 1}}}, {`[a\0]`, nil, "\x00", []Group{{0, 1}}},
{`[^a\0]`, "\x00", []Group{}}, {`[^a\0]`, nil, "\x00", []Group{}},
{`\a[\b]\f\n\r\t\v`, "\a\b\f\n\r\t\v", []Group{{0, 7}}}, {`\a[\b]\f\n\r\t\v`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
{`[\a][\b][\f][\n][\r][\t][\v]`, "\a\b\f\n\r\t\v", []Group{{0, 7}}}, {`[\a][\b][\f][\n][\r][\t][\v]`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
{`\u`, "", nil}, {`\u`, nil, "", nil},
{`\xff`, "ÿ", []Group{{0, 1}}}, {`\xff`, nil, "ÿ", []Group{{0, 1}}},
{`\x00ffffffffffffff`, "\xff", []Group{}}, {`\x00ffffffffffffff`, nil, "\xff", []Group{}},
{`\x00f`, "\x0f", []Group{}}, {`\x00f`, nil, "\x0f", []Group{}},
{`\x00fe`, "\xfe", []Group{}}, {`\x00fe`, nil, "\xfe", []Group{}},
{`^\w+=(\\[\000-\277]|[^\n\\])*`, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}}, {`^\w+=(\\[\000-\277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
{`a.b`, nil, `acb`, []Group{{0, 3}}},
{`a.b`, nil, "a\nb", []Group{}},
{`a.*b`, nil, "acc\nccb", []Group{}},
{`a.{4,5}b`, nil, "acc\nccb", []Group{}},
{`a.b`, nil, "a\rb", []Group{{0, 3}}},
{`a.b`, []ReFlag{RE_MULTILINE}, "a\nb", []Group{{0, 3}}},
{`a.*b`, []ReFlag{RE_MULTILINE}, "acc\nccb", []Group{{0, 7}}},
{`a.{4,5}b`, []ReFlag{RE_MULTILINE}, "acc\nccb", []Group{{0, 7}}},
{`)`, nil, ``, nil},
{`^$`, nil, ``, []Group{{0, 0}}},
{`abc`, nil, `abc`, []Group{{0, 3}}},
{`abc`, nil, `xbc`, []Group{}},
{`abc`, nil, `axc`, []Group{}},
{`abc`, nil, `abx`, []Group{}},
{`abc`, nil, `xabcy`, []Group{{1, 4}}},
{`abc`, nil, `ababc`, []Group{{2, 5}}},
{`ab*c`, nil, `abc`, []Group{{0, 3}}},
{`ab*bc`, nil, `abc`, []Group{{0, 3}}},
{`ab*bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab*bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab+bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab+bc`, nil, `abc`, []Group{}},
{`ab+bc`, nil, `abq`, []Group{}},
{`ab+bc`, nil, `abbbbc`, []Group{{0, 6}}},
{`ab?bc`, nil, `abbc`, []Group{{0, 4}}},
{`ab?bc`, nil, `abc`, []Group{{0, 3}}},
{`ab?bc`, nil, `abbbbc`, []Group{}},
{`ab?c`, nil, `abc`, []Group{{0, 3}}},
{`^abc$`, nil, `abc`, []Group{{0, 3}}},
{`^abc$`, nil, `abcc`, []Group{}},
{`^abc`, nil, `abcc`, []Group{{0, 3}}},
{`^abc$`, nil, `aabc`, []Group{}},
{`abc$`, nil, `aabc`, []Group{{1, 4}}},
{`^`, nil, `abc`, []Group{{0, 0}}},
{`$`, nil, `abc`, []Group{{3, 3}}},
{`a.c`, nil, `abc`, []Group{{0, 3}}},
{`a.c`, nil, `axc`, []Group{{0, 3}}},
{`a.*c`, nil, `axyzc`, []Group{{0, 5}}},
{`a.*c`, nil, `axyzd`, []Group{}},
{`a[bc]d`, nil, `abc`, []Group{}},
{`a[bc]d`, nil, `abd`, []Group{{0, 3}}},
{`a[b-d]e`, nil, `abd`, []Group{}},
{`a[b-d]e`, nil, `ace`, []Group{{0, 3}}},
{`a[b-d]`, nil, `aac`, []Group{{1, 3}}},
{`a[-b]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
{`a[\-b]`, nil, `a-`, []Group{{0, 2}}},
{`a[b-]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
{`a[]b`, nil, `-`, nil},
{`a[`, nil, `-`, nil},
{`a\`, nil, `-`, nil},
{`abc)`, nil, `-`, nil},
{`(abc`, nil, `-`, nil},
{`a]`, nil, `a]`, []Group{{0, 2}}},
{`a[]]b`, nil, `a]b`, []Group{{0, 3}}},
{`a[\]]b`, nil, `a]b`, []Group{{0, 3}}},
{`a[^bc]d`, nil, `aed`, []Group{{0, 3}}},
{`a[^bc]d`, nil, `abd`, []Group{}},
{`a[^-b]c`, nil, `adc`, []Group{{0, 3}}},
{`a[^-b]c`, nil, `a-c`, []Group{}},
{`a[^]b]c`, nil, `a]c`, []Group{}},
{`a[^]b]c`, nil, `adc`, []Group{{0, 3}}},
{`\ba\b`, nil, `a-`, []Group{{0, 1}}},
{`\ba\b`, nil, `-a`, []Group{{1, 2}}},
{`\ba\b`, nil, `-a-`, []Group{{1, 2}}},
{`\by\b`, nil, `xy`, []Group{}},
{`\by\b`, nil, `yz`, []Group{}},
{`\by\b`, nil, `xyz`, []Group{}},
{`x\b`, nil, `xyz`, []Group{}},
{`x\B`, nil, `xyz`, []Group{{0, 1}}},
{`\Bz`, nil, `xyz`, []Group{{2, 3}}},
{`z\B`, nil, `xyz`, []Group{}},
{`\Bx`, nil, `xyz`, []Group{}},
{`\Ba\B`, nil, `a-`, []Group{}},
{`\Ba\B`, nil, `-a`, []Group{}},
{`\Ba\B`, nil, `-a-`, []Group{}},
{`\By\B`, nil, `xy`, []Group{}},
{`\By\B`, nil, `yz`, []Group{}},
{`\By\b`, nil, `xy`, []Group{{1, 2}}},
{`\by\B`, nil, `yz`, []Group{{0, 1}}},
{`\By\B`, nil, `xyz`, []Group{{1, 2}}},
{`ab|cd`, nil, `abc`, []Group{{0, 2}}},
{`ab|cd`, nil, `abcd`, []Group{{0, 2}, {2, 4}}},
// Todo - add numeric range tests // Todo - add numeric range tests
} }
var groupTests = []struct { var groupTests = []struct {
re string re string
flags []ReFlag
str string str string
result []Match result []Match
}{ }{
{"(a)(b)", "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}}, {"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
{"((a))(b)", "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}}, {"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
{"(0)", "ab", []Match{[]Group{}}}, {"(0)", nil, "ab", []Match{[]Group{}}},
{"(a)b", "ab", []Match{[]Group{{0, 2}, {0, 1}}}}, {"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
{"a(b)", "ab", []Match{[]Group{{0, 2}, {1, 2}}}}, {"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
{"(a|b)", "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}}, {"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
{"(a)|(b)", "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}}, {"(a)|(b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}},
{"(a+)(a)", "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}}, {"(a+)(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}},
{"(a+)|(a)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}}, {"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
{"(a+)(aa)", "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}}, {"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
{"(aaaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}}, {"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
{"(aaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}}, {"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
{"(aaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}}, {"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
{"(aaaa)|(aaa)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}}, {"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
{"(a)|(aa)", "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}}, {"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
{"(a?)a?", "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}}, {"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
{"(a?)a?", "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}}, {"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
{"(a?)a?", "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}}, {"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
{"a((b.d){3})", "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}}, {"a((b.d){3})", nil, "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}},
{`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, `abcdefghijkl9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}}, {`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, nil, `abcdefghijkl9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}},
{`()ef`, nil, `def`, []Match{[]Group{{1, 3}, {1, 1}}}},
{`(?:)ef`, nil, `def`, []Match{[]Group{{1, 3}}}},
{`(?:)`, nil, `def`, []Match{[]Group{{0, 0}}, []Group{{1, 1}}, []Group{{2, 2}}, []Group{{3, 3}}}},
} }
func TestFindAllMatches(t *testing.T) { func TestFindAllMatches(t *testing.T) {
for _, test := range reTests { for _, test := range reTests {
t.Run(test.re+" "+test.str, func(t *testing.T) { t.Run(test.re+" "+test.str, func(t *testing.T) {
regComp, err := Compile(test.re) regComp, err := Compile(test.re, test.flags...)
if err != nil { if err != nil {
if test.result != nil { if test.result != nil {
panic(err) panic(fmt.Errorf("Test Error: %v", err))
} }
} else { } else {
matchIndices := FindAllMatches(regComp, test.str) matchIndices := FindAllMatches(regComp, test.str)
@@ -234,7 +325,7 @@ func TestFindAllMatches(t *testing.T) {
func TestFindString(t *testing.T) { func TestFindString(t *testing.T) {
for _, test := range reTests { for _, test := range reTests {
t.Run(test.re+" "+test.str, func(t *testing.T) { t.Run(test.re+" "+test.str, func(t *testing.T) {
regComp, err := Compile(test.re) regComp, err := Compile(test.re, test.flags...)
if err != nil { if err != nil {
if test.result != nil { if test.result != nil {
panic(err) panic(err)
@@ -259,7 +350,7 @@ func TestFindString(t *testing.T) {
func TestFindAllGroups(t *testing.T) { func TestFindAllGroups(t *testing.T) {
for _, test := range groupTests { for _, test := range groupTests {
t.Run(test.re+" "+test.str, func(t *testing.T) { t.Run(test.re+" "+test.str, func(t *testing.T) {
regComp, err := Compile(test.re) regComp, err := Compile(test.re, test.flags...)
if err != nil { if err != nil {
if test.result != nil { if test.result != nil {
panic(err) panic(err)