Compare commits
21 Commits
4c96cfa06c
...
08e01a1c81
Author | SHA1 | Date | |
---|---|---|---|
08e01a1c81 | |||
5c2869ff81 | |||
4dfc77900f | |||
93903fc557 | |||
036e625a15 | |||
4966a222f9 | |||
263619c50c | |||
d7c9c181e1 | |||
5a085907cf | |||
65e5b4e2af | |||
1520edad55 | |||
6fb266e0d2 | |||
423fcc9b54 | |||
cf4d305b31 | |||
9d3c228ace | |||
5e12fe1c42 | |||
f87458ee99 | |||
2937f2d917 | |||
efab70f9dc | |||
cf964e41db | |||
649485f01d |
187
compile.go
187
compile.go
@@ -82,6 +82,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
||||||
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
||||||
//
|
//
|
||||||
|
// Another check is made for unescaped brackets - opening brackets are replaced with LBRACKET and closing brackets are replaced with RBRACKET.
|
||||||
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
|
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
|
||||||
// these will be converted back. This avoids confusion in detecting whether a character is escaped e.g. detecting
|
// these will be converted back. This avoids confusion in detecting whether a character is escaped e.g. detecting
|
||||||
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
||||||
@@ -122,6 +123,12 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
|
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
|
||||||
re_runes = append(re_runes, ESC_BACKSLASH)
|
re_runes = append(re_runes, ESC_BACKSLASH)
|
||||||
i++
|
i++
|
||||||
|
} else if c == '[' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
|
||||||
|
re_runes = append(re_runes, LBRACKET)
|
||||||
|
continue
|
||||||
|
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
|
||||||
|
re_runes = append(re_runes, RBRACKET)
|
||||||
|
continue
|
||||||
} else {
|
} else {
|
||||||
re_runes = append(re_runes, c)
|
re_runes = append(re_runes, c)
|
||||||
}
|
}
|
||||||
@@ -141,39 +148,28 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
i := 0
|
i := 0
|
||||||
for i < len(re_runes) {
|
for i < len(re_runes) {
|
||||||
re_postfix = append(re_postfix, re_runes[i])
|
re_postfix = append(re_postfix, re_runes[i])
|
||||||
if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped. Inside this block, the only task is to expand character ranges into their constituent characters.
|
if re_runes[i] == LBRACKET && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped.
|
||||||
re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class
|
toAppend := make([]rune, 0) // Holds all the runes in the current character class
|
||||||
toAppend := make([]rune, 0) // Holds all the runes in the current character class
|
|
||||||
if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets
|
i++ // Skip past LBRACKET, because it was already added
|
||||||
re_postfix = append(re_postfix, '^')
|
if i >= len(re_runes) { // Sanity check before we start
|
||||||
i++ // Skip opening bracket and caret
|
return nil, fmt.Errorf("Opening bracket without closing bracket.")
|
||||||
}
|
}
|
||||||
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
|
|
||||||
return nil, fmt.Errorf("Empty character class.")
|
|
||||||
}
|
|
||||||
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
|
|
||||||
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
|
|
||||||
// TODO: Check for escaped characters
|
|
||||||
|
|
||||||
// Check ahead for character range
|
for re_runes[i] != RBRACKET || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
|
||||||
if i < len(re_runes)-2 && re_runes[i+1] == '-' {
|
// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
|
||||||
rangeStart := re_runes[i]
|
if i >= len(re_runes) {
|
||||||
rangeEnd := re_runes[i+2]
|
return nil, fmt.Errorf("Opening bracket without closing bracket.")
|
||||||
if int(rangeEnd) < int(rangeStart) {
|
}
|
||||||
return nil, fmt.Errorf("Range is out of order.")
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := rangeStart; i <= rangeEnd; i++ {
|
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != RBRACKET) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
|
||||||
toAppend = append(toAppend, i)
|
re_runes[i] = CHAR_RANGE
|
||||||
}
|
|
||||||
|
|
||||||
i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
toAppend = append(toAppend, re_runes[i])
|
toAppend = append(toAppend, re_runes[i])
|
||||||
|
i++
|
||||||
}
|
}
|
||||||
// Replace the last character (which should have been ']') with RBRACKET
|
// Add in the RBRACKET
|
||||||
toAppend[len(toAppend)-1] = RBRACKET
|
toAppend = append(toAppend, RBRACKET)
|
||||||
re_postfix = append(re_postfix, toAppend...)
|
re_postfix = append(re_postfix, toAppend...)
|
||||||
}
|
}
|
||||||
if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
|
if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
|
||||||
@@ -280,7 +276,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
|
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
|
||||||
*/
|
*/
|
||||||
c := re_postfix[i]
|
c := re_postfix[i]
|
||||||
if isNormalChar(c) {
|
if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) {
|
||||||
if caseInsensitive {
|
if caseInsensitive {
|
||||||
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
|
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
|
||||||
} else {
|
} else {
|
||||||
@@ -288,7 +284,18 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// Escape character
|
// Since every unescaped bracket is replaced by a LBRACKET / RBRACKET, there may
|
||||||
|
// have been false positives. For example, the regex ']' has a closing bracket, but it
|
||||||
|
// isn't denoting a character class; it's just a regular character. Since it's not escaped,
|
||||||
|
// though, I would have converted this into an RBRACKET.
|
||||||
|
// To deal with this, I make the following assertion:
|
||||||
|
// If at any point I see an RBRACKET 'in the wild' (not in a character class), then it must be
|
||||||
|
// a regular character, with no special significance.
|
||||||
|
if c == RBRACKET {
|
||||||
|
outQueue = append(outQueue, newPostfixCharNode(']'))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
|
if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
|
||||||
if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
|
if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
|
||||||
return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
|
return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
|
||||||
@@ -420,7 +427,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if c == LBRACKET { // Used for character classes
|
if c == LBRACKET { // Used for character classes
|
||||||
i++ // Step forward so we can look at the character class
|
firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
|
||||||
|
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
|
||||||
|
i++ // Step forward so we can look at the character class
|
||||||
|
// Oops, there's nothing there to look at
|
||||||
|
if i >= len(re_postfix) {
|
||||||
|
return nil, fmt.Errorf("Opening bracket with no closing bracket.")
|
||||||
|
}
|
||||||
var invertMatch bool
|
var invertMatch bool
|
||||||
if re_postfix[i] == '^' {
|
if re_postfix[i] == '^' {
|
||||||
invertMatch = true
|
invertMatch = true
|
||||||
@@ -428,9 +441,14 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
|
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
|
||||||
for i < len(re_postfix) {
|
for i < len(re_postfix) {
|
||||||
if re_postfix[i] == RBRACKET {
|
if firstCharAdded && re_postfix[i] == RBRACKET {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
if re_postfix[i] == CHAR_RANGE {
|
||||||
|
endOfRange = true
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
if re_postfix[i] == '\\' { // Backslash indicates a character to be escaped
|
if re_postfix[i] == '\\' { // Backslash indicates a character to be escaped
|
||||||
if i == len(re_postfix)-1 {
|
if i == len(re_postfix)-1 {
|
||||||
return nil, fmt.Errorf("Stray backslash in character class.")
|
return nil, fmt.Errorf("Stray backslash in character class.")
|
||||||
@@ -483,13 +501,54 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket.
|
||||||
|
switch re_postfix[i] {
|
||||||
|
case LBRACKET:
|
||||||
|
chars = append(chars, newPostfixCharNode('['))
|
||||||
|
case RBRACKET:
|
||||||
|
chars = append(chars, newPostfixCharNode(']'))
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("Error parsing high-range unicode value in character class.")
|
||||||
|
}
|
||||||
|
}
|
||||||
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
firstCharAdded = true
|
||||||
|
|
||||||
|
if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range
|
||||||
|
// Things to note:
|
||||||
|
// 1. In PCRE and Go's regex engine, a letter _can_ be surrounded by hyphens in a character class.
|
||||||
|
// Eg. [a-b-c]
|
||||||
|
// While you might think this leads to a syntax error (I thought so), the engine picks 'a-b' as a range,
|
||||||
|
// then treats the second '-' and 'c' as regular characters in the character class.
|
||||||
|
// So this regex becomes "Match a character from 'a' to 'b', a literal hyphen, or 'c' ".
|
||||||
|
// 2. To account for this, the following logic is followed:
|
||||||
|
// a. If the second-to-last postfixNode ie. the start of the range has only one element, then we are in a range.
|
||||||
|
// i. If it has more than one element, then we are actually looking at a literal hyphen, and we will treat it as such.
|
||||||
|
// ii. If either the start or end of the range don't exist in 'chars' ie. something like [-a] or [a-], then too will we treat it as a literal hyphen.
|
||||||
|
// b. The last postfixNode added to 'chars' _must_ only have one character (because it's the end of the range).
|
||||||
|
endRangePostfixNode, err1 := pop(&chars)
|
||||||
|
startRangePostfixNode, err2 := pop(&chars)
|
||||||
|
|
||||||
|
if (err1 != nil || err2 != nil) || len(startRangePostfixNode.contents) != 1 { // Treat it as a regular hyphen
|
||||||
|
chars = append(chars, startRangePostfixNode, newPostfixCharNode('-'), endRangePostfixNode)
|
||||||
|
} else if len(endRangePostfixNode.contents) != 1 { // I don't even know what this would look like, this is just a sanity check
|
||||||
|
return nil, fmt.Errorf("Error parsing character range.")
|
||||||
|
} else {
|
||||||
|
// We have established that they both have a length of 1
|
||||||
|
startRangeRune := startRangePostfixNode.contents[0]
|
||||||
|
endRangeRune := endRangePostfixNode.contents[0]
|
||||||
|
chars = append(chars, newPostfixCharNode(genRange(startRangeRune, endRangeRune+1)...))
|
||||||
|
}
|
||||||
|
|
||||||
|
endOfRange = false // Reset the flag
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing bracket. Panic.
|
if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing bracket. Panic.
|
||||||
return nil, fmt.Errorf("Opening bracket without closing bracket.")
|
return nil, fmt.Errorf("Opening bracket without closing bracket.")
|
||||||
}
|
}
|
||||||
|
|
||||||
outQueue = append(outQueue, newCharClassNode(chars, invertMatch))
|
outQueue = append(outQueue, newCharClassNode(chars, invertMatch))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -599,6 +658,21 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
func thompson(re []postfixNode) (Reg, error) {
|
func thompson(re []postfixNode) (Reg, error) {
|
||||||
nfa := make([]*State, 0) // Stack of states
|
nfa := make([]*State, 0) // Stack of states
|
||||||
numGroups := 0 // Number of capturing groups
|
numGroups := 0 // Number of capturing groups
|
||||||
|
|
||||||
|
// If thompson() receives an empty regex, then whatever was given to shuntingYard()
|
||||||
|
// was parsed away. This doesn't mean that the regex itself is empty.
|
||||||
|
// For example, it could have been '(?:)'. This is an empty non-capturing group. Since
|
||||||
|
// shuntingYard() doesn't include non-capturing groups in its output (and the group contains
|
||||||
|
// nothing), the output of shuntingYard() (and the input to thompson()) ends up being empty.
|
||||||
|
// In these cases, we will return an NFA with 1 state, with an assertion that is always true.
|
||||||
|
if len(re) == 0 {
|
||||||
|
start := newState()
|
||||||
|
start.content = newContents(EPSILON)
|
||||||
|
start.isEmpty = true
|
||||||
|
start.assert = ALWAYS_TRUE
|
||||||
|
nfa = append(nfa, &start)
|
||||||
|
}
|
||||||
|
|
||||||
for _, c := range re {
|
for _, c := range re {
|
||||||
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
||||||
state := State{}
|
state := State{}
|
||||||
@@ -681,8 +755,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
|
|
||||||
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
|
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
|
||||||
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
|
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
|
||||||
// Uncommenting this seems to make one of the test cases fail. Why?
|
replaceByValue(state.except, ESC_BACKSLASH, '\\')
|
||||||
// replaceByValue(state.except, ESC_BACKSLASH, '\\')
|
|
||||||
|
|
||||||
nfa = append(nfa, &state)
|
nfa = append(nfa, &state)
|
||||||
}
|
}
|
||||||
@@ -706,15 +779,36 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
// and then some other node.
|
// and then some other node.
|
||||||
// These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated
|
// These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated
|
||||||
// and added back in.
|
// and added back in.
|
||||||
|
// If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN
|
||||||
|
// and RPAREN nodes.
|
||||||
|
// If neither node exists, that's a problem so I return an error.
|
||||||
if c.nodetype == RPAREN {
|
if c.nodetype == RPAREN {
|
||||||
s.groupEnd = true
|
s.groupEnd = true
|
||||||
middleNode := mustPop(&nfa)
|
middleNode, err1 := pop(&nfa)
|
||||||
lparenNode := mustPop(&nfa)
|
lparenNode, err2 := pop(&nfa)
|
||||||
s.groupNum = lparenNode.groupNum
|
if err1 != nil && err2 != nil {
|
||||||
tmp := concatenate(lparenNode, middleNode)
|
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
|
||||||
to_add := concatenate(tmp, s)
|
} else if err2 != nil { // There was no third node. ie. something like '()'
|
||||||
nfa = append(nfa, to_add)
|
lparenNode = middleNode
|
||||||
|
if lparenNode.groupBegin != true { // There are only two nodes, but the first one isn't an LPAREN.
|
||||||
|
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
|
||||||
|
}
|
||||||
|
s.groupNum = lparenNode.groupNum
|
||||||
|
to_add := concatenate(lparenNode, s)
|
||||||
|
nfa = append(nfa, to_add)
|
||||||
|
} else {
|
||||||
|
// At this point, we assume all three nodes are valid ('lparenNode', 'middleNode' and 's')
|
||||||
|
if lparenNode.groupBegin {
|
||||||
|
s.groupNum = lparenNode.groupNum
|
||||||
|
} else if middleNode.groupBegin { // Something like 'a()'
|
||||||
|
s.groupNum = middleNode.groupNum
|
||||||
|
} else { // A middleNode and lparenNode exist, but neither is actually an LPAREN.
|
||||||
|
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
|
||||||
|
}
|
||||||
|
tmp := concatenate(lparenNode, middleNode)
|
||||||
|
to_add := concatenate(tmp, s)
|
||||||
|
nfa = append(nfa, to_add)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated
|
if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated
|
||||||
@@ -734,9 +828,16 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
switch c.nodetype {
|
switch c.nodetype {
|
||||||
case CONCATENATE:
|
case CONCATENATE:
|
||||||
s2 := mustPop(&nfa)
|
s2 := mustPop(&nfa)
|
||||||
s1 := mustPop(&nfa)
|
// Relax the requirements for concatenation a little bit - If
|
||||||
s1 = concatenate(s1, s2)
|
// the second element is not found ie. the postfixNodes look
|
||||||
nfa = append(nfa, s1)
|
// like 'a~', then that's fine, we just skip the concatenation.
|
||||||
|
s1, err := pop(&nfa)
|
||||||
|
if err != nil {
|
||||||
|
nfa = append(nfa, s2)
|
||||||
|
} else {
|
||||||
|
s1 = concatenate(s1, s2)
|
||||||
|
nfa = append(nfa, s1)
|
||||||
|
}
|
||||||
case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
||||||
s1 := mustPop(&nfa)
|
s1 := mustPop(&nfa)
|
||||||
stateToAdd := kleene(*s1)
|
stateToAdd := kleene(*s1)
|
||||||
|
24
misc.go
24
misc.go
@@ -15,6 +15,14 @@ var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with thi
|
|||||||
var RPAREN_CHAR rune = 0xF0005
|
var RPAREN_CHAR rune = 0xF0005
|
||||||
var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
|
var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
|
||||||
var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash
|
var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash
|
||||||
|
var CHAR_RANGE rune = 0xF0008 // Represents a character range
|
||||||
|
|
||||||
|
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', '~', '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
|
||||||
|
|
||||||
|
// An interface for int and rune, which are identical
|
||||||
|
type character interface {
|
||||||
|
int | rune
|
||||||
|
}
|
||||||
|
|
||||||
// Returns true if str[idx] and str[idx-1] are separated by a word boundary.
|
// Returns true if str[idx] and str[idx-1] are separated by a word boundary.
|
||||||
func isWordBoundary(str []rune, idx int) bool {
|
func isWordBoundary(str []rune, idx int) bool {
|
||||||
@@ -26,9 +34,17 @@ func isWordBoundary(str []rune, idx int) bool {
|
|||||||
return wbounded
|
return wbounded
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isSpecialChar(c rune) bool {
|
||||||
|
return slices.Contains(specialChars, c)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some special characters have metacharacter replacements. These characters, when encountered in their literal form, can be treated as regular characters.
|
||||||
|
func isSpecialCharWithMetacharReplacement(c rune) bool {
|
||||||
|
return slices.Contains([]rune{'[', ']'}, c)
|
||||||
|
}
|
||||||
|
|
||||||
func isNormalChar(c rune) bool {
|
func isNormalChar(c rune) bool {
|
||||||
specialChars := []rune(`?*\^${}()+|[].~<>`)
|
|
||||||
specialChars = append(specialChars, LBRACKET, RBRACKET, NONCAPLPAREN_CHAR)
|
|
||||||
return !slices.Contains(specialChars, c)
|
return !slices.Contains(specialChars, c)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,8 +125,8 @@ func Reduce[T any](slc []T, fn func(T, T) T) T {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate numbers in a range - start (inclusive) to end (exclusive)
|
// Generate numbers in a range - start (inclusive) to end (exclusive)
|
||||||
func genRange(start, end int) []int {
|
func genRange[T character](start, end T) []T {
|
||||||
toRet := make([]int, end-start)
|
toRet := make([]T, end-start)
|
||||||
for i := start; i < end; i++ {
|
for i := start; i < end; i++ {
|
||||||
toRet[i-start] = i
|
toRet[i-start] = i
|
||||||
}
|
}
|
||||||
|
12
nfa.go
12
nfa.go
@@ -14,10 +14,11 @@ const (
|
|||||||
EOS
|
EOS
|
||||||
WBOUND
|
WBOUND
|
||||||
NONWBOUND
|
NONWBOUND
|
||||||
PLA // Positive lookahead
|
PLA // Positive lookahead
|
||||||
NLA // Negative lookahead
|
NLA // Negative lookahead
|
||||||
PLB // Positive lookbehind
|
PLB // Positive lookbehind
|
||||||
NLB // Negative lookbehind
|
NLB // Negative lookbehind
|
||||||
|
ALWAYS_TRUE // An assertion that is always true
|
||||||
)
|
)
|
||||||
|
|
||||||
type State struct {
|
type State struct {
|
||||||
@@ -103,6 +104,9 @@ func cloneStateHelper(state *State, cloneMap map[*State]*State) *State {
|
|||||||
// Checks if the given state's assertion is true. Returns true if the given
|
// Checks if the given state's assertion is true. Returns true if the given
|
||||||
// state doesn't have an assertion.
|
// state doesn't have an assertion.
|
||||||
func (s State) checkAssertion(str []rune, idx int) bool {
|
func (s State) checkAssertion(str []rune, idx int) bool {
|
||||||
|
if s.assert == ALWAYS_TRUE {
|
||||||
|
return true
|
||||||
|
}
|
||||||
if s.assert == SOS {
|
if s.assert == SOS {
|
||||||
return idx == 0
|
return idx == 0
|
||||||
}
|
}
|
||||||
|
@@ -116,6 +116,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
|||||||
case 'v': // Vertical tab
|
case 'v': // Vertical tab
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = CHARACTER
|
||||||
toReturn.contents = append(toReturn.contents, rune(11))
|
toReturn.contents = append(toReturn.contents, rune(11))
|
||||||
|
case '-': // Literal hyphen - only in character class
|
||||||
|
if inCharClass {
|
||||||
|
toReturn.nodetype = CHARACTER
|
||||||
|
toReturn.contents = append(toReturn.contents, '-')
|
||||||
|
} else {
|
||||||
|
return postfixNode{}, fmt.Errorf("Invalid escape character.")
|
||||||
|
}
|
||||||
default: // None of the above - append it as a regular character
|
default: // None of the above - append it as a regular character
|
||||||
if isNormalChar(c) { // Normal characters cannot be escaped
|
if isNormalChar(c) { // Normal characters cannot be escaped
|
||||||
return postfixNode{}, fmt.Errorf("Invalid escape character.")
|
return postfixNode{}, fmt.Errorf("Invalid escape character.")
|
||||||
|
441
re_test.go
441
re_test.go
@@ -1,221 +1,312 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"slices"
|
"slices"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
var reTests = []struct {
|
var reTests = []struct {
|
||||||
re string
|
re string
|
||||||
|
flags []ReFlag
|
||||||
str string
|
str string
|
||||||
result []Group // Stores all zero-groups in the match
|
result []Group // Stores all zero-groups in the match
|
||||||
}{
|
}{
|
||||||
{"a", "abc", []Group{{0, 1}}},
|
{"a", nil, "abc", []Group{{0, 1}}},
|
||||||
{"a", "bca", []Group{{2, 3}}},
|
{"a", nil, "bca", []Group{{2, 3}}},
|
||||||
{"l", "ggllgg", []Group{{2, 3}, {3, 4}}},
|
{"l", nil, "ggllgg", []Group{{2, 3}, {3, 4}}},
|
||||||
{"(b|c)", "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}},
|
{"(b|c)", nil, "abdceb", []Group{{1, 2}, {3, 4}, {5, 6}}},
|
||||||
{"a+", "brerereraaaaabbbbb", []Group{{8, 13}}},
|
{"a+", nil, "brerereraaaaabbbbb", []Group{{8, 13}}},
|
||||||
{"ab+", "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}},
|
{"ab+", nil, "qweqweqweaqweqweabbbbbr", []Group{{16, 22}}},
|
||||||
{"(b|c|A)", "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}},
|
{"(b|c|A)", nil, "ooaoobocA", []Group{{5, 6}, {7, 8}, {8, 9}}},
|
||||||
{"ab*", "a", []Group{{0, 1}}},
|
{"ab*", nil, "a", []Group{{0, 1}}},
|
||||||
{"ab*", "abb", []Group{{0, 3}}},
|
{"ab*", nil, "abb", []Group{{0, 3}}},
|
||||||
{"a*b", "aaab", []Group{{0, 4}}},
|
{"a*b", nil, "aaab", []Group{{0, 4}}},
|
||||||
{"a*b", "qwqw", []Group{}},
|
{"a*b", nil, "qwqw", []Group{}},
|
||||||
{"(abc)*", "abcabcabc", []Group{{0, 9}, {9, 9}}},
|
{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
|
||||||
{"((abc)|(def))*", "abcdef", []Group{{0, 6}, {6, 6}}},
|
{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
|
||||||
{"(abc)*|(def)*", "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
|
{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
|
||||||
{"b*a*a", "bba", []Group{{0, 3}}},
|
{"b*a*a", nil, "bba", []Group{{0, 3}}},
|
||||||
{"(ab)+", "abcabddd", []Group{{0, 2}, {3, 5}}},
|
{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
|
||||||
{"a(b(c|d)*)*", "abccbd", []Group{{0, 6}}},
|
{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
|
||||||
{"a(b|c)*d+", "abccdd", []Group{{0, 6}}},
|
{"a(b|c)*d+", nil, "abccdd", []Group{{0, 6}}},
|
||||||
{"a*", "", []Group{{0, 0}}},
|
{"a*", nil, "", []Group{{0, 0}}},
|
||||||
{"a|b", "c", []Group{}},
|
{"a|b", nil, "c", []Group{}},
|
||||||
{"(a|b)*c", "aabbc", []Group{{0, 5}}},
|
{"(a|b)*c", nil, "aabbc", []Group{{0, 5}}},
|
||||||
{"a(b|b)", "ab", []Group{{0, 2}}},
|
{"a(b|b)", nil, "ab", []Group{{0, 2}}},
|
||||||
{"a*", "aaaaaaaa", []Group{{0, 8}, {8, 8}}},
|
{"a*", nil, "aaaaaaaa", []Group{{0, 8}, {8, 8}}},
|
||||||
|
|
||||||
{"ab?", "ab", []Group{{0, 2}}},
|
{"ab?", nil, "ab", []Group{{0, 2}}},
|
||||||
{"a?b", "ab", []Group{{0, 2}}},
|
{"a?b", nil, "ab", []Group{{0, 2}}},
|
||||||
{"a?", "", []Group{{0, 0}}},
|
{"a?", nil, "", []Group{{0, 0}}},
|
||||||
{"a?b?c", "a", []Group{}},
|
{"a?b?c", nil, "a", []Group{}},
|
||||||
{"a?b?c?", "ab", []Group{{0, 2}, {2, 2}}},
|
{"a?b?c?", nil, "ab", []Group{{0, 2}, {2, 2}}},
|
||||||
{"a?b?c?", "ac", []Group{{0, 2}, {2, 2}}},
|
{"a?b?c?", nil, "ac", []Group{{0, 2}, {2, 2}}},
|
||||||
{"a?b?c", "abc", []Group{{0, 3}}},
|
{"a?b?c", nil, "abc", []Group{{0, 3}}},
|
||||||
{"a?b?c", "acb", []Group{{0, 2}}},
|
{"a?b?c", nil, "acb", []Group{{0, 2}}},
|
||||||
|
|
||||||
{"[abc]", "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}},
|
{"[abc]", nil, "defadefbdefce", []Group{{3, 4}, {7, 8}, {11, 12}}},
|
||||||
{"[ab]c", "ab", []Group{}},
|
{"[ab]c", nil, "ab", []Group{}},
|
||||||
{"g[ab]c", "gac", []Group{{0, 3}}},
|
{"g[ab]c", nil, "gac", []Group{{0, 3}}},
|
||||||
{"g[ab]c", "gbc", []Group{{0, 3}}},
|
{"g[ab]c", nil, "gbc", []Group{{0, 3}}},
|
||||||
{"g[ab]c", "gc", []Group{}},
|
{"g[ab]c", nil, "gc", []Group{}},
|
||||||
{"g[ab]c", "gfc", []Group{}},
|
{"g[ab]c", nil, "gfc", []Group{}},
|
||||||
{"[ab]*", "aabbbabaababab", []Group{{0, 14}, {14, 14}}},
|
{"[ab]*", nil, "aabbbabaababab", []Group{{0, 14}, {14, 14}}},
|
||||||
{"[ab]+", "aabbbablaababab", []Group{{0, 7}, {8, 15}}},
|
{"[ab]+", nil, "aabbbablaababab", []Group{{0, 7}, {8, 15}}},
|
||||||
{"[Ff]r[Uu]it", "fruit", []Group{{0, 5}}},
|
{"[Ff]r[Uu]it", nil, "fruit", []Group{{0, 5}}},
|
||||||
{"[Ff]r[Uu]it", "FrUit", []Group{{0, 5}}},
|
{"[Ff]r[Uu]it", nil, "FrUit", []Group{{0, 5}}},
|
||||||
{"[Ff]r[Uu|]it", "Fr|it", []Group{{0, 5}}},
|
{"[Ff]r[Uu|]it", nil, "Fr|it", []Group{{0, 5}}},
|
||||||
{"[Ff]r([Uu]|[pP])it", "Frpit", []Group{{0, 5}}},
|
{"[Ff]r([Uu]|[pP])it", nil, "Frpit", []Group{{0, 5}}},
|
||||||
{"[Ff]r[Uu]|[pP]it", "Frpit", []Group{{2, 5}}},
|
{"[Ff]r[Uu]|[pP]it", nil, "Frpit", []Group{{2, 5}}},
|
||||||
{"[a-zA-Z]+", "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
|
{"[a-zA-Z]+", nil, "Hello, how is it going?", []Group{{0, 5}, {7, 10}, {11, 13}, {14, 16}, {17, 22}}},
|
||||||
|
|
||||||
{".+", "Hello, how is it going?", []Group{{0, 23}}},
|
{".+", nil, "Hello, how is it going?", []Group{{0, 23}}},
|
||||||
{"a.", "a ", []Group{{0, 2}}},
|
{"a.", nil, "a ", []Group{{0, 2}}},
|
||||||
{"a.b", "a/b", []Group{{0, 3}}},
|
{"a.b", nil, "a/b", []Group{{0, 3}}},
|
||||||
{".", "a ", []Group{{0, 1}, {1, 2}}},
|
{".", nil, "a ", []Group{{0, 1}, {1, 2}}},
|
||||||
{"a.", "a ", []Group{{0, 2}}},
|
{"a.", nil, "a ", []Group{{0, 2}}},
|
||||||
{".+b", "abc", []Group{{0, 2}}},
|
{".+b", nil, "abc", []Group{{0, 2}}},
|
||||||
|
|
||||||
{`\d`, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
|
{`\d`, nil, "1a0a3s'''34343s", []Group{{0, 1}, {2, 3}, {4, 5}, {9, 10}, {10, 11}, {11, 12}, {12, 13}, {13, 14}}},
|
||||||
{`\\`, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
|
{`\\`, nil, `a\b\c\qwe\`, []Group{{1, 2}, {3, 4}, {5, 6}, {9, 10}}},
|
||||||
{`\W`, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
|
{`\W`, nil, `"Hello", he said. How are you doing?`, []Group{{0, 1}, {6, 7}, {7, 8}, {8, 9}, {11, 12}, {16, 17}, {17, 18}, {21, 22}, {25, 26}, {29, 30}, {35, 36}}},
|
||||||
{`\w`, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
|
{`\w`, nil, ";';';';';'qwe12", []Group{{10, 11}, {11, 12}, {12, 13}, {13, 14}, {14, 15}}},
|
||||||
{`\s`, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
{`\s`, nil, "a b c d", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
||||||
{`\<`, "<HTML><body>", []Group{{0, 1}, {6, 7}}},
|
{`\<`, nil, "<HTML><body>", []Group{{0, 1}, {6, 7}}},
|
||||||
{`\(.+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}},
|
{`\(.+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 35}}},
|
||||||
|
|
||||||
{"[^abc]+", "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
|
{"[^abc]+", nil, "qarbtopsaplpclkpasdmb prejip0r,p", []Group{{0, 1}, {2, 3}, {4, 8}, {9, 12}, {13, 16}, {17, 20}, {21, 32}}},
|
||||||
{"[^a]+", "qqqaq", []Group{{0, 3}, {4, 5}}},
|
{"[^a]+", nil, "qqqaq", []Group{{0, 3}, {4, 5}}},
|
||||||
{"[^0-9]+", "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
|
{"[^0-9]+", nil, "a1b2c3dd", []Group{{0, 1}, {2, 3}, {4, 5}, {6, 8}}},
|
||||||
{"[^abc]+", "ababababbababaccacacacaca", []Group{}},
|
{"[^abc]+", nil, "ababababbababaccacacacaca", []Group{}},
|
||||||
{`\[`, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}},
|
{`\[`, nil, "a[b[c[]]]", []Group{{1, 2}, {3, 4}, {5, 6}}},
|
||||||
{`\([^)]+\)`, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}},
|
{`\([^)]+\)`, nil, "Not (paranthesized), (so) is (this) not", []Group{{4, 19}, {21, 25}, {29, 35}}},
|
||||||
|
|
||||||
{"^ab", "ab bab", []Group{{0, 2}}},
|
{"^ab", nil, "ab bab", []Group{{0, 2}}},
|
||||||
{"^aaaa^", "aaaaaaaa", []Group{}},
|
{"^aaaa^", nil, "aaaaaaaa", []Group{}},
|
||||||
{"^([bB][Gg])", "bG", []Group{{0, 2}}},
|
{"^([bB][Gg])", nil, "bG", []Group{{0, 2}}},
|
||||||
{"b$", "ba", []Group{}},
|
{"b$", nil, "ba", []Group{}},
|
||||||
{"(boy|girl)$", "girlf", []Group{}},
|
{"(boy|girl)$", nil, "girlf", []Group{}},
|
||||||
{`\bint\b`, "print int integer", []Group{{6, 9}}},
|
{`\bint\b`, nil, "print int integer", []Group{{6, 9}}},
|
||||||
{`int\b`, "ints", []Group{}},
|
{`int\b`, nil, "ints", []Group{}},
|
||||||
{`int(\b|a)`, "inta", []Group{{0, 4}}},
|
{`int(\b|a)`, nil, "inta", []Group{{0, 4}}},
|
||||||
{`\b\d+\b`, "511 a3 43", []Group{{0, 3}, {7, 9}}},
|
{`\b\d+\b`, nil, "511 a3 43", []Group{{0, 3}, {7, 9}}},
|
||||||
{`\Bint\B`, "prints int integer print", []Group{{2, 5}}},
|
{`\Bint\B`, nil, "prints int integer print", []Group{{2, 5}}},
|
||||||
{`^`, "5^3^2", []Group{{0, 0}}},
|
{`^`, nil, "5^3^2", []Group{{0, 0}}},
|
||||||
{`\^`, "5^3^2", []Group{{1, 2}, {3, 4}}},
|
{`\^`, nil, "5^3^2", []Group{{1, 2}, {3, 4}}},
|
||||||
{`pool$`, "pool carpool", []Group{{8, 12}}},
|
{`pool$`, nil, "pool carpool", []Group{{8, 12}}},
|
||||||
{`^int$`, "print int integer", []Group{}},
|
{`^int$`, nil, "print int integer", []Group{}},
|
||||||
{`^int$`, "int", []Group{{0, 3}}},
|
{`^int$`, nil, "int", []Group{{0, 3}}},
|
||||||
{`b*`, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
|
{`b*`, nil, "aaaaaaaaaaqweqwe", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}, {12, 12}, {13, 13}, {14, 14}, {15, 15}, {16, 16}}},
|
||||||
|
|
||||||
{"a{4}", "aabaaa", []Group{}},
|
{"a{4}", nil, "aabaaa", []Group{}},
|
||||||
{"ab{5}", "abbbbbab", []Group{{0, 6}}},
|
{"ab{5}", nil, "abbbbbab", []Group{{0, 6}}},
|
||||||
{"(a|b){3,4}", "aba", []Group{{0, 3}}},
|
{"(a|b){3,4}", nil, "aba", []Group{{0, 3}}},
|
||||||
{"(a|b){3,4}", "ababaa", []Group{{0, 4}}},
|
{"(a|b){3,4}", nil, "ababaa", []Group{{0, 4}}},
|
||||||
{"(bc){5,}", "bcbcbcbcbcbcbcbc", []Group{{0, 16}}},
|
{"(bc){5,}", nil, "bcbcbcbcbcbcbcbc", []Group{{0, 16}}},
|
||||||
{`\d{3,4}`, "1209", []Group{{0, 4}}},
|
{`\d{3,4}`, nil, "1209", []Group{{0, 4}}},
|
||||||
{`\d{3,4}`, "109", []Group{{0, 3}}},
|
{`\d{3,4}`, nil, "109", []Group{{0, 3}}},
|
||||||
{`\d{3,4}`, "5", []Group{}},
|
{`\d{3,4}`, nil, "5", []Group{}},
|
||||||
{`\d{3,4}`, "123135", []Group{{0, 4}}},
|
{`\d{3,4}`, nil, "123135", []Group{{0, 4}}},
|
||||||
{`\d{3,4}`, "89a-0", []Group{}},
|
{`\d{3,4}`, nil, "89a-0", []Group{}},
|
||||||
{`\d{3,4}`, "ababab555", []Group{{6, 9}}},
|
{`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
|
||||||
{`\bpaint\b`, "paints", []Group{}},
|
{`\bpaint\b`, nil, "paints", []Group{}},
|
||||||
{`\b\w{5}\b`, "paint", []Group{{0, 5}}},
|
{`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
|
||||||
{`[^\w]`, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
|
{`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
|
||||||
{`[^\W]`, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
|
{`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
|
||||||
{`[\[\]]`, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
{`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
||||||
|
|
||||||
// Unicode tests
|
// Unicode tests
|
||||||
{`.+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
|
{`.+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
|
||||||
{`a.b`, "a²b", []Group{{0, 3}}},
|
{`a.b`, nil, "a²b", []Group{{0, 3}}},
|
||||||
{`[^a]+`, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
|
{`[^a]+`, nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 25}}},
|
||||||
|
|
||||||
// Fun experiment - AI-generated tests
|
// Fun experiment - AI-generated tests
|
||||||
{"(abc|def|ghi)", "abcdefg", []Group{{0, 3}, {3, 6}}},
|
{"(abc|def|ghi)", nil, "abcdefg", []Group{{0, 3}, {3, 6}}},
|
||||||
{"a(b|c)d", "abcd", []Group{}},
|
{"a(b|c)d", nil, "abcd", []Group{}},
|
||||||
{"a(b|c)*d", "abcbcd", []Group{{0, 6}}},
|
{"a(b|c)*d", nil, "abcbcd", []Group{{0, 6}}},
|
||||||
{"a(b|c)+d", "abcbcd", []Group{{0, 6}}},
|
{"a(b|c)+d", nil, "abcbcd", []Group{{0, 6}}},
|
||||||
{"a(b|c)?d", "abd", []Group{{0, 3}}},
|
{"a(b|c)?d", nil, "abd", []Group{{0, 3}}},
|
||||||
{".+", "hello world", []Group{{0, 11}}},
|
{".+", nil, "hello world", []Group{{0, 11}}},
|
||||||
{"a.b", "aXb", []Group{{0, 3}}},
|
{"a.b", nil, "aXb", []Group{{0, 3}}},
|
||||||
{"a.*b", "aXb", []Group{{0, 3}}},
|
{"a.*b", nil, "aXb", []Group{{0, 3}}},
|
||||||
{"a.{2,3}b", "aXXb", []Group{{0, 4}}},
|
{"a.{2,3}b", nil, "aXXb", []Group{{0, 4}}},
|
||||||
{"a.{2,}b", "aXXXb", []Group{{0, 5}}},
|
{"a.{2,}b", nil, "aXXXb", []Group{{0, 5}}},
|
||||||
{"a.{0,3}b", "ab", []Group{{0, 2}}},
|
{"a.{0,3}b", nil, "ab", []Group{{0, 2}}},
|
||||||
{"[abc]+", "abcabc", []Group{{0, 6}}},
|
{"[abc]+", nil, "abcabc", []Group{{0, 6}}},
|
||||||
{"[a-zA-Z]+", "HelloWorld", []Group{{0, 10}}},
|
{"[a-zA-Z]+", nil, "HelloWorld", []Group{{0, 10}}},
|
||||||
{"[^abc]+", "defghi", []Group{{0, 6}}},
|
{"[^abc]+", nil, "defghi", []Group{{0, 6}}},
|
||||||
{"^hello", "hello world", []Group{{0, 5}}},
|
{"^hello", nil, "hello world", []Group{{0, 5}}},
|
||||||
{"world$", "hello world", []Group{{6, 11}}},
|
{"world$", nil, "hello world", []Group{{6, 11}}},
|
||||||
{`\bhello\b`, "hello world", []Group{{0, 5}}},
|
{`\bhello\b`, nil, "hello world", []Group{{0, 5}}},
|
||||||
{`\Bhello\B`, "hello world", []Group{}},
|
{`\Bhello\B`, nil, "hello world", []Group{}},
|
||||||
{"(hello|world)", "hello world", []Group{{0, 5}, {6, 11}}},
|
{"(hello|world)", nil, "hello world", []Group{{0, 5}, {6, 11}}},
|
||||||
{"(hello|world)+", "hello world", []Group{{0, 5}, {6, 11}}},
|
{"(hello|world)+", nil, "hello world", []Group{{0, 5}, {6, 11}}},
|
||||||
{"(hello|world)*", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
{"(hello|world)*", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
||||||
{"(hello|world)?", "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
{"(hello|world)?", nil, "hello world", []Group{{0, 5}, {5, 5}, {6, 11}, {11, 11}}},
|
||||||
{"ú.+ï", "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}},
|
{"ú.+ï", nil, "úïäö´«åæïëòöê»éãçâï«úïòíñ", []Group{{0, 22}}},
|
||||||
{"(?=hello)", "hello world", []Group{{0, 0}}},
|
{"(?=hello)", nil, "hello world", []Group{{0, 0}}},
|
||||||
{"(?!hello)", "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
{"(?!hello)", nil, "hello world", []Group{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
||||||
{"(?<=hello)", "hello world", []Group{{5, 5}}},
|
{"(?<=hello)", nil, "hello world", []Group{{5, 5}}},
|
||||||
{"(?<!hello)", "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
{"(?<!hello)", nil, "hello world", []Group{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {6, 6}, {7, 7}, {8, 8}, {9, 9}, {10, 10}, {11, 11}}},
|
||||||
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "40", []Group{{0, 2}}},
|
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "40", []Group{{0, 2}}},
|
||||||
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "040", []Group{}},
|
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "040", []Group{}},
|
||||||
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "400", []Group{{0, 3}}},
|
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "400", []Group{{0, 3}}},
|
||||||
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", "4000", []Group{}},
|
{"^((3[7-9])|([4-9][0-9])|([1-9][0-9][0-9])|(1000))$", nil, "4000", []Group{}},
|
||||||
{"a{1,3}", "aaaaa", []Group{{0, 3}, {3, 5}}},
|
{"a{1,3}", nil, "aaaaa", []Group{{0, 3}, {3, 5}}},
|
||||||
{`\\[ab\\]`, "a", []Group{}},
|
{`\\[ab\\]`, nil, "a", []Group{}},
|
||||||
{`\\[ab\\]`, `\a`, []Group{{0, 2}}},
|
{`\\[ab\\]`, nil, `\a`, []Group{{0, 2}}},
|
||||||
|
|
||||||
// Lookaround tests
|
// Lookaround tests
|
||||||
{"(?<=bo)y", "boy", []Group{{2, 3}}},
|
{"(?<=bo)y", nil, "boy", []Group{{2, 3}}},
|
||||||
{"bo(?=y)", "boy", []Group{{0, 2}}},
|
{"bo(?=y)", nil, "boy", []Group{{0, 2}}},
|
||||||
{"(?<=f)f+(?=f)", "fffff", []Group{{1, 4}}},
|
{"(?<=f)f+(?=f)", nil, "fffff", []Group{{1, 4}}},
|
||||||
{"(?<=f)f+(?=f)", "fffffa", []Group{{1, 4}}},
|
{"(?<=f)f+(?=f)", nil, "fffffa", []Group{{1, 4}}},
|
||||||
|
|
||||||
// Test cases from Python's RE test suite
|
// Test cases from Python's RE test suite
|
||||||
{`[\1]`, "\x01", []Group{{0, 1}}},
|
{`[\1]`, nil, "\x01", []Group{{0, 1}}},
|
||||||
|
|
||||||
{`\0`, "\x00", []Group{{0, 1}}},
|
{`\0`, nil, "\x00", []Group{{0, 1}}},
|
||||||
{`[\0a]`, "\x00", []Group{{0, 1}}},
|
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
|
||||||
{`[\0a]`, "\x00", []Group{{0, 1}}},
|
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
|
||||||
{`[a\0]`, "\x00", []Group{{0, 1}}},
|
{`[a\0]`, nil, "\x00", []Group{{0, 1}}},
|
||||||
{`[^a\0]`, "\x00", []Group{}},
|
{`[^a\0]`, nil, "\x00", []Group{}},
|
||||||
|
|
||||||
{`\a[\b]\f\n\r\t\v`, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
|
{`\a[\b]\f\n\r\t\v`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
|
||||||
{`[\a][\b][\f][\n][\r][\t][\v]`, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
|
{`[\a][\b][\f][\n][\r][\t][\v]`, nil, "\a\b\f\n\r\t\v", []Group{{0, 7}}},
|
||||||
{`\u`, "", nil},
|
{`\u`, nil, "", nil},
|
||||||
{`\xff`, "ÿ", []Group{{0, 1}}},
|
{`\xff`, nil, "ÿ", []Group{{0, 1}}},
|
||||||
{`\x00ffffffffffffff`, "\xff", []Group{}},
|
{`\x00ffffffffffffff`, nil, "\xff", []Group{}},
|
||||||
{`\x00f`, "\x0f", []Group{}},
|
{`\x00f`, nil, "\x0f", []Group{}},
|
||||||
{`\x00fe`, "\xfe", []Group{}},
|
{`\x00fe`, nil, "\xfe", []Group{}},
|
||||||
{`^\w+=(\\[\000-\277]|[^\n\\])*`, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
|
{`^\w+=(\\[\000-\277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
|
||||||
|
|
||||||
|
{`a.b`, nil, `acb`, []Group{{0, 3}}},
|
||||||
|
{`a.b`, nil, "a\nb", []Group{}},
|
||||||
|
{`a.*b`, nil, "acc\nccb", []Group{}},
|
||||||
|
{`a.{4,5}b`, nil, "acc\nccb", []Group{}},
|
||||||
|
{`a.b`, nil, "a\rb", []Group{{0, 3}}},
|
||||||
|
{`a.b`, []ReFlag{RE_MULTILINE}, "a\nb", []Group{{0, 3}}},
|
||||||
|
{`a.*b`, []ReFlag{RE_MULTILINE}, "acc\nccb", []Group{{0, 7}}},
|
||||||
|
{`a.{4,5}b`, []ReFlag{RE_MULTILINE}, "acc\nccb", []Group{{0, 7}}},
|
||||||
|
|
||||||
|
{`)`, nil, ``, nil},
|
||||||
|
{`^$`, nil, ``, []Group{{0, 0}}},
|
||||||
|
{`abc`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`abc`, nil, `xbc`, []Group{}},
|
||||||
|
{`abc`, nil, `axc`, []Group{}},
|
||||||
|
{`abc`, nil, `abx`, []Group{}},
|
||||||
|
{`abc`, nil, `xabcy`, []Group{{1, 4}}},
|
||||||
|
{`abc`, nil, `ababc`, []Group{{2, 5}}},
|
||||||
|
{`ab*c`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`ab*bc`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`ab*bc`, nil, `abbc`, []Group{{0, 4}}},
|
||||||
|
{`ab*bc`, nil, `abbbbc`, []Group{{0, 6}}},
|
||||||
|
{`ab+bc`, nil, `abbc`, []Group{{0, 4}}},
|
||||||
|
{`ab+bc`, nil, `abc`, []Group{}},
|
||||||
|
{`ab+bc`, nil, `abq`, []Group{}},
|
||||||
|
{`ab+bc`, nil, `abbbbc`, []Group{{0, 6}}},
|
||||||
|
{`ab?bc`, nil, `abbc`, []Group{{0, 4}}},
|
||||||
|
{`ab?bc`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`ab?bc`, nil, `abbbbc`, []Group{}},
|
||||||
|
{`ab?c`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`^abc$`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`^abc$`, nil, `abcc`, []Group{}},
|
||||||
|
{`^abc`, nil, `abcc`, []Group{{0, 3}}},
|
||||||
|
{`^abc$`, nil, `aabc`, []Group{}},
|
||||||
|
{`abc$`, nil, `aabc`, []Group{{1, 4}}},
|
||||||
|
{`^`, nil, `abc`, []Group{{0, 0}}},
|
||||||
|
{`$`, nil, `abc`, []Group{{3, 3}}},
|
||||||
|
{`a.c`, nil, `abc`, []Group{{0, 3}}},
|
||||||
|
{`a.c`, nil, `axc`, []Group{{0, 3}}},
|
||||||
|
{`a.*c`, nil, `axyzc`, []Group{{0, 5}}},
|
||||||
|
{`a.*c`, nil, `axyzd`, []Group{}},
|
||||||
|
{`a[bc]d`, nil, `abc`, []Group{}},
|
||||||
|
{`a[bc]d`, nil, `abd`, []Group{{0, 3}}},
|
||||||
|
{`a[b-d]e`, nil, `abd`, []Group{}},
|
||||||
|
{`a[b-d]e`, nil, `ace`, []Group{{0, 3}}},
|
||||||
|
{`a[b-d]`, nil, `aac`, []Group{{1, 3}}},
|
||||||
|
{`a[-b]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
|
||||||
|
{`a[\-b]`, nil, `a-`, []Group{{0, 2}}},
|
||||||
|
{`a[b-]`, nil, `a-`, []Group{{0, 2}}}, // If a character class has a hyphen without a start or end character, it is treated as a literal hyphen
|
||||||
|
|
||||||
|
{`a[]b`, nil, `-`, nil},
|
||||||
|
{`a[`, nil, `-`, nil},
|
||||||
|
{`a\`, nil, `-`, nil},
|
||||||
|
{`abc)`, nil, `-`, nil},
|
||||||
|
{`(abc`, nil, `-`, nil},
|
||||||
|
{`a]`, nil, `a]`, []Group{{0, 2}}},
|
||||||
|
{`a[]]b`, nil, `a]b`, []Group{{0, 3}}},
|
||||||
|
{`a[\]]b`, nil, `a]b`, []Group{{0, 3}}},
|
||||||
|
{`a[^bc]d`, nil, `aed`, []Group{{0, 3}}},
|
||||||
|
{`a[^bc]d`, nil, `abd`, []Group{}},
|
||||||
|
{`a[^-b]c`, nil, `adc`, []Group{{0, 3}}},
|
||||||
|
{`a[^-b]c`, nil, `a-c`, []Group{}},
|
||||||
|
{`a[^]b]c`, nil, `a]c`, []Group{}},
|
||||||
|
{`a[^]b]c`, nil, `adc`, []Group{{0, 3}}},
|
||||||
|
{`\ba\b`, nil, `a-`, []Group{{0, 1}}},
|
||||||
|
{`\ba\b`, nil, `-a`, []Group{{1, 2}}},
|
||||||
|
{`\ba\b`, nil, `-a-`, []Group{{1, 2}}},
|
||||||
|
{`\by\b`, nil, `xy`, []Group{}},
|
||||||
|
{`\by\b`, nil, `yz`, []Group{}},
|
||||||
|
{`\by\b`, nil, `xyz`, []Group{}},
|
||||||
|
{`x\b`, nil, `xyz`, []Group{}},
|
||||||
|
{`x\B`, nil, `xyz`, []Group{{0, 1}}},
|
||||||
|
{`\Bz`, nil, `xyz`, []Group{{2, 3}}},
|
||||||
|
{`z\B`, nil, `xyz`, []Group{}},
|
||||||
|
{`\Bx`, nil, `xyz`, []Group{}},
|
||||||
|
{`\Ba\B`, nil, `a-`, []Group{}},
|
||||||
|
{`\Ba\B`, nil, `-a`, []Group{}},
|
||||||
|
{`\Ba\B`, nil, `-a-`, []Group{}},
|
||||||
|
{`\By\B`, nil, `xy`, []Group{}},
|
||||||
|
{`\By\B`, nil, `yz`, []Group{}},
|
||||||
|
{`\By\b`, nil, `xy`, []Group{{1, 2}}},
|
||||||
|
{`\by\B`, nil, `yz`, []Group{{0, 1}}},
|
||||||
|
{`\By\B`, nil, `xyz`, []Group{{1, 2}}},
|
||||||
|
{`ab|cd`, nil, `abc`, []Group{{0, 2}}},
|
||||||
|
{`ab|cd`, nil, `abcd`, []Group{{0, 2}, {2, 4}}},
|
||||||
|
|
||||||
// Todo - add numeric range tests
|
// Todo - add numeric range tests
|
||||||
}
|
}
|
||||||
|
|
||||||
var groupTests = []struct {
|
var groupTests = []struct {
|
||||||
re string
|
re string
|
||||||
|
flags []ReFlag
|
||||||
str string
|
str string
|
||||||
result []Match
|
result []Match
|
||||||
}{
|
}{
|
||||||
{"(a)(b)", "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
|
{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
|
||||||
{"((a))(b)", "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
|
{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
|
||||||
{"(0)", "ab", []Match{[]Group{}}},
|
{"(0)", nil, "ab", []Match{[]Group{}}},
|
||||||
{"(a)b", "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
|
{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
|
||||||
{"a(b)", "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
|
{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
|
||||||
{"(a|b)", "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
|
{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
|
||||||
{"(a)|(b)", "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}},
|
{"(a)|(b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}, {-1, -1}}, []Group{{1, 2}, {-1, -1}, {1, 2}}}},
|
||||||
{"(a+)(a)", "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}},
|
{"(a+)(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 3}, {3, 4}}}},
|
||||||
{"(a+)|(a)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
||||||
{"(a+)(aa)", "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
|
{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
|
||||||
{"(aaaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
||||||
{"(aaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
|
{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
|
||||||
{"(aaa)|(aaaa)", "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
|
{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
|
||||||
{"(aaaa)|(aaa)", "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
|
||||||
{"(a)|(aa)", "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
|
{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
|
||||||
{"(a?)a?", "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
|
{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
|
||||||
{"(a?)a?", "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
|
{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
|
||||||
{"(a?)a?", "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
|
{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
|
||||||
{"a((b.d){3})", "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}},
|
{"a((b.d){3})", nil, "abfdbhdbid", []Match{[]Group{{0, 10}, {1, 10}, {7, 10}}}},
|
||||||
{`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, `abcdefghijkl9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}},
|
{`(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\071`, nil, `abcdefghijkl9`, []Match{[]Group{{0, 13}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {10, 11}, {11, 12}}}},
|
||||||
|
{`()ef`, nil, `def`, []Match{[]Group{{1, 3}, {1, 1}}}},
|
||||||
|
{`(?:)ef`, nil, `def`, []Match{[]Group{{1, 3}}}},
|
||||||
|
{`(?:)`, nil, `def`, []Match{[]Group{{0, 0}}, []Group{{1, 1}}, []Group{{2, 2}}, []Group{{3, 3}}}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFindAllMatches(t *testing.T) {
|
func TestFindAllMatches(t *testing.T) {
|
||||||
for _, test := range reTests {
|
for _, test := range reTests {
|
||||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||||
regComp, err := Compile(test.re)
|
regComp, err := Compile(test.re, test.flags...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(fmt.Errorf("Test Error: %v", err))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
matchIndices := FindAllMatches(regComp, test.str)
|
matchIndices := FindAllMatches(regComp, test.str)
|
||||||
@@ -234,7 +325,7 @@ func TestFindAllMatches(t *testing.T) {
|
|||||||
func TestFindString(t *testing.T) {
|
func TestFindString(t *testing.T) {
|
||||||
for _, test := range reTests {
|
for _, test := range reTests {
|
||||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||||
regComp, err := Compile(test.re)
|
regComp, err := Compile(test.re, test.flags...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
@@ -259,7 +350,7 @@ func TestFindString(t *testing.T) {
|
|||||||
func TestFindAllGroups(t *testing.T) {
|
func TestFindAllGroups(t *testing.T) {
|
||||||
for _, test := range groupTests {
|
for _, test := range groupTests {
|
||||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||||
regComp, err := Compile(test.re)
|
regComp, err := Compile(test.re, test.flags...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if test.result != nil {
|
if test.result != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
|
Reference in New Issue
Block a user