Changed the value of EPSILON, so that we can use the NUL character

(which it used to be) in a regex; Also added code to detect escaped
backslashes

Specifically, I replace an escaped backslash with a metacharacter, then
replace it back later on. This prevents problems, like detecting whether
the opening bracket is escaped in '\\[a]'.
master
Aadhavan Srinivasan 7 hours ago
parent 0fb78abf7f
commit 25cb79f01b

@ -81,6 +81,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
//
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
//
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
// these will be converted back. This avoids confusiuon in detecting whether a character is escaped eg. detecting
// whether '\\[a]' has an escaped opening bracket (it doesn't).
for i := 0; i < len(re_runes_orig); i++ {
c := re_runes_orig[i]
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
@ -115,6 +119,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else if c == '(' && i < len(re_runes_orig)-2 && re_runes_orig[i+1] == '?' && re_runes_orig[i+2] == ':' {
re_runes = append(re_runes, NONCAPLPAREN_CHAR)
i += 2
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
re_runes = append(re_runes, ESC_BACKSLASH)
i++
} else {
re_runes = append(re_runes, c)
}
@ -671,6 +678,11 @@ func thompson(re []postfixNode) (Reg, error) {
}
}
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
replaceByValue(state.except, ESC_BACKSLASH, '\\')
nfa = append(nfa, &state)
}
if c.nodetype == LPAREN || c.nodetype == RPAREN {

@ -4,7 +4,7 @@ import (
"slices"
)
const EPSILON int = 0
const EPSILON int = 0xF0000
type assertType int

Loading…
Cancel
Save