Changed the value of EPSILON, so that we can use the NUL character

(which EPSILON used to be) in a regex. Also added code to detect escaped
backslashes.

Specifically, I replace an escaped backslash with a metacharacter, then
convert it back later on. This prevents problems such as wrongly treating
the opening bracket in '\\[a]' as escaped.
master
Aadhavan Srinivasan 12 hours ago
parent 0fb78abf7f
commit 25cb79f01b

@ -81,6 +81,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// //
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:' // Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
// I take this out, and put in a special character - NONCAPLPAREN_CHAR. // I take this out, and put in a special character - NONCAPLPAREN_CHAR.
//
// Finally, check for escaped backslashes. Replace these with the ESC_BACKSLASH metacharacter. Later, in thompson(),
// these will be converted back. This avoids confusion in detecting whether a character is escaped, e.g. detecting
// whether '\\[a]' has an escaped opening bracket (it doesn't).
for i := 0; i < len(re_runes_orig); i++ { for i := 0; i < len(re_runes_orig); i++ {
c := re_runes_orig[i] c := re_runes_orig[i]
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) { if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
@ -115,6 +119,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else if c == '(' && i < len(re_runes_orig)-2 && re_runes_orig[i+1] == '?' && re_runes_orig[i+2] == ':' { } else if c == '(' && i < len(re_runes_orig)-2 && re_runes_orig[i+1] == '?' && re_runes_orig[i+2] == ':' {
re_runes = append(re_runes, NONCAPLPAREN_CHAR) re_runes = append(re_runes, NONCAPLPAREN_CHAR)
i += 2 i += 2
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
re_runes = append(re_runes, ESC_BACKSLASH)
i++
} else { } else {
re_runes = append(re_runes, c) re_runes = append(re_runes, c)
} }
@ -671,6 +678,11 @@ func thompson(re []postfixNode) (Reg, error) {
} }
} }
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
replaceByValue(state.except, ESC_BACKSLASH, '\\')
nfa = append(nfa, &state) nfa = append(nfa, &state)
} }
if c.nodetype == LPAREN || c.nodetype == RPAREN { if c.nodetype == LPAREN || c.nodetype == RPAREN {

@ -4,7 +4,7 @@ import (
"slices" "slices"
) )
const EPSILON int = 0 const EPSILON int = 0xF0000
type assertType int type assertType int

Loading…
Cancel
Save