Changed the value of EPSILON, so that we can use the NUL character
(which it used to be) in a regex. Also added code to detect escaped backslashes. Specifically, I replace an escaped backslash with a metacharacter, then replace it back later on. This prevents problems such as wrongly treating the opening bracket as escaped in '\\[a]'.
This commit is contained in:
12
compile.go
12
compile.go
@@ -81,6 +81,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
//
|
||||
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
||||
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
||||
//
|
||||
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
|
||||
// these will be converted back. This avoids confusion in detecting whether a character is escaped, e.g. detecting
|
||||
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
||||
for i := 0; i < len(re_runes_orig); i++ {
|
||||
c := re_runes_orig[i]
|
||||
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
|
||||
@@ -115,6 +119,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
} else if c == '(' && i < len(re_runes_orig)-2 && re_runes_orig[i+1] == '?' && re_runes_orig[i+2] == ':' {
|
||||
re_runes = append(re_runes, NONCAPLPAREN_CHAR)
|
||||
i += 2
|
||||
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
|
||||
re_runes = append(re_runes, ESC_BACKSLASH)
|
||||
i++
|
||||
} else {
|
||||
re_runes = append(re_runes, c)
|
||||
}
|
||||
@@ -671,6 +678,11 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Replace ESC_BACKSLASH with an actual backslash, so that we can check whether we encounter it
|
||||
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
|
||||
replaceByValue(state.except, ESC_BACKSLASH, '\\')
|
||||
|
||||
nfa = append(nfa, &state)
|
||||
}
|
||||
if c.nodetype == LPAREN || c.nodetype == RPAREN {
|
||||
|
Reference in New Issue
Block a user