Changed the value of EPSILON, so that we can use the NUL character
(which it used to be) in a regex. Also added code to detect escaped backslashes. Specifically, I replace an escaped backslash with a metacharacter, then replace it back later on. This prevents problems such as misdetecting whether the opening bracket is escaped in '\\[a]'.
This commit is contained in:
12
compile.go
12
compile.go
@@ -81,6 +81,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
//
|
//
|
||||||
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
||||||
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
||||||
|
//
|
||||||
|
// Finally, check for escaped backslashes. Replace these with the ESC_BACKSLASH metacharacter. Later, in thompson(),
|
||||||
|
// these will be converted back. This avoids confusion in detecting whether a character is escaped, e.g. detecting
|
||||||
|
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
||||||
for i := 0; i < len(re_runes_orig); i++ {
|
for i := 0; i < len(re_runes_orig); i++ {
|
||||||
c := re_runes_orig[i]
|
c := re_runes_orig[i]
|
||||||
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
|
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
|
||||||
@@ -115,6 +119,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
} else if c == '(' && i < len(re_runes_orig)-2 && re_runes_orig[i+1] == '?' && re_runes_orig[i+2] == ':' {
|
} else if c == '(' && i < len(re_runes_orig)-2 && re_runes_orig[i+1] == '?' && re_runes_orig[i+2] == ':' {
|
||||||
re_runes = append(re_runes, NONCAPLPAREN_CHAR)
|
re_runes = append(re_runes, NONCAPLPAREN_CHAR)
|
||||||
i += 2
|
i += 2
|
||||||
|
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
|
||||||
|
re_runes = append(re_runes, ESC_BACKSLASH)
|
||||||
|
i++
|
||||||
} else {
|
} else {
|
||||||
re_runes = append(re_runes, c)
|
re_runes = append(re_runes, c)
|
||||||
}
|
}
|
||||||
@@ -671,6 +678,11 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
|
||||||
|
replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
|
||||||
|
replaceByValue(state.except, ESC_BACKSLASH, '\\')
|
||||||
|
|
||||||
nfa = append(nfa, &state)
|
nfa = append(nfa, &state)
|
||||||
}
|
}
|
||||||
if c.nodetype == LPAREN || c.nodetype == RPAREN {
|
if c.nodetype == LPAREN || c.nodetype == RPAREN {
|
||||||
|
Reference in New Issue
Block a user