Enforce the rule that character classes must have at least one character; interpret literal closing brackets as regular characters
This commit is contained in:
18
compile.go
18
compile.go
@@ -272,7 +272,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
|
||||
*/
|
||||
c := re_postfix[i]
|
||||
if isNormalChar(c) {
|
||||
if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) {
|
||||
if caseInsensitive {
|
||||
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
|
||||
} else {
|
||||
@@ -280,7 +280,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Escape character
|
||||
|
||||
if c == '\\' { // Escape character - inverts special and non-special characters, e.g. \( is treated as a literal parenthesis, \b is treated as a word boundary
|
||||
if i == len(re_postfix)-1 { // End of string - return an error, because backslash is an escape character (something needs to come after it)
|
||||
return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
|
||||
@@ -412,6 +412,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
}
|
||||
}
|
||||
if c == LBRACKET { // Used for character classes
|
||||
firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
|
||||
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
|
||||
i++ // Step forward so we can look at the character class
|
||||
var invertMatch bool
|
||||
@@ -421,7 +422,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
}
|
||||
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
|
||||
for i < len(re_postfix) {
|
||||
if re_postfix[i] == RBRACKET {
|
||||
if firstCharAdded && re_postfix[i] == RBRACKET {
|
||||
break
|
||||
}
|
||||
if re_postfix[i] == CHAR_RANGE {
|
||||
@@ -481,9 +482,20 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
i++
|
||||
}
|
||||
} else {
|
||||
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined. Because no characters have been added yet, it has to be converted back to the regular character before being added. For example, in '[[]', the second LBRACKET should be treated as a literal bracket.
|
||||
switch re_postfix[i] {
|
||||
case LBRACKET:
|
||||
chars = append(chars, newPostfixCharNode('['))
|
||||
case RBRACKET:
|
||||
chars = append(chars, newPostfixCharNode(']'))
|
||||
default:
|
||||
return nil, fmt.Errorf("Error parsing high-range unicode value in character class.")
|
||||
}
|
||||
}
|
||||
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
||||
i++
|
||||
}
|
||||
firstCharAdded = true
|
||||
|
||||
if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range
|
||||
// Things to note:
|
||||
|
Reference in New Issue
Block a user