Enforce the rule that character classes must have at least one character; interpret a closing bracket that appears as the first character of a character class as a literal character

master
Aadhavan Srinivasan 1 week ago
parent 6fb266e0d2
commit 1520edad55

@ -272,7 +272,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue. 6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
*/ */
c := re_postfix[i] c := re_postfix[i]
if isNormalChar(c) { if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) {
if caseInsensitive { if caseInsensitive {
outQueue = append(outQueue, newPostfixNode(allCases(c)...)) outQueue = append(outQueue, newPostfixNode(allCases(c)...))
} else { } else {
@ -280,7 +280,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
continue continue
} }
// Escape character
if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it) if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
return nil, fmt.Errorf("ERROR: Backslash with no escape character.") return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
@ -412,6 +412,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
} }
if c == LBRACKET { // Used for character classes if c == LBRACKET { // Used for character classes
firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
i++ // Step forward so we can look at the character class i++ // Step forward so we can look at the character class
var invertMatch bool var invertMatch bool
@ -421,7 +422,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
chars := make([]postfixNode, 0) // List of nodes - used only for character classes chars := make([]postfixNode, 0) // List of nodes - used only for character classes
for i < len(re_postfix) { for i < len(re_postfix) {
if re_postfix[i] == RBRACKET { if firstCharAdded && re_postfix[i] == RBRACKET {
break break
} }
if re_postfix[i] == CHAR_RANGE { if re_postfix[i] == CHAR_RANGE {
@ -481,9 +482,20 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i++ i++
} }
} else { } else {
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket.
switch re_postfix[i] {
case LBRACKET:
chars = append(chars, newPostfixCharNode('['))
case RBRACKET:
chars = append(chars, newPostfixCharNode(']'))
default:
return nil, fmt.Errorf("Error parsing high-range unicode value in character class.")
}
}
chars = append(chars, newPostfixCharNode(re_postfix[i])) chars = append(chars, newPostfixCharNode(re_postfix[i]))
i++ i++
} }
firstCharAdded = true
if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range
// Things to note: // Things to note:

Loading…
Cancel
Save