Enforce the rule that character classes must have at least one character; interpret literal closing brackets as regular characters
This commit is contained in:
22
compile.go
22
compile.go
@@ -272,7 +272,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
|
6. If current character is '{', find the appropriate numeric specifier (range start, range end). Apply the range to the postfixNode at the end of outQueue.
|
||||||
*/
|
*/
|
||||||
c := re_postfix[i]
|
c := re_postfix[i]
|
||||||
if isNormalChar(c) {
|
if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) {
|
||||||
if caseInsensitive {
|
if caseInsensitive {
|
||||||
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
|
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
|
||||||
} else {
|
} else {
|
||||||
@@ -280,7 +280,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// Escape character
|
|
||||||
if c == '\\' { // Escape character - invert special and non-special characters, e.g. \( is treated as a literal parenthesis, \b is treated as a word boundary
|
if c == '\\' { // Escape character - invert special and non-special characters, e.g. \( is treated as a literal parenthesis, \b is treated as a word boundary
|
||||||
if i == len(re_postfix)-1 { // End of string - return an error, because backslash is an escape character (something needs to come after it)
|
if i == len(re_postfix)-1 { // End of string - return an error, because backslash is an escape character (something needs to come after it)
|
||||||
return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
|
return nil, fmt.Errorf("ERROR: Backslash with no escape character.")
|
||||||
@@ -412,8 +412,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if c == LBRACKET { // Used for character classes
|
if c == LBRACKET { // Used for character classes
|
||||||
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
|
firstCharAdded := false // A character class must have at least 1 character. This flag records whether the first character has been added.
|
||||||
i++ // Step forward so we can look at the character class
|
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
|
||||||
|
i++ // Step forward so we can look at the character class
|
||||||
var invertMatch bool
|
var invertMatch bool
|
||||||
if re_postfix[i] == '^' {
|
if re_postfix[i] == '^' {
|
||||||
invertMatch = true
|
invertMatch = true
|
||||||
@@ -421,7 +422,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
|
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
|
||||||
for i < len(re_postfix) {
|
for i < len(re_postfix) {
|
||||||
if re_postfix[i] == RBRACKET {
|
if firstCharAdded && re_postfix[i] == RBRACKET {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if re_postfix[i] == CHAR_RANGE {
|
if re_postfix[i] == CHAR_RANGE {
|
||||||
@@ -481,9 +482,20 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's one of the metacharacters defined in this code. Because no character has been added to the class yet, it has to be converted back to its regular character before being added. For example, in '[[]', the second LBRACKET should be treated as a literal bracket.
|
||||||
|
switch re_postfix[i] {
|
||||||
|
case LBRACKET:
|
||||||
|
chars = append(chars, newPostfixCharNode('['))
|
||||||
|
case RBRACKET:
|
||||||
|
chars = append(chars, newPostfixCharNode(']'))
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("Error parsing high-range unicode value in character class.")
|
||||||
|
}
|
||||||
|
}
|
||||||
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
firstCharAdded = true
|
||||||
|
|
||||||
if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range
|
if endOfRange { // The previous character was an unescaped hyphen, which (in the context of a character class) means the character that was last appended is the end of a character range
|
||||||
// Things to note:
|
// Things to note:
|
||||||
|
Reference in New Issue
Block a user