Fixed edge cases with character ranges and character classes
This commit is contained in:
20
compile.go
20
compile.go
@@ -153,8 +153,12 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
}
|
||||
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
|
||||
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
|
||||
// TODO: Check for escaped characters
|
||||
if re_runes[i] == '-' && i > 0 && re_runes[i-1] != '\\' { // Unescaped hyphen - replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
|
||||
// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
|
||||
if i >= len(re_runes) {
|
||||
return nil, fmt.Errorf("Opening bracket without closing bracket.")
|
||||
}
|
||||
|
||||
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != ']') { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
|
||||
re_runes[i] = CHAR_RANGE
|
||||
}
|
||||
|
||||
@@ -491,13 +495,15 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
// 2. To account for this, the following logic is followed:
|
||||
// a. If the second-to-last postfixNode, i.e. the start of the range, has only one element, then we are in a range.
|
||||
// i. If it has more than one element, then we are actually looking at a literal hyphen, and we will treat it as such.
|
||||
// ii. If either the start or end of the range doesn't exist in 'chars', i.e. something like [-a] or [a-], then we will also treat it as a literal hyphen.
|
||||
// b. The last postfixNode added to 'chars' _must_ only have one character (because it's the end of the range).
|
||||
endRangePostfixNode := mustPop(&chars)
|
||||
startRangePostfixNode := mustPop(&chars)
|
||||
if len(endRangePostfixNode.contents) != 1 {
|
||||
return nil, fmt.Errorf("Error parsing character range.")
|
||||
} else if len(startRangePostfixNode.contents) != 1 { // This is actually a regular hyphen
|
||||
endRangePostfixNode, err1 := pop(&chars)
|
||||
startRangePostfixNode, err2 := pop(&chars)
|
||||
|
||||
if (err1 != nil || err2 != nil) || len(startRangePostfixNode.contents) != 1 { // Treat it as a regular hyphen
|
||||
chars = append(chars, startRangePostfixNode, newPostfixCharNode('-'), endRangePostfixNode)
|
||||
} else if len(endRangePostfixNode.contents) != 1 { // I don't even know what this would look like, this is just a sanity check
|
||||
return nil, fmt.Errorf("Error parsing character range.")
|
||||
} else {
|
||||
// We have established that they both have a length of 1
|
||||
startRangeRune := startRangePostfixNode.contents[0]
|
||||
|
Reference in New Issue
Block a user