Fixed edge cases with character ranges and character classes

master
Aadhavan Srinivasan 1 week ago
parent 5e12fe1c42
commit 9d3c228ace

@ -153,8 +153,12 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' { for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash) i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// TODO: Check for escaped characters // Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
if re_runes[i] == '-' && i > 0 && re_runes[i-1] != '\\' { // Unescaped hyphen - replace with CHAR_RANGE. This metacharacter will be used later on to construct the range if i >= len(re_runes) {
return nil, fmt.Errorf("Opening bracket without closing bracket.")
}
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != ']') { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
re_runes[i] = CHAR_RANGE re_runes[i] = CHAR_RANGE
} }
@ -491,13 +495,15 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// 2. To account for this, the following logic is followed: // 2. To account for this, the following logic is followed:
// a. If the second-to-last postfixNode ie. the start of the range has only one element, then we are in a range. // a. If the second-to-last postfixNode ie. the start of the range has only one element, then we are in a range.
// i. If it has more than one element, then we are actually looking at a literal hyphen, and we will treat it as such. // i. If it has more than one element, then we are actually looking at a literal hyphen, and we will treat it as such.
// ii. If either the start or end of the range doesn't exist in 'chars', i.e. something like [-a] or [a-], then we will also treat it as a literal hyphen.
// b. The last postfixNode added to 'chars' _must_ only have one character (because it's the end of the range). // b. The last postfixNode added to 'chars' _must_ only have one character (because it's the end of the range).
endRangePostfixNode := mustPop(&chars) endRangePostfixNode, err1 := pop(&chars)
startRangePostfixNode := mustPop(&chars) startRangePostfixNode, err2 := pop(&chars)
if len(endRangePostfixNode.contents) != 1 {
return nil, fmt.Errorf("Error parsing character range.") if (err1 != nil || err2 != nil) || len(startRangePostfixNode.contents) != 1 { // Treat it as a regular hyphen
} else if len(startRangePostfixNode.contents) != 1 { // This is actually a regular hyphen
chars = append(chars, startRangePostfixNode, newPostfixCharNode('-'), endRangePostfixNode) chars = append(chars, startRangePostfixNode, newPostfixCharNode('-'), endRangePostfixNode)
} else if len(endRangePostfixNode.contents) != 1 { // I don't even know what this would look like, this is just a sanity check
return nil, fmt.Errorf("Error parsing character range.")
} else { } else {
// We have established that they both have a length of 1 // We have established that they both have a length of 1
startRangeRune := startRangePostfixNode.contents[0] startRangeRune := startRangePostfixNode.contents[0]

Loading…
Cancel
Save