Fixed edge cases with character ranges and character classes

5 months ago · 9d3c228ace
parent 5e12fe1c42
commit 9d3c228ace
1 changed file with 13 additions and 7 deletions
--- a/compile.go
+++ b/compile.go
@@ -153,8 +153,12 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 			}
 			for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
 				i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
-				// TODO: Check for escaped characters
+				// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
-				if re_runes[i] == '-' && i > 0 && re_runes[i-1] != '\\' { // Unescaped hyphen - replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
+				if i >= len(re_runes) {
 					return nil, fmt.Errorf("Opening bracket without closing bracket.")
 				}
 				if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != ']') { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
 					re_runes[i] = CHAR_RANGE
 				}
@@ -491,13 +495,15 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 					//  2. To account for this, the following logic is followed:
 					// 		a. If the second-to-last postfixNode ie. the start of the range has only one element, then we are in a range.
 					// 			i. If it has more than one element, then we are actually looking at a literal hyphen, and we will treat it as such.
 					// 		   ii. If either the start or end of the range don't exist in 'chars' ie. something like [-a] or [a-], then too will we treat it as a literal hyphen.
 					// 		b. The last postfixNode added to 'chars' _must_ only have one character (because it's the end of the range).
-					endRangePostfixNode := mustPop(&chars)
+					endRangePostfixNode, err1 := pop(&chars)
-					startRangePostfixNode := mustPop(&chars)
+					startRangePostfixNode, err2 := pop(&chars)
-					if len(endRangePostfixNode.contents) != 1 {
+
-						return nil, fmt.Errorf("Error parsing character range.")
+					if (err1 != nil || err2 != nil) || len(startRangePostfixNode.contents) != 1 { // Treat it as a regular hyphen
 					} else if len(startRangePostfixNode.contents) != 1 { // This is actually a regular hyphen
 						chars = append(chars, startRangePostfixNode, newPostfixCharNode('-'), endRangePostfixNode)
 					} else if len(endRangePostfixNode.contents) != 1 { // I don't even know what this would look like, this is just a sanity check
 						return nil, fmt.Errorf("Error parsing character range.")
 					} else {
 						// We have established that they both have a length of 1
 						startRangeRune := startRangePostfixNode.contents[0]