Convert an inverting character class into an 'allChars' node, with the characters marked as exceptions

1 month ago · b3ee1fe5e8
parent 708a9e1303
commit b3ee1fe5e8
1 changed files with 25 additions and 16 deletions
--- a/main.go
+++ b/main.go
@ -47,13 +47,12 @@ func shuntingYard(re string) []postfixNode {
 	i := 0
 	for i < len(re_runes) {
 		re_postfix = append(re_postfix, re_runes[i])
-		if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped
+		if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped. Inside this block, the only task is to expand character ranges into their constituent characters.
 			re_postfix[len(re_postfix)-1] = LBRACKET         // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class
 			invertMatch := false
 			toAppend := make([]rune, 0)                      // Holds all the runes in the current character class
 			if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets
-				invertMatch = true
+				re_postfix = append(re_postfix, '^')
-				i++
+				i++ // Skip opening bracket and caret
 			}
 			if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
 				panic("Empty character class.")
@ -81,13 +80,9 @@ func shuntingYard(re string) []postfixNode {
 			}
 			// Replace the last character (which should have been ']') with RBRACKET
 			toAppend[len(toAppend)-1] = RBRACKET
 			if invertMatch {
 				toAppend = setDifference(dotChars(), toAppend) // Take the inverse of the set by getting the difference between it and all dot characters
 				toAppend = append(toAppend, RBRACKET)          // Since RBRACKET doesn't exist in dotChars, it wouldn't have been returned by setDifference. We manually append it here.
 			}
 			re_postfix = append(re_postfix, toAppend...)
 		}
-		if re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
+		if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
 			i++ // Skip opening brace
 			for i < len(re_runes) && re_runes[i] != '}' {
 				re_postfix = append(re_postfix, re_runes[i])
@ -98,7 +93,7 @@ func shuntingYard(re string) []postfixNode {
 			}
 			re_postfix = append(re_postfix, re_runes[i]) // Append closing brace
 		}
-		if (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
+		if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
 			if i < len(re_runes)-1 {
 				if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
 					re_postfix = append(re_postfix, CONCAT)
@ -173,6 +168,11 @@ func shuntingYard(re string) []postfixNode {
 		}
 		if c == LBRACKET { // Used for character classes
 			i++ // Step forward so we can look at the character class
 			var invertMatch bool
 			if re_postfix[i] == '^' {
 				invertMatch = true
 				i++
 			}
 			chars := make([]rune, 0) // List of characters -  used only for character classes
 			for i < len(re_postfix) {
 				if re_postfix[i] == RBRACKET {
@ -184,8 +184,14 @@ func shuntingYard(re string) []postfixNode {
 			if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing bracket. Panic.
 				panic("ERROR: Opening bracket without closing bracket.")
 			}
-			outQueue = append(outQueue, newPostfixNode(chars...))
+			if !invertMatch {
-			//	i++ // Step forward to skip closing bracket
+				outQueue = append(outQueue, newPostfixCharNode(chars...))
 			} else {
 				// Invert match - create an allChars postfixNode, then add the given states to its 'except' list.
 				toAdd := newPostfixDotNode()
 				toAdd.except = chars
 				outQueue = append(outQueue, toAdd)
 			}
 			continue
 		}
 		if c == '{' {
@ -282,8 +288,11 @@ func thompson(re []postfixNode) *State {
 		if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
 			state := State{}
 			state.transitions = make(map[int][]*State)
-			if c.isDot {
+			if c.allChars {
-				state.isDot = true
+				state.allChars = true
 				if len(c.except) != 0 {
 					state.except = append([]rune{}, c.except...)
 				}
 			}
 			state.content = rune2Contents(c.contents)
 			state.output = make([]*State, 0)