diff --git a/main.go b/main.go index c8a1b4e..d5771b9 100644 --- a/main.go +++ b/main.go @@ -47,13 +47,12 @@ func shuntingYard(re string) []postfixNode { i := 0 for i < len(re_runes) { re_postfix = append(re_postfix, re_runes[i]) - if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped - re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class - invertMatch := false + if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped. Inside this block, the only task is to expand character ranges into their constituent characters. + re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class toAppend := make([]rune, 0) // Holds all the runes in the current character class if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets - invertMatch = true - i++ + re_postfix = append(re_postfix, '^') + i++ // Skip opening bracket and caret } if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic. panic("Empty character class.") @@ -81,13 +80,9 @@ func shuntingYard(re string) []postfixNode { } // Replace the last character (which should have been ']', with RBRACKET toAppend[len(toAppend)-1] = RBRACKET - if invertMatch { - toAppend = setDifference(dotChars(), toAppend) // Take the inverse of the set by getting the difference between it and all dot characters - toAppend = append(toAppend, RBRACKET) // Since RBRACKET doesn't exist in dotChars, it wouldn't have been return in setDifference. We manually append it here. - } re_postfix = append(re_postfix, toAppend...) 
} - if re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either + if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either i++ // Skip opening brace for i < len(re_runes) && re_runes[i] != '}' { re_postfix = append(re_postfix, re_runes[i]) @@ -98,7 +93,7 @@ func shuntingYard(re string) []postfixNode { } re_postfix = append(re_postfix, re_runes[i]) // Append closing brace } - if (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped + if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped if i < len(re_runes)-1 { if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' { re_postfix = append(re_postfix, CONCAT) @@ -172,7 +167,12 @@ func shuntingYard(re string) []postfixNode { } } if c == LBRACKET { // Used for character classes - i++ // Step forward so we can look at the character class + i++ // Step forward so we can look at the character class + var invertMatch bool + if re_postfix[i] == '^' { + invertMatch = true + i++ + } chars := make([]rune, 0) // List of characters - used only for character classes for i < len(re_postfix) { if re_postfix[i] == RBRACKET { @@ -184,8 +184,14 @@ func shuntingYard(re string) []postfixNode { if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing brakcet. Panic. 
panic("ERROR: Opening bracket without closing bracket.") } - outQueue = append(outQueue, newPostfixNode(chars...)) - // i++ // Step forward to skip closing bracket + if !invertMatch { + outQueue = append(outQueue, newPostfixCharNode(chars...)) + } else { + // Invert match - create an allChars postfixNode, then add the given characters to its 'except' list. + toAdd := newPostfixDotNode() + toAdd.except = chars + outQueue = append(outQueue, toAdd) + } continue } if c == '{' { @@ -282,8 +288,11 @@ func thompson(re []postfixNode) *State { if c.nodetype == CHARACTER || c.nodetype == ASSERTION { state := State{} state.transitions = make(map[int][]*State) - if c.isDot { - state.isDot = true + if c.allChars { + state.allChars = true + if len(c.except) != 0 { + state.except = append([]rune{}, c.except...) + } } state.content = rune2Contents(c.contents) state.output = make([]*State, 0)