From 1bafdcdb7e14e531aefc65a58c9cdb43df4230f5 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Wed, 30 Oct 2024 09:33:25 -0400 Subject: [PATCH] Added support for inverted matches; moved escape character detection to its own function --- main.go | 51 +++++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/main.go b/main.go index bc96293..1c05625 100644 --- a/main.go +++ b/main.go @@ -44,6 +44,16 @@ func shuntingYard(re string) []postfixNode { for i < len(re_runes) { re_postfix = append(re_postfix, re_runes[i]) if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped + re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing og all characters (including opening and closing brackets) within the character class + invertMatch := false + toAppend := make([]rune, 0) // Holds all the runes in the current character class + if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets + invertMatch = true + i++ + } + if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic. + panic("Empty character class.") + } for re_runes[i] != ']' { i++ // Skip all characters inside brackets // TODO: Check for escaped characters @@ -57,15 +67,21 @@ func shuntingYard(re string) []postfixNode { } for i := rangeStart; i <= rangeEnd; i++ { - re_postfix = append(re_postfix, i) + toAppend = append(toAppend, i) } i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop) continue } - re_postfix = append(re_postfix, re_runes[i]) + toAppend = append(toAppend, re_runes[i]) } - continue + // Replace the last character (which should have been ']', with RBRACKET + toAppend[len(toAppend)-1] = RBRACKET + if invertMatch { + toAppend = setDifference(dotChars(), toAppend) // Take the inverse of the set by getting the difference between it and all dot characters + toAppend = append(toAppend, RBRACKET) // Since RBRACKET doesn't exist in dotChars, it wouldn't have been return in setDifference. We manually append it here. + } + re_postfix = append(re_postfix, toAppend...) } if (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped if i < len(re_runes)-1 { @@ -104,28 +120,7 @@ func shuntingYard(re string) []postfixNode { panic("ERROR: Backslash with no escape character.") } i++ - switch re_postfix[i] { - case 's': // Whitespace - outQueue = append(outQueue, newPostfixNode(whitespaceChars...)) - case 'S': // Non-whitespace - I am doing this in a fancy way, generating all dot characters, then removing whitespace characters from it - outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool { - return slices.Contains(whitespaceChars, r) - })...)) - case 'd': // Digits - outQueue = append(outQueue, newPostfixNode(digitChars...)) - case 'D': // Non-digits - same fancy way as 'S' - outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool { - return slices.Contains(digitChars, r) - })...)) - case 'w': // word character - outQueue = append(outQueue, newPostfixNode(wordChars...)) - case 'W': // Non-word character - same fancy way as 'S' and 'D' - outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool { - return slices.Contains(wordChars, r) - })...)) - default: // None of the above - append it as a regular character - outQueue = append(outQueue, newPostfixCharNode(re_postfix[i])) - } + outQueue = append(outQueue, newEscapedNode(re_postfix[i])) continue // Escaped character will automatically be skipped when loop variable increments } @@ -153,11 +148,11 @@ func shuntingYard(re string) []postfixNode { } } } - if c == '[' { // Used for character classes + if c == LBRACKET { // Used for character classes i++ // Step forward so we can look at the character class chars := make([]rune, 0) // List of characters - used only for character classes for i < len(re_postfix) { - if re_postfix[i] == ']' { + if re_postfix[i] == RBRACKET { break } chars = append(chars, re_postfix[i]) @@ -167,7 +162,7 @@ func shuntingYard(re string) []postfixNode { panic("ERROR: Opening bracket without closing bracket.") } outQueue = append(outQueue, newPostfixNode(chars...)) - i++ // Step forward to skip closing bracket + // i++ // Step forward to skip closing bracket continue } if c == '(' {