Added support for inverted matches; moved escape character detection to its own function

master
Aadhavan Srinivasan 2 months ago
parent 5f4a6c5a3b
commit 1bafdcdb7e

@ -44,6 +44,16 @@ func shuntingYard(re string) []postfixNode {
for i < len(re_runes) {
re_postfix = append(re_postfix, re_runes[i])
if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped
re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class
invertMatch := false
toAppend := make([]rune, 0) // Holds all the runes in the current character class
if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets
invertMatch = true
i++
}
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
panic("Empty character class.")
}
for re_runes[i] != ']' {
i++ // Skip all characters inside brackets
// TODO: Check for escaped characters
@ -57,15 +67,21 @@ func shuntingYard(re string) []postfixNode {
}
for i := rangeStart; i <= rangeEnd; i++ {
re_postfix = append(re_postfix, i)
toAppend = append(toAppend, i)
}
i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)
continue
}
re_postfix = append(re_postfix, re_runes[i])
toAppend = append(toAppend, re_runes[i])
}
continue
// Replace the last character (which should have been ']') with RBRACKET
toAppend[len(toAppend)-1] = RBRACKET
if invertMatch {
toAppend = setDifference(dotChars(), toAppend) // Take the inverse of the set by getting the difference between it and all dot characters
toAppend = append(toAppend, RBRACKET) // Since RBRACKET doesn't exist in dotChars, it wouldn't have been returned by setDifference. We manually append it here.
}
re_postfix = append(re_postfix, toAppend...)
}
if (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
if i < len(re_runes)-1 {
@ -104,28 +120,7 @@ func shuntingYard(re string) []postfixNode {
panic("ERROR: Backslash with no escape character.")
}
i++
switch re_postfix[i] {
case 's': // Whitespace
outQueue = append(outQueue, newPostfixNode(whitespaceChars...))
case 'S': // Non-whitespace - I am doing this in a fancy way, generating all dot characters, then removing whitespace characters from it
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
return slices.Contains(whitespaceChars, r)
})...))
case 'd': // Digits
outQueue = append(outQueue, newPostfixNode(digitChars...))
case 'D': // Non-digits - same fancy way as 'S'
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
return slices.Contains(digitChars, r)
})...))
case 'w': // word character
outQueue = append(outQueue, newPostfixNode(wordChars...))
case 'W': // Non-word character - same fancy way as 'S' and 'D'
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
return slices.Contains(wordChars, r)
})...))
default: // None of the above - append it as a regular character
outQueue = append(outQueue, newPostfixCharNode(re_postfix[i]))
}
outQueue = append(outQueue, newEscapedNode(re_postfix[i]))
continue // Escaped character will automatically be skipped when loop variable increments
}
@ -153,11 +148,11 @@ func shuntingYard(re string) []postfixNode {
}
}
}
if c == '[' { // Used for character classes
if c == LBRACKET { // Used for character classes
i++ // Step forward so we can look at the character class
chars := make([]rune, 0) // List of characters - used only for character classes
for i < len(re_postfix) {
if re_postfix[i] == ']' {
if re_postfix[i] == RBRACKET {
break
}
chars = append(chars, re_postfix[i])
@ -167,7 +162,7 @@ func shuntingYard(re string) []postfixNode {
panic("ERROR: Opening bracket without closing bracket.")
}
outQueue = append(outQueue, newPostfixNode(chars...))
i++ // Step forward to skip closing bracket
// i++ // Step forward to skip closing bracket
continue
}
if c == '(' {

Loading…
Cancel
Save