Added support for inverted matches; moved escape character detection to its own function
This commit is contained in:
51
main.go
51
main.go
@@ -44,6 +44,16 @@ func shuntingYard(re string) []postfixNode {
|
||||
for i < len(re_runes) {
|
||||
re_postfix = append(re_postfix, re_runes[i])
|
||||
if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped
|
||||
re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class
|
||||
invertMatch := false
|
||||
toAppend := make([]rune, 0) // Holds all the runes in the current character class
|
||||
if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets
|
||||
invertMatch = true
|
||||
i++
|
||||
}
|
||||
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
|
||||
panic("Empty character class.")
|
||||
}
|
||||
for re_runes[i] != ']' {
|
||||
i++ // Skip all characters inside brackets
|
||||
// TODO: Check for escaped characters
|
||||
@@ -57,15 +67,21 @@ func shuntingYard(re string) []postfixNode {
|
||||
}
|
||||
|
||||
for i := rangeStart; i <= rangeEnd; i++ {
|
||||
re_postfix = append(re_postfix, i)
|
||||
toAppend = append(toAppend, i)
|
||||
}
|
||||
|
||||
i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)
|
||||
continue
|
||||
}
|
||||
re_postfix = append(re_postfix, re_runes[i])
|
||||
toAppend = append(toAppend, re_runes[i])
|
||||
}
|
||||
continue
|
||||
// Replace the last character (which should have been ']') with RBRACKET
|
||||
toAppend[len(toAppend)-1] = RBRACKET
|
||||
if invertMatch {
|
||||
toAppend = setDifference(dotChars(), toAppend) // Take the inverse of the set by getting the difference between it and all dot characters
|
||||
toAppend = append(toAppend, RBRACKET) // Since RBRACKET doesn't exist in dotChars, it would not have been returned by setDifference. We manually append it here.
|
||||
}
|
||||
re_postfix = append(re_postfix, toAppend...)
|
||||
}
|
||||
if (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
|
||||
if i < len(re_runes)-1 {
|
||||
@@ -104,28 +120,7 @@ func shuntingYard(re string) []postfixNode {
|
||||
panic("ERROR: Backslash with no escape character.")
|
||||
}
|
||||
i++
|
||||
switch re_postfix[i] {
|
||||
case 's': // Whitespace
|
||||
outQueue = append(outQueue, newPostfixNode(whitespaceChars...))
|
||||
case 'S': // Non-whitespace - I am doing this in a fancy way, generating all dot characters, then removing whitespace characters from it
|
||||
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
|
||||
return slices.Contains(whitespaceChars, r)
|
||||
})...))
|
||||
case 'd': // Digits
|
||||
outQueue = append(outQueue, newPostfixNode(digitChars...))
|
||||
case 'D': // Non-digits - same fancy way as 'S'
|
||||
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
|
||||
return slices.Contains(digitChars, r)
|
||||
})...))
|
||||
case 'w': // word character
|
||||
outQueue = append(outQueue, newPostfixNode(wordChars...))
|
||||
case 'W': // Non-word character - same fancy way as 'S' and 'D'
|
||||
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
|
||||
return slices.Contains(wordChars, r)
|
||||
})...))
|
||||
default: // None of the above - append it as a regular character
|
||||
outQueue = append(outQueue, newPostfixCharNode(re_postfix[i]))
|
||||
}
|
||||
outQueue = append(outQueue, newEscapedNode(re_postfix[i]))
|
||||
continue // Escaped character will automatically be skipped when loop variable increments
|
||||
}
|
||||
|
||||
@@ -153,11 +148,11 @@ func shuntingYard(re string) []postfixNode {
|
||||
}
|
||||
}
|
||||
}
|
||||
if c == '[' { // Used for character classes
|
||||
if c == LBRACKET { // Used for character classes
|
||||
i++ // Step forward so we can look at the character class
|
||||
chars := make([]rune, 0) // List of characters - used only for character classes
|
||||
for i < len(re_postfix) {
|
||||
if re_postfix[i] == ']' {
|
||||
if re_postfix[i] == RBRACKET {
|
||||
break
|
||||
}
|
||||
chars = append(chars, re_postfix[i])
|
||||
@@ -167,7 +162,7 @@ func shuntingYard(re string) []postfixNode {
|
||||
panic("ERROR: Opening bracket without closing bracket.")
|
||||
}
|
||||
outQueue = append(outQueue, newPostfixNode(chars...))
|
||||
i++ // Step forward to skip closing bracket
|
||||
// i++ // Step forward to skip closing bracket
|
||||
continue
|
||||
}
|
||||
if c == '(' {
|
||||
|
Reference in New Issue
Block a user