Added support for inverted matches; moved escape character detection to its own function
This commit is contained in:
51
main.go
51
main.go
@@ -44,6 +44,16 @@ func shuntingYard(re string) []postfixNode {
|
|||||||
for i < len(re_runes) {
|
for i < len(re_runes) {
|
||||||
re_postfix = append(re_postfix, re_runes[i])
|
re_postfix = append(re_postfix, re_runes[i])
|
||||||
if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped
|
if re_runes[i] == '[' && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped
|
||||||
|
re_postfix[len(re_postfix)-1] = LBRACKET // Replace the '[' character with LBRACKET. This allows for easier parsing of all characters (including opening and closing brackets) within the character class
|
||||||
|
invertMatch := false
|
||||||
|
toAppend := make([]rune, 0) // Holds all the runes in the current character class
|
||||||
|
if i < len(re_runes)-1 && re_runes[i+1] == '^' { // Inverting class - match everything NOT in brackets
|
||||||
|
invertMatch = true
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
|
||||||
|
panic("Empty character class.")
|
||||||
|
}
|
||||||
for re_runes[i] != ']' {
|
for re_runes[i] != ']' {
|
||||||
i++ // Skip all characters inside brackets
|
i++ // Skip all characters inside brackets
|
||||||
// TODO: Check for escaped characters
|
// TODO: Check for escaped characters
|
||||||
@@ -57,15 +67,21 @@ func shuntingYard(re string) []postfixNode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i := rangeStart; i <= rangeEnd; i++ {
|
for i := rangeStart; i <= rangeEnd; i++ {
|
||||||
re_postfix = append(re_postfix, i)
|
toAppend = append(toAppend, i)
|
||||||
}
|
}
|
||||||
|
|
||||||
i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)
|
i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
re_postfix = append(re_postfix, re_runes[i])
|
toAppend = append(toAppend, re_runes[i])
|
||||||
}
|
}
|
||||||
continue
|
// Replace the last character (which should have been ']') with RBRACKET
|
||||||
|
toAppend[len(toAppend)-1] = RBRACKET
|
||||||
|
if invertMatch {
|
||||||
|
toAppend = setDifference(dotChars(), toAppend) // Take the inverse of the set by getting the difference between it and all dot characters
|
||||||
|
toAppend = append(toAppend, RBRACKET) // Since RBRACKET doesn't exist in dotChars, it wouldn't have been returned by setDifference. We manually append it here.
|
||||||
|
}
|
||||||
|
re_postfix = append(re_postfix, toAppend...)
|
||||||
}
|
}
|
||||||
if (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
|
if (re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
|
||||||
if i < len(re_runes)-1 {
|
if i < len(re_runes)-1 {
|
||||||
@@ -104,28 +120,7 @@ func shuntingYard(re string) []postfixNode {
|
|||||||
panic("ERROR: Backslash with no escape character.")
|
panic("ERROR: Backslash with no escape character.")
|
||||||
}
|
}
|
||||||
i++
|
i++
|
||||||
switch re_postfix[i] {
|
outQueue = append(outQueue, newEscapedNode(re_postfix[i]))
|
||||||
case 's': // Whitespace
|
|
||||||
outQueue = append(outQueue, newPostfixNode(whitespaceChars...))
|
|
||||||
case 'S': // Non-whitespace - I am doing this in a fancy way, generating all dot characters, then removing whitespace characters from it
|
|
||||||
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
|
|
||||||
return slices.Contains(whitespaceChars, r)
|
|
||||||
})...))
|
|
||||||
case 'd': // Digits
|
|
||||||
outQueue = append(outQueue, newPostfixNode(digitChars...))
|
|
||||||
case 'D': // Non-digits - same fancy way as 'S'
|
|
||||||
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
|
|
||||||
return slices.Contains(digitChars, r)
|
|
||||||
})...))
|
|
||||||
case 'w': // word character
|
|
||||||
outQueue = append(outQueue, newPostfixNode(wordChars...))
|
|
||||||
case 'W': // Non-word character - same fancy way as 'S' and 'D'
|
|
||||||
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
|
|
||||||
return slices.Contains(wordChars, r)
|
|
||||||
})...))
|
|
||||||
default: // None of the above - append it as a regular character
|
|
||||||
outQueue = append(outQueue, newPostfixCharNode(re_postfix[i]))
|
|
||||||
}
|
|
||||||
continue // Escaped character will automatically be skipped when loop variable increments
|
continue // Escaped character will automatically be skipped when loop variable increments
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -153,11 +148,11 @@ func shuntingYard(re string) []postfixNode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if c == '[' { // Used for character classes
|
if c == LBRACKET { // Used for character classes
|
||||||
i++ // Step forward so we can look at the character class
|
i++ // Step forward so we can look at the character class
|
||||||
chars := make([]rune, 0) // List of characters - used only for character classes
|
chars := make([]rune, 0) // List of characters - used only for character classes
|
||||||
for i < len(re_postfix) {
|
for i < len(re_postfix) {
|
||||||
if re_postfix[i] == ']' {
|
if re_postfix[i] == RBRACKET {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
chars = append(chars, re_postfix[i])
|
chars = append(chars, re_postfix[i])
|
||||||
@@ -167,7 +162,7 @@ func shuntingYard(re string) []postfixNode {
|
|||||||
panic("ERROR: Opening bracket without closing bracket.")
|
panic("ERROR: Opening bracket without closing bracket.")
|
||||||
}
|
}
|
||||||
outQueue = append(outQueue, newPostfixNode(chars...))
|
outQueue = append(outQueue, newPostfixNode(chars...))
|
||||||
i++ // Step forward to skip closing bracket
|
// i++ // Step forward to skip closing bracket
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if c == '(' {
|
if c == '(' {
|
||||||
|
Reference in New Issue
Block a user