Added support for some escaped metacharacters

master
Aadhavan Srinivasan 2 months ago
parent b8d5ea0897
commit 1a7fd12569

@ -35,7 +35,7 @@ func shuntingYard(re string) []postfixNode {
re_runes := []rune(re) // Convert the string to a slice of runes to allow iteration through it
/* Add concatenation operators.
Only add a concatenation operator between two characters if both the following conditions are met:
1. The first character isn't an opening parenthesis or alternation operator.
1. The first character isn't an opening parenthesis or alternation operator (or an escape character)
a. This makes sense, because these operators can't be _concatenated_ with anything else.
2. The second character isn't a 'closing operator' - one that applies to something before it
a. Again, these operators can't be concatenated _to_. They can, however, be concatenated _from_.
@ -67,7 +67,7 @@ func shuntingYard(re string) []postfixNode {
}
continue
}
if re_runes[i] != '(' && re_runes[i] != '|' {
if re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\' {
if i < len(re_runes)-1 {
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' {
re_postfix = append(re_postfix, CONCAT)
@ -98,16 +98,39 @@ func shuntingYard(re string) []postfixNode {
outQueue = append(outQueue, newPostfixNode(c))
continue
}
// Escape character - NOT IMPLEMENTED YET - DO NOT USE
// if c == '\\' { // Escape character - next character is treated as alphanum
// if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
// panic("ERROR: Backslash with no escape character.")
// }
// outQueue = append(outQueue, re_postfix[i+1])
// }
// Escape character: a backslash changes how the character after it is interpreted.
// The class letters s/S, d/D and w/W expand to a node holding a whole set of characters.
// Any other escaped character (eg. \( or \\) falls through to the default case and is
// appended as a plain literal character node; no other escapes (such as \b) are handled here.
if c == '\\' {
	if i == len(re_postfix)-1 { // End of input - nothing follows the backslash, so there is nothing to escape
		panic("ERROR: Backslash with no escape character.")
	}
	i++ // Step onto the escaped character so that it is consumed together with the backslash
	switch re_postfix[i] {
	case 's': // Whitespace
		outQueue = append(outQueue, newPostfixNode(whitespaceChars...))
	case 'S': // Non-whitespace - start from every dot character, then remove the whitespace characters
		// slices.DeleteFunc removes the elements for which the callback returns true, so the
		// callback must report membership in the set being excluded.
		// NOTE(review): DeleteFunc filters in place; this assumes dotChars() returns a fresh slice on every call - confirm.
		outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
			return slices.Contains(whitespaceChars, r)
		})...))
	case 'd': // Digits
		outQueue = append(outQueue, newPostfixNode(digitChars...))
	case 'D': // Non-digits - every dot character except the digits
		outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
			return slices.Contains(digitChars, r)
		})...))
	case 'w': // Word characters
		outQueue = append(outQueue, newPostfixNode(wordChars...))
	case 'W': // Non-word characters - every dot character except the word characters
		outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
			return slices.Contains(wordChars, r)
		})...))
	default: // None of the above - append the escaped character as a regular (literal) character
		outQueue = append(outQueue, newPostfixCharNode(re_postfix[i]))
	}
	continue // i already points at the escaped character, so the next iteration resumes after it
}
if c == '.' { // Dot metacharacter - represents 'any' character, but I am only adding Unicode 0020-007E
outQueue = append(outQueue, newPostfixNode(dotCharacters()...))
outQueue = append(outQueue, newPostfixNode(dotChars()...))
continue
}
if isOperator(c) {
@ -247,7 +270,6 @@ func main() {
if err != nil {
panic(err)
}
fmt.Scanln(&test_str)
re_postfix := shuntingYard(re)
// fmt.Println(re_postfix)

Loading…
Cancel
Save