From 1a7fd12569e4a1879674845b067a63a288c363eb Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Tue, 29 Oct 2024 10:05:14 -0400 Subject: [PATCH] Added support for some escaped metacharacters --- main.go | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/main.go b/main.go index daeef74..5b82e03 100644 --- a/main.go +++ b/main.go @@ -35,7 +35,7 @@ func shuntingYard(re string) []postfixNode { re_runes := []rune(re) // Convert the string to a slice of runes to allow iteration through it /* Add concatenation operators. Only add a concatenation operator between two characters if both the following conditions are met: - 1. The first character isn't an opening parantheses or alteration operator. + 1. The first character isn't an opening parantheses or alteration operator (or an escape character) a. This makes sense, because these operators can't be _concatenated_ with anything else. 2. The second character isn't a 'closing operator' - one that applies to something before it a. Again, these operators can'be concatenated _to_. They can, however, be concatenated _from_. @@ -67,7 +67,7 @@ func shuntingYard(re string) []postfixNode { } continue } - if re_runes[i] != '(' && re_runes[i] != '|' { + if re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\' { if i < len(re_runes)-1 { if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' 
&& re_runes[i+1] != ')' { re_postfix = append(re_postfix, CONCAT) @@ -98,16 +98,39 @@ func shuntingYard(re string) []postfixNode { outQueue = append(outQueue, newPostfixNode(c)) continue } - // Escape character - NOT IMPLEMENTED YET - DO NOT USE - // if c == '\\' { // Escape character - next character is treated as alphanum - // if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it) - // panic("ERROR: Backslash with no escape character.") - // } - // outQueue = append(outQueue, re_postfix[i+1]) - // } + // Escape character + if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary + if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it) + panic("ERROR: Backslash with no escape character.") + } + i++ + switch re_postfix[i] { + case 's': // Whitespace + outQueue = append(outQueue, newPostfixNode(whitespaceChars...)) + case 'S': // Non-whitespace - I am doing this in a fancy way, generating all dot characters, then removing whitespace characters from it + outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool { + return slices.Contains(whitespaceChars, r) // DeleteFunc removes the elements for which this returns true, so whitespace is dropped + })...)) + case 'd': // Digits + outQueue = append(outQueue, newPostfixNode(digitChars...)) + case 'D': // Non-digits - same fancy way as 'S' + outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool { + return slices.Contains(digitChars, r) // Drop digits, keep every other dot character + })...)) + case 'w': // word character + outQueue = append(outQueue, newPostfixNode(wordChars...)) + case 'W': // Non-word character - same fancy way as 'S' and 'D' + outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool { + return slices.Contains(wordChars, r) // Drop word characters, keep every other dot character + })...)) + default: // None of the above - append it as a regular
character + outQueue = append(outQueue, newPostfixCharNode(re_postfix[i])) + } + continue // Escaped character will automatically be skipped when loop variable increments + } if c == '.' { // Dot metacharacter - represents 'any' character, but I am only adding Unicode 0020-007E - outQueue = append(outQueue, newPostfixNode(dotCharacters()...)) + outQueue = append(outQueue, newPostfixNode(dotChars()...)) continue } if isOperator(c) { @@ -247,7 +270,6 @@ func main() { if err != nil { panic(err) } - fmt.Scanln(&test_str) re_postfix := shuntingYard(re) // fmt.Println(re_postfix)