Added support for some escaped metacharacters

master
Aadhavan Srinivasan 2 months ago
parent b8d5ea0897
commit 1a7fd12569

@ -35,7 +35,7 @@ func shuntingYard(re string) []postfixNode {
re_runes := []rune(re) // Convert the string to a slice of runes to allow iteration through it re_runes := []rune(re) // Convert the string to a slice of runes to allow iteration through it
/* Add concatenation operators. /* Add concatenation operators.
Only add a concatenation operator between two characters if both the following conditions are met: Only add a concatenation operator between two characters if both the following conditions are met:
1. The first character isn't an opening parenthesis or alternation operator. 1. The first character isn't an opening parenthesis or alternation operator (or an escape character)
a. This makes sense, because these operators can't be _concatenated_ with anything else. a. This makes sense, because these operators can't be _concatenated_ with anything else.
2. The second character isn't a 'closing operator' - one that applies to something before it 2. The second character isn't a 'closing operator' - one that applies to something before it
a. Again, these operators can't be concatenated _to_. They can, however, be concatenated _from_. a. Again, these operators can't be concatenated _to_. They can, however, be concatenated _from_.
@ -67,7 +67,7 @@ func shuntingYard(re string) []postfixNode {
} }
continue continue
} }
if re_runes[i] != '(' && re_runes[i] != '|' { if re_runes[i] != '(' && re_runes[i] != '|' && re_runes[i] != '\\' {
if i < len(re_runes)-1 { if i < len(re_runes)-1 {
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' { if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' {
re_postfix = append(re_postfix, CONCAT) re_postfix = append(re_postfix, CONCAT)
@ -98,16 +98,39 @@ func shuntingYard(re string) []postfixNode {
outQueue = append(outQueue, newPostfixNode(c)) outQueue = append(outQueue, newPostfixNode(c))
continue continue
} }
// Escape character - NOT IMPLEMENTED YET - DO NOT USE // Escape character
// if c == '\\' { // Escape character - next character is treated as alphanum if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
// if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it) if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
// panic("ERROR: Backslash with no escape character.") panic("ERROR: Backslash with no escape character.")
// } }
// outQueue = append(outQueue, re_postfix[i+1]) i++
// } switch re_postfix[i] {
case 's': // Whitespace
outQueue = append(outQueue, newPostfixNode(whitespaceChars...))
case 'S': // Non-whitespace - I am doing this in a fancy way, generating all dot characters, then removing whitespace characters from it
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
return !slices.Contains(whitespaceChars, r)
})...))
case 'd': // Digits
outQueue = append(outQueue, newPostfixNode(digitChars...))
case 'D': // Non-digits - same fancy way as 'S'
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
return !slices.Contains(digitChars, r)
})...))
case 'w': // word character
outQueue = append(outQueue, newPostfixNode(wordChars...))
case 'W': // Non-word character - same fancy way as 'S' and 'D'
outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
return !slices.Contains(wordChars, r)
})...))
default: // None of the above - append it as a regular character
outQueue = append(outQueue, newPostfixCharNode(re_postfix[i]))
}
continue // Escaped character will automatically be skipped when loop variable increments
}
if c == '.' { // Dot metacharacter - represents 'any' character, but I am only adding Unicode 0020-007E if c == '.' { // Dot metacharacter - represents 'any' character, but I am only adding Unicode 0020-007E
outQueue = append(outQueue, newPostfixNode(dotCharacters()...)) outQueue = append(outQueue, newPostfixNode(dotChars()...))
continue continue
} }
if isOperator(c) { if isOperator(c) {
@ -247,7 +270,6 @@ func main() {
if err != nil { if err != nil {
panic(err) panic(err)
} }
fmt.Scanln(&test_str) fmt.Scanln(&test_str)
re_postfix := shuntingYard(re) re_postfix := shuntingYard(re)
// fmt.Println(re_postfix) // fmt.Println(re_postfix)

Loading…
Cancel
Save