From c92b3d0e7c3b995072e09783f1831fa83356146b Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Tue, 28 Jan 2025 12:12:37 -0500 Subject: [PATCH] Removed case-insensitive functionality from shuntingYard(); should be put in thompson() instead --- compile.go | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/compile.go b/compile.go index e24ed77..5421a38 100644 --- a/compile.go +++ b/compile.go @@ -74,6 +74,9 @@ func getPOSIXClass(str []rune) (bool, string) { return true, rtv } +// Stores whether the case-insensitive flag has been enabled. +var caseInsensitive bool + /* The Shunting-Yard algorithm is used to convert the given infix (regeular) expression to postfix. The primary benefit of this is getting rid of parentheses. @@ -85,7 +88,7 @@ See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation- func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { // Check which flags are enabled - caseInsensitive := false + caseInsensitive = false // In Multiline mode, the newline character is considered a // 'dot' character ie. the dot metacharacter matches a newline as well. if slices.Contains(flags, RE_MULTILINE) { @@ -324,7 +327,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { */ c := re_postfix[i] if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) { - outQueue = append(outQueue, newPostfixNode(allCases(c, caseInsensitive)...)) + outQueue = append(outQueue, newPostfixNode(c)) continue } // Since every unescaped bracket is replaced by a LBRACKET / RBRACKET, there may @@ -352,7 +355,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { if n < 1 || err != nil { return nil, fmt.Errorf("error parsing expanded hex code in expression") } - outQueue = append(outQueue, newPostfixCharNode(allCases(rune(hexVal), caseInsensitive)...)) + outQueue = append(outQueue, newPostfixCharNode(rune(hexVal))) i += 7 } else if i < len(re_postfix)-1 { // Two-digit hex code hexVal, err := strconv.ParseInt(string([]rune{re_postfix[i], re_postfix[i+1]}), 16, 64) // Convert the two hex values into a rune slice, then to a string. Parse the string into an int with strconv.ParseInt() @@ -360,7 +363,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("error parsing hex characters in expression") } i++ // Loop increment will take care of going forward - outQueue = append(outQueue, newPostfixCharNode(allCases(rune(hexVal), caseInsensitive)...)) + outQueue = append(outQueue, newPostfixCharNode(rune(hexVal))) } else { return nil, fmt.Errorf("not enough hex characters found in expression") } @@ -380,7 +383,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("invalid octal value in expression") } i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically - outQueue = append(outQueue, newPostfixCharNode(allCases(rune(octVal), caseInsensitive)...)) + outQueue = append(outQueue, newPostfixCharNode(rune(octVal))) } else { escapedNode, err := newEscapedNode(re_postfix[i], false) if err != nil { @@ -520,7 +523,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { if n < 1 || err != nil { return nil, fmt.Errorf("error parsing expanded hex code in character class") } - chars = append(chars, newPostfixCharNode(allCases(rune(hexVal), caseInsensitive)...)) + chars = append(chars, newPostfixCharNode(rune(hexVal))) i += 8 } else if i < len(re_postfix)-2 { // Two-digit hex code hexVal, err := strconv.ParseInt(string([]rune{re_postfix[i], re_postfix[i+1]}), 16, 64) // Convert the two hex values into a rune slice, then to a string. Parse the string into an int with strconv.ParseInt() @@ -528,7 +531,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("error parsing hex characters in character class") } i += 2 - chars = append(chars, newPostfixCharNode(allCases(rune(hexVal), caseInsensitive)...)) + chars = append(chars, newPostfixCharNode(rune(hexVal))) } else { return nil, fmt.Errorf("not enough hex characters found in character class") } @@ -548,7 +551,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("invalid octal value in character class") } i += numDigitsParsed // Shift forward by the number of characters parsed - chars = append(chars, newPostfixCharNode(allCases(rune(octVal), caseInsensitive)...)) + chars = append(chars, newPostfixCharNode(rune(octVal))) } else { escapedNode, err := newEscapedNode(re_postfix[i], true) if err != nil { @@ -577,23 +580,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { nodeToAdd = newPostfixCharNode(genRangeInclusive('0', '9')...) case "upper": // [A-Z] charsToAdd := genRangeInclusive('A', 'Z') - if caseInsensitive { - // Convert each rune to a slice of runes using allCases, then flatten the resulting - // 2-D slice into a 1-D slice. Assign the result to charsToAdd. - charsToAdd = slices.Concat(Map(charsToAdd, func(r rune) []rune { - return allCases(r, caseInsensitive) - })...) - } nodeToAdd = newPostfixCharNode(charsToAdd...) case "lower": // [a-z] charsToAdd := genRangeInclusive('a', 'z') - if caseInsensitive { - // Convert each rune to a slice of runes using allCases, then flatten the resulting - // 2-D slice into a 1-D slice. Assign the result to charsToAdd. - charsToAdd = slices.Concat(Map(charsToAdd, func(r rune) []rune { - return allCases(r, caseInsensitive) - })...) - } nodeToAdd = newPostfixCharNode(charsToAdd...) case "alpha": //[A-Za-z] nodeToAdd = newPostfixCharNode(slices.Concat(genRangeInclusive('A', 'Z'), genRangeInclusive('a', 'z'))...) @@ -642,7 +631,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("error parsing high-range unicode value in character class") } } - chars = append(chars, newPostfixCharNode(allCases(re_postfix[i], caseInsensitive)...)) + chars = append(chars, newPostfixCharNode(re_postfix[i])) i++ } firstCharAdded = true