From 861eb6067eea2bdcfc6baa0bfb33be679f136609 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Tue, 28 Jan 2025 11:41:17 -0500 Subject: [PATCH] Apply case-insensitive flag inside character classes as well --- compile.go | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/compile.go b/compile.go index 907bb19..e24ed77 100644 --- a/compile.go +++ b/compile.go @@ -324,11 +324,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { */ c := re_postfix[i] if isNormalChar(c) || isSpecialCharWithMetacharReplacement(c) { - if caseInsensitive { - outQueue = append(outQueue, newPostfixNode(allCases(c)...)) - } else { - outQueue = append(outQueue, newPostfixNode(c)) - } + outQueue = append(outQueue, newPostfixNode(allCases(c, caseInsensitive)...)) continue } // Since every unescaped bracket is replaced by a LBRACKET / RBRACKET, there may @@ -356,7 +352,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { if n < 1 || err != nil { return nil, fmt.Errorf("error parsing expanded hex code in expression") } - outQueue = append(outQueue, newPostfixCharNode(rune(hexVal))) + outQueue = append(outQueue, newPostfixCharNode(allCases(rune(hexVal), caseInsensitive)...)) i += 7 } else if i < len(re_postfix)-1 { // Two-digit hex code hexVal, err := strconv.ParseInt(string([]rune{re_postfix[i], re_postfix[i+1]}), 16, 64) // Convert the two hex values into a rune slice, then to a string. Parse the string into an int with strconv.ParseInt() @@ -364,7 +360,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("error parsing hex characters in expression") } i++ // Loop increment will take care of going forward - outQueue = append(outQueue, newPostfixCharNode(rune(hexVal))) + outQueue = append(outQueue, newPostfixCharNode(allCases(rune(hexVal), caseInsensitive)...)) } else { return nil, fmt.Errorf("not enough hex characters found in expression") } @@ -384,7 +380,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("invalid octal value in expression") } i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically - outQueue = append(outQueue, newPostfixCharNode(rune(octVal))) + outQueue = append(outQueue, newPostfixCharNode(allCases(rune(octVal), caseInsensitive)...)) } else { escapedNode, err := newEscapedNode(re_postfix[i], false) if err != nil { @@ -524,7 +520,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { if n < 1 || err != nil { return nil, fmt.Errorf("error parsing expanded hex code in character class") } - chars = append(chars, newPostfixCharNode(rune(hexVal))) + chars = append(chars, newPostfixCharNode(allCases(rune(hexVal), caseInsensitive)...)) i += 8 } else if i < len(re_postfix)-2 { // Two-digit hex code hexVal, err := strconv.ParseInt(string([]rune{re_postfix[i], re_postfix[i+1]}), 16, 64) // Convert the two hex values into a rune slice, then to a string. Parse the string into an int with strconv.ParseInt() @@ -532,7 +528,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("error parsing hex characters in character class") } i += 2 - chars = append(chars, newPostfixCharNode(rune(hexVal))) + chars = append(chars, newPostfixCharNode(allCases(rune(hexVal), caseInsensitive)...)) } else { return nil, fmt.Errorf("not enough hex characters found in character class") } @@ -552,7 +548,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("invalid octal value in character class") } i += numDigitsParsed // Shift forward by the number of characters parsed - chars = append(chars, newPostfixCharNode(rune(octVal))) + chars = append(chars, newPostfixCharNode(allCases(rune(octVal), caseInsensitive)...)) } else { escapedNode, err := newEscapedNode(re_postfix[i], true) if err != nil { @@ -580,9 +576,25 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { case "digit": // Equivalent to '\d' nodeToAdd = newPostfixCharNode(genRangeInclusive('0', '9')...) case "upper": // [A-Z] - nodeToAdd = newPostfixCharNode(genRangeInclusive('A', 'Z')...) + charsToAdd := genRangeInclusive('A', 'Z') + if caseInsensitive { + // Convert each rune to a slice of runes using allCases, then flatten the resulting + // 2-D slice into a 1-D slice. Assign the result to charsToAdd. + charsToAdd = slices.Concat(Map(charsToAdd, func(r rune) []rune { + return allCases(r, caseInsensitive) + })...) + } + nodeToAdd = newPostfixCharNode(charsToAdd...) case "lower": // [a-z] - nodeToAdd = newPostfixCharNode(genRangeInclusive('a', 'z')...) + charsToAdd := genRangeInclusive('a', 'z') + if caseInsensitive { + // Convert each rune to a slice of runes using allCases, then flatten the resulting + // 2-D slice into a 1-D slice. Assign the result to charsToAdd. + charsToAdd = slices.Concat(Map(charsToAdd, func(r rune) []rune { + return allCases(r, caseInsensitive) + })...) + } + nodeToAdd = newPostfixCharNode(charsToAdd...) case "alpha": //[A-Za-z] nodeToAdd = newPostfixCharNode(slices.Concat(genRangeInclusive('A', 'Z'), genRangeInclusive('a', 'z'))...) case "xdigit": // [0-9A-Fa-f] @@ -630,7 +642,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, fmt.Errorf("error parsing high-range unicode value in character class") } } - chars = append(chars, newPostfixCharNode(re_postfix[i])) + chars = append(chars, newPostfixCharNode(allCases(re_postfix[i], caseInsensitive)...)) i++ } firstCharAdded = true