diff --git a/regex/range2regex.go b/regex/range2regex.go index 348cb46..a01dfff 100644 --- a/regex/range2regex.go +++ b/regex/range2regex.go @@ -3,7 +3,9 @@ package regex import ( "fmt" "math" + "slices" "strconv" + "strings" ) type numRange struct { @@ -99,13 +101,11 @@ func range2regex(start int, end int) (string, error) { // Last range - tmp to rangeEnd ranges = append(ranges, numRange{tmp, rangeEnd}) - regex := string(nonCapLparenRune) + regexSlice := make([]string, 0) // Generate the regex - for i, rg := range ranges { - if i > 0 { - regex += "|" - } - regex += string(nonCapLparenRune) + for _, rg := range ranges { + tmpStr := "" + tmpStr += string(nonCapLparenRune) startSlc := intToSlc(rg.start) endSlc := intToSlc(rg.end) if len(startSlc) != len(endSlc) { @@ -113,14 +113,27 @@ func range2regex(start int, end int) (string, error) { } for i := range startSlc { if startSlc[i] == endSlc[i] { - regex += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48. + tmpStr += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48. } else { - regex += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune) + tmpStr += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune) } } - regex += ")" + tmpStr += ")" + regexSlice = append(regexSlice, tmpStr) } - regex += ")" + // Each element of the slice represents one 'group'. Taking 0-255 as an example, the elements would be: + // 1. 0-9 + // 2. 10-99 + // 3. 100-199 + // 4. 200-249 + // 5. 250-255 + // + // The reason this is reversed before joining it, is because it is incompatible with the PCRE rule for matching. + // The PCRE rule specifies that the left-branch of an alternation is preferred. 
Even though this engine uses the POSIX + // rule at the moment (which prefers the longest match regardless of the order of the alternation), reversing the slice + // has no downsides. It doesn't affect POSIX matching, and it will reduce my burden if I decide to switch to PCRE matching. + slices.Reverse(regexSlice) + regex := string(nonCapLparenRune) + strings.Join(regexSlice, "|") + ")" return regex, nil }