Reverse the order of the numeric range before adding it, to maintain compatibility with PCRE matching rules (even though I don't implement them, if I do in the future)

posixStyleMatching
Aadhavan Srinivasan 1 month ago
parent 7e6b02632f
commit ef476e8875

@ -3,7 +3,9 @@ package regex
import (
"fmt"
"math"
"slices"
"strconv"
"strings"
)
type numRange struct {
@ -99,13 +101,11 @@ func range2regex(start int, end int) (string, error) {
// Last range - tmp to rangeEnd
ranges = append(ranges, numRange{tmp, rangeEnd})
regex := string(nonCapLparenRune)
regexSlice := make([]string, 0)
// Generate the regex
for i, rg := range ranges {
if i > 0 {
regex += "|"
}
regex += string(nonCapLparenRune)
for _, rg := range ranges {
tmpStr := ""
tmpStr += string(nonCapLparenRune)
startSlc := intToSlc(rg.start)
endSlc := intToSlc(rg.end)
if len(startSlc) != len(endSlc) {
@ -113,14 +113,27 @@ func range2regex(start int, end int) (string, error) {
}
for i := range startSlc {
if startSlc[i] == endSlc[i] {
regex += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48.
tmpStr += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48.
} else {
regex += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune)
tmpStr += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune)
}
}
regex += ")"
tmpStr += ")"
regexSlice = append(regexSlice, tmpStr)
}
regex += ")"
// Each element of the slice represents one 'group'. Taking 0-255 as an example, the elements would be:
// 1. 0-9
// 2. 10-99
// 3. 100-199
// 4. 200-249
// 5. 250-255
//
// The reason this is reversed before joining it, is because it is incompatible with the PCRE rule for matching.
// The PCRE rule specifies that the left-branch of an alternation is preferred. Even though this engine uses the POSIX
// rule at the moment (which prefers the longest match regardless of the order of the alternation), reversing the string
// has no downsides. It doesn't affect POSIX matching, and it will reduce my burden if I decide to switch to PCRE matching.
slices.Reverse(regexSlice)
regex := string(nonCapLparenRune) + strings.Join(regexSlice, "|") + ")"
return regex, nil
}

Loading…
Cancel
Save