Reverse the order of the numeric ranges before joining them, to maintain compatibility with PCRE matching rules (I don't implement them at the moment, but this will help if I do in the future)

posixStyleMatching
Aadhavan Srinivasan 1 month ago
parent 7e6b02632f
commit ef476e8875

@ -3,7 +3,9 @@ package regex
import ( import (
"fmt" "fmt"
"math" "math"
"slices"
"strconv" "strconv"
"strings"
) )
type numRange struct { type numRange struct {
@ -99,13 +101,11 @@ func range2regex(start int, end int) (string, error) {
// Last range - tmp to rangeEnd // Last range - tmp to rangeEnd
ranges = append(ranges, numRange{tmp, rangeEnd}) ranges = append(ranges, numRange{tmp, rangeEnd})
regex := string(nonCapLparenRune) regexSlice := make([]string, 0)
// Generate the regex // Generate the regex
for i, rg := range ranges { for _, rg := range ranges {
if i > 0 { tmpStr := ""
regex += "|" tmpStr += string(nonCapLparenRune)
}
regex += string(nonCapLparenRune)
startSlc := intToSlc(rg.start) startSlc := intToSlc(rg.start)
endSlc := intToSlc(rg.end) endSlc := intToSlc(rg.end)
if len(startSlc) != len(endSlc) { if len(startSlc) != len(endSlc) {
@ -113,14 +113,27 @@ func range2regex(start int, end int) (string, error) {
} }
for i := range startSlc { for i := range startSlc {
if startSlc[i] == endSlc[i] { if startSlc[i] == endSlc[i] {
regex += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48. tmpStr += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48.
} else { } else {
regex += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune) tmpStr += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune)
} }
} }
regex += ")" tmpStr += ")"
regexSlice = append(regexSlice, tmpStr)
} }
regex += ")" // Each element of the slice represents one 'group'. Taking 0-255 as an example, the elements would be:
// 1. 0-9
// 2. 10-99
// 3. 100-199
// 4. 200-249
// 5. 250-255
//
// The slice is reversed before joining because the order in which the groups are generated is incompatible with the PCRE rule for matching.
// The PCRE rule specifies that the left branch of an alternation is preferred. Even though this engine uses the POSIX
// rule at the moment (which prefers the longest match regardless of the order of the alternation), reversing the slice
// has no downsides. It doesn't affect POSIX matching, and it will reduce my burden if I decide to switch to PCRE matching.
slices.Reverse(regexSlice)
regex := string(nonCapLparenRune) + strings.Join(regexSlice, "|") + ")"
return regex, nil return regex, nil
} }

Loading…
Cancel
Save