Reverse the order of the numeric ranges before adding them to the regex, to maintain compatibility with PCRE matching rules (I don't implement them yet, but this will help if I do in the future)
This commit is contained in:
@@ -3,7 +3,9 @@ package regex
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type numRange struct {
|
type numRange struct {
|
||||||
@@ -99,13 +101,11 @@ func range2regex(start int, end int) (string, error) {
|
|||||||
// Last range - tmp to rangeEnd
|
// Last range - tmp to rangeEnd
|
||||||
ranges = append(ranges, numRange{tmp, rangeEnd})
|
ranges = append(ranges, numRange{tmp, rangeEnd})
|
||||||
|
|
||||||
regex := string(nonCapLparenRune)
|
regexSlice := make([]string, 0)
|
||||||
// Generate the regex
|
// Generate the regex
|
||||||
for i, rg := range ranges {
|
for _, rg := range ranges {
|
||||||
if i > 0 {
|
tmpStr := ""
|
||||||
regex += "|"
|
tmpStr += string(nonCapLparenRune)
|
||||||
}
|
|
||||||
regex += string(nonCapLparenRune)
|
|
||||||
startSlc := intToSlc(rg.start)
|
startSlc := intToSlc(rg.start)
|
||||||
endSlc := intToSlc(rg.end)
|
endSlc := intToSlc(rg.end)
|
||||||
if len(startSlc) != len(endSlc) {
|
if len(startSlc) != len(endSlc) {
|
||||||
@@ -113,14 +113,27 @@ func range2regex(start int, end int) (string, error) {
|
|||||||
}
|
}
|
||||||
for i := range startSlc {
|
for i := range startSlc {
|
||||||
if startSlc[i] == endSlc[i] {
|
if startSlc[i] == endSlc[i] {
|
||||||
regex += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48.
|
tmpStr += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48.
|
||||||
} else {
|
} else {
|
||||||
regex += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune)
|
tmpStr += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
regex += ")"
|
tmpStr += ")"
|
||||||
|
regexSlice = append(regexSlice, tmpStr)
|
||||||
}
|
}
|
||||||
regex += ")"
|
// Each element of the slice represents one 'group'. Taking 0-255 as an example, the elements would be:
|
||||||
|
// 1. 0-9
|
||||||
|
// 2. 10-99
|
||||||
|
// 3. 100-199
|
||||||
|
// 4. 200-249
|
||||||
|
// 5. 250-255
|
||||||
|
//
|
||||||
|
// The slice is reversed before joining because the original (ascending) order is incompatible with the PCRE rule for matching.
|
||||||
|
// The PCRE rule specifies that the left-branch of an alternation is preferred. Even though this engine uses the POSIX
|
||||||
|
// rule at the moment (which prefers the longest match regardless of the order of the alternation), reversing the slice
|
||||||
|
// has no downsides. It doesn't affect POSIX matching, and it will reduce my burden if I decide to switch to PCRE matching.
|
||||||
|
slices.Reverse(regexSlice)
|
||||||
|
regex := string(nonCapLparenRune) + strings.Join(regexSlice, "|") + ")"
|
||||||
return regex, nil
|
return regex, nil
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user