|
|
|
package regex
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"math"
|
|
|
|
"slices"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
|
|
|
// numRange is a span of integers described by its two bounds. range2regex
// builds these so that both start and end belong to the span.
type numRange struct {
	start, end int // first and last number of the span
}
|
|
|
|
|
|
|
|
// floorPower10 returns the exponent of the largest power of 10 that is less
// than or equal to val, i.e. floor(log10(val)). For example, 999 yields 2 and
// 1000 yields 3.
//
// The exponent is found by repeated integer division rather than through
// math.Log10, so the result cannot be thrown off by floating-point rounding
// at exact powers of 10. No power of 10 is less than or equal to a value
// below 1; 0 is returned for such input.
func floorPower10(val int) int {
	exp := 0
	// Every division by 10 that still leaves at least one digit in front
	// accounts for one power of 10.
	for val >= 10 {
		val /= 10
		exp++
	}
	return exp
}
|
|
|
|
|
|
|
|
// roundUpToNearest10Multiple rounds a non-negative val up to a multiple of
// 10^exp. A non-zero val that already is such a multiple is returned
// unchanged; any other val is moved up to the next multiple. Note that 0 is
// not treated as a multiple of itself: it is rounded up to 10^exp.
func roundUpToNearest10Multiple(val int, exp int) int {
	step := int(math.Round(math.Pow10(exp)))
	rem := val % step
	if rem == 0 && val != 0 {
		return val
	}
	// Add whatever is missing to reach the next multiple of step.
	return val + (step - rem)
}
|
|
|
|
|
|
|
|
// roundDownToNearest10Multiple strips from val everything below 10^exp,
// returning the multiple of 10^exp obtained by truncating val toward zero.
// A val that already is a multiple of 10^exp is returned unchanged.
func roundDownToNearest10Multiple(val int, exp int) int {
	step := int(math.Round(math.Pow10(exp)))
	// Integer division truncates, so multiplying back drops the remainder.
	return val / step * step
}
|
|
|
|
|
|
|
|
// intToSlc splits val into its decimal digits and returns them as a slice,
// most significant digit first (1234 becomes [1 2 3 4]).
//
// Every character of the decimal representation is converted by subtracting
// the character '0', so val is expected to be non-negative: a leading minus
// sign would be converted like any other character.
func intToSlc(val int) []int {
	text := strconv.Itoa(val)
	digits := make([]int, 0, len(text))
	for _, ch := range text {
		digits = append(digits, int(ch-'0'))
	}
	return digits
}
|
|
|
|
|
|
|
|
func range2regex(start int, end int) (string, error) {
|
|
|
|
rangeStart := start
|
|
|
|
rangeEnd := end
|
|
|
|
if rangeStart > rangeEnd {
|
|
|
|
return "", fmt.Errorf("numeric range start greater than range end")
|
|
|
|
}
|
|
|
|
|
|
|
|
ranges := make([]numRange, 0)
|
|
|
|
// If both numbers are in the same power of 10 eg. 15000 and 17000.
|
|
|
|
// the maximum power of 10 that we will go to, is determined by the largest
|
|
|
|
// power of 10 at which both numbers differ. Given 15000 and 17000, we will
|
|
|
|
// go up to 10^3, because that is the largestindex at which they differ.
|
|
|
|
startRangeSlc := intToSlc(rangeStart)
|
|
|
|
endRangeSlc := intToSlc(rangeEnd)
|
|
|
|
maxPower10 := 0
|
|
|
|
if len(startRangeSlc) != len(endRangeSlc) { // Different number of digits, so we will go up to the maximum (which must be rangeEnd)
|
|
|
|
maxPower10 = floorPower10(rangeEnd) // Maximum power of 10 that we will reach
|
|
|
|
} else {
|
|
|
|
maxPower10 = 0
|
|
|
|
for i := range startRangeSlc {
|
|
|
|
if startRangeSlc[i] != endRangeSlc[i] {
|
|
|
|
maxPower10 = len(startRangeSlc) - i - 1
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
tmp := rangeStart
|
|
|
|
exp := 1 // The exponent of 10 that we are finding the range to
|
|
|
|
|
|
|
|
// Increasing up to highest power
|
|
|
|
for exp <= maxPower10 {
|
|
|
|
tmpRangeEnd := roundUpToNearest10Multiple(tmp, exp)
|
|
|
|
if tmp != tmpRangeEnd {
|
|
|
|
ranges = append(ranges, numRange{tmp, tmpRangeEnd - 1})
|
|
|
|
}
|
|
|
|
tmp = tmpRangeEnd
|
|
|
|
exp++
|
|
|
|
}
|
|
|
|
|
|
|
|
exp--
|
|
|
|
|
|
|
|
// Decreasing down to lowest power
|
|
|
|
for exp >= 1 {
|
|
|
|
tmpRangeEnd := roundDownToNearest10Multiple(rangeEnd, exp)
|
|
|
|
if tmp != tmpRangeEnd {
|
|
|
|
ranges = append(ranges, numRange{tmp, tmpRangeEnd - 1})
|
|
|
|
}
|
|
|
|
tmp = tmpRangeEnd
|
|
|
|
exp--
|
|
|
|
}
|
|
|
|
|
|
|
|
// Last range - tmp to rangeEnd
|
|
|
|
ranges = append(ranges, numRange{tmp, rangeEnd})
|
|
|
|
|
|
|
|
regexSlice := make([]string, 0)
|
|
|
|
// Generate the regex
|
|
|
|
for _, rg := range ranges {
|
|
|
|
tmpStr := ""
|
|
|
|
tmpStr += string(nonCapLparenRune)
|
|
|
|
startSlc := intToSlc(rg.start)
|
|
|
|
endSlc := intToSlc(rg.end)
|
|
|
|
if len(startSlc) != len(endSlc) {
|
|
|
|
return "", fmt.Errorf("error parsing numeric range")
|
|
|
|
}
|
|
|
|
for i := range startSlc {
|
|
|
|
if startSlc[i] == endSlc[i] {
|
|
|
|
tmpStr += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48.
|
|
|
|
} else {
|
|
|
|
tmpStr += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
tmpStr += ")"
|
|
|
|
regexSlice = append(regexSlice, tmpStr)
|
|
|
|
}
|
|
|
|
// Each element of the slice represents one 'group'. Taking 0-255 as an example, the elements would be:
|
|
|
|
// 1. 0-9
|
|
|
|
// 2. 10-99
|
|
|
|
// 3. 100-199
|
|
|
|
// 4. 200-249
|
|
|
|
// 5. 250-255
|
|
|
|
//
|
|
|
|
// The reason this is reversed before joining it, is because it is incompatible with the PCRE rule for matching.
|
|
|
|
// The PCRE rule specifies that the left-branch of an alternation is preferred. Even though this engine uses the POSIX
|
|
|
|
// rule at the moment (which prefers the longest match regardless of the order of the alternation), reversing the string
|
|
|
|
// has no downsides. It doesn't affect POSIX matching, and it will reduce my burden if I decide to switch to PCRE matching.
|
|
|
|
slices.Reverse(regexSlice)
|
|
|
|
regex := string(nonCapLparenRune) + strings.Join(regexSlice, "|") + ")"
|
|
|
|
return regex, nil
|
|
|
|
|
|
|
|
}
|