package regex import ( "fmt" "math" "slices" "strconv" "strings" ) type numRange struct { start int end int } // Returns the exponent of the closest power of 10 smaller // than the given value. func floorPower10(val int) int { return int(math.Floor(math.Log10(float64(val)))) } // Returns smallest multiple of 10^exp, that is greater than val func roundUpToNearest10Multiple(val int, exp int) int { bench := int(math.Round(math.Pow10(exp))) if val != 0 && val%bench == 0 { return val } else { return (bench - val%bench) + val } } func roundDownToNearest10Multiple(val int, exp int) int { bench := int(math.Round(math.Pow10(exp))) return val - val%bench } // Converts the given integer into an int-slice, where each element // represents a digit of the number. func intToSlc(val int) []int { valStr := strconv.Itoa(val) valSlc := []rune(valStr) toRet := make([]int, len(valStr)) for i, r := range valSlc { toRet[i] = int(r - 48) } return toRet } func range2regex(start int, end int) (string, error) { rangeStart := start rangeEnd := end if rangeStart > rangeEnd { return "", fmt.Errorf("numeric range start greater than range end") } ranges := make([]numRange, 0) // If both numbers are in the same power of 10 eg. 15000 and 17000. // the maximum power of 10 that we will go to, is determined by the largest // power of 10 at which both numbers differ. Given 15000 and 17000, we will // go up to 10^3, because that is the largestindex at which they differ. startRangeSlc := intToSlc(rangeStart) endRangeSlc := intToSlc(rangeEnd) maxPower10 := 0 if len(startRangeSlc) != len(endRangeSlc) { // Different number of digits, so we will go up to the maximum (which must be rangeEnd) maxPower10 = floorPower10(rangeEnd) // Maximum power of 10 that we will reach } else { maxPower10 = 0 for i := range startRangeSlc { if startRangeSlc[i] != endRangeSlc[i] { maxPower10 = len(startRangeSlc) - i - 1 break } } } tmp := rangeStart exp := 1 // The exponent of 10 that we are finding the range to // Increasing up to highest power for exp <= maxPower10 { tmpRangeEnd := roundUpToNearest10Multiple(tmp, exp) if tmp != tmpRangeEnd { ranges = append(ranges, numRange{tmp, tmpRangeEnd - 1}) } tmp = tmpRangeEnd exp++ } exp-- // Decreasing down to lowest power for exp >= 1 { tmpRangeEnd := roundDownToNearest10Multiple(rangeEnd, exp) if tmp != tmpRangeEnd { ranges = append(ranges, numRange{tmp, tmpRangeEnd - 1}) } tmp = tmpRangeEnd exp-- } // Last range - tmp to rangeEnd ranges = append(ranges, numRange{tmp, rangeEnd}) regexSlice := make([]string, 0) // Generate the regex for _, rg := range ranges { tmpStr := "" tmpStr += string(nonCapLparenRune) startSlc := intToSlc(rg.start) endSlc := intToSlc(rg.end) if len(startSlc) != len(endSlc) { return "", fmt.Errorf("Error parsing numeric range") } for i := range startSlc { if startSlc[i] == endSlc[i] { tmpStr += string(rune(startSlc[i] + 48)) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48. } else { tmpStr += fmt.Sprintf("%c%c-%c%c", lbracketRune, rune(startSlc[i]+48), rune(endSlc[i]+48), rbracketRune) } } tmpStr += ")" regexSlice = append(regexSlice, tmpStr) } // Each element of the slice represents one 'group'. Taking 0-255 as an example, the elements would be: // 1. 0-9 // 2. 10-99 // 3. 100-199 // 4. 200-249 // 5. 250-255 // // The reason this is reversed before joining it, is because it is incompatible with the PCRE rule for matching. // The PCRE rule specifies that the left-branch of an alternation is preferred. Even though this engine uses the POSIX // rule at the moment (which prefers the longest match regardless of the order of the alternation), reversing the string // has no downsides. It doesn't affect POSIX matching, and it will reduce my burden if I decide to switch to PCRE matching. slices.Reverse(regexSlice) regex := string(nonCapLparenRune) + strings.Join(regexSlice, "|") + ")" return regex, nil }