// numRange is an inclusive range of integers [start, end].
type numRange struct {
	start int
	end   int
}

// floorPower10 returns the exponent of the largest power of 10 that does not
// exceed val, i.e. floor(log10(val)), which is the number of decimal digits
// of val minus one.
//
// The result is computed with integer division rather than math.Log10, so it
// is exact for every int and well defined for inputs where the logarithm is
// not: any val below 10 (including zero and negative values) yields 0.
func floorPower10(val int) int {
	exp := 0
	for val >= 10 {
		val /= 10
		exp++
	}
	return exp
}

// roundUpToNearest10Multiple returns the smallest multiple of 10^exp that is
// greater than or equal to val.
//
// Zero is deliberately treated as "not yet a multiple": for val == 0 the
// result is 10^exp, not 0. The range splitter relies on this so that a range
// starting at 0 still gets its own 0..9, 10..99, ... sub-ranges.
func roundUpToNearest10Multiple(val int, exp int) int {
	bench := int(math.Round(math.Pow10(exp)))
	if rem := val % bench; val == 0 || rem != 0 {
		return val + bench - rem
	}
	return val
}

// roundDownToNearest10Multiple returns val with its remainder modulo 10^exp
// removed, i.e. the largest multiple of 10^exp that does not exceed a
// non-negative val.
func roundDownToNearest10Multiple(val int, exp int) int {
	bench := int(math.Round(math.Pow10(exp)))
	return val - val%bench
}

// intToSlc converts val into a slice holding one element per character of its
// decimal representation, most significant digit first.
//
// val is expected to be non-negative. The minus sign of a negative value is
// not a digit; it goes through the same character-minus-'0' subtraction as the
// digits and therefore shows up as a negative first element.
func intToSlc(val int) []int {
	valStr := strconv.Itoa(val)
	digits := make([]int, len(valStr))
	for i, r := range valStr {
		digits[i] = int(r - '0')
	}
	return digits
}

// splitRange breaks the inclusive range [start, end], with 0 <= start <= end,
// into consecutive sub-ranges whose bounds have the same number of digits and
// differ only by a run of per-digit intervals, so that each sub-range can be
// rendered as a single sequence of literals and character classes.
func splitRange(start, end int) []numRange {
	startDigits := intToSlc(start)
	endDigits := intToSlc(end)

	// maxPower10 is the highest power of 10 the walk has to climb to.
	// With a different number of digits that is the magnitude of end. With the
	// same number of digits it is the position of the most significant digit
	// in which the bounds differ: for 15000 and 17000 that is 10^3.
	maxPower10 := 0
	if len(startDigits) != len(endDigits) {
		maxPower10 = floorPower10(end)
	} else {
		for i := range startDigits {
			if startDigits[i] != endDigits[i] {
				maxPower10 = len(startDigits) - i - 1
				break
			}
		}
	}

	var ranges []numRange
	cur := start

	// Climb: round cur up to ever larger powers of 10.
	for exp := 1; exp <= maxPower10; exp++ {
		next := roundUpToNearest10Multiple(cur, exp)
		if cur != next {
			ranges = append(ranges, numRange{cur, next - 1})
		}
		cur = next
	}

	// Descend: approach end by rounding it down to ever smaller powers of 10.
	for exp := maxPower10; exp >= 1; exp-- {
		next := roundDownToNearest10Multiple(end, exp)
		if cur != next {
			ranges = append(ranges, numRange{cur, next - 1})
		}
		cur = next
	}

	// Whatever is left differs from end in the last digit only.
	return append(ranges, numRange{cur, end})
}

// appendDigitPattern appends to out the pattern matching every number in rg:
// a literal digit where both bounds agree and a [lo-hi] class where they do
// not. It panics if the bounds do not have the same number of digits.
func appendDigitPattern(out []byte, rg numRange) []byte {
	lo := intToSlc(rg.start)
	hi := intToSlc(rg.end)
	if len(lo) != len(hi) {
		panic("Ranges have unequal lengths.")
	}
	for i := range lo {
		if lo[i] == hi[i] {
			out = append(out, byte('0'+lo[i]))
		} else {
			out = append(out, fmt.Sprintf("[%c-%c]", rune('0'+lo[i]), rune('0'+hi[i]))...)
		}
	}
	return out
}

// range2regex returns a regular expression that matches the decimal
// representation of every integer in the inclusive range [start, end].
//
// The result has the form "((alt1)|(alt2)|...)" and carries no anchors.
// Negative numbers are matched as a literal '-' followed by their magnitude;
// alternatives for negative values come before those for non-negative values.
//
// It panics if start is greater than end, or if start is so small that it
// cannot be negated.
func range2regex(start int, end int) string {
	if start > end {
		panic("Range start greater than range end.")
	}

	out := []byte{'('}
	emitted := 0
	appendAlternatives := func(prefix string, ranges []numRange) {
		for _, rg := range ranges {
			if emitted > 0 {
				out = append(out, '|')
			}
			emitted++
			out = append(out, '(')
			out = append(out, prefix...)
			out = appendDigitPattern(out, rg)
			out = append(out, ')')
		}
	}

	if start < 0 {
		// The negative part [start, min(end, -1)] is handled through its
		// magnitudes, which form the range [max(-end, 1), -start].
		hi := -start
		if hi < 0 {
			panic("Range start too small to negate.")
		}
		lo := 1
		if end < 0 {
			lo = -end
		}
		appendAlternatives("-", splitRange(lo, hi))
	}
	if end >= 0 {
		lo := start
		if lo < 0 {
			lo = 0
		}
		appendAlternatives("", splitRange(lo, end))
	}

	out = append(out, ')')
	return string(out)
}