@ -3,7 +3,9 @@ package regex
import (
import (
"fmt"
"fmt"
"math"
"math"
"slices"
"strconv"
"strconv"
"strings"
)
)
type numRange struct {
type numRange struct {
@ -99,13 +101,11 @@ func range2regex(start int, end int) (string, error) {
// Last range - tmp to rangeEnd
// Last range - tmp to rangeEnd
ranges = append ( ranges , numRange { tmp , rangeEnd } )
ranges = append ( ranges , numRange { tmp , rangeEnd } )
regex := string ( nonCapLparenRune )
regex Slice := make ( [ ] string , 0 )
// Generate the regex
// Generate the regex
for i , rg := range ranges {
for _ , rg := range ranges {
if i > 0 {
tmpStr := ""
regex += "|"
tmpStr += string ( nonCapLparenRune )
}
regex += string ( nonCapLparenRune )
startSlc := intToSlc ( rg . start )
startSlc := intToSlc ( rg . start )
endSlc := intToSlc ( rg . end )
endSlc := intToSlc ( rg . end )
if len ( startSlc ) != len ( endSlc ) {
if len ( startSlc ) != len ( endSlc ) {
@ -113,14 +113,27 @@ func range2regex(start int, end int) (string, error) {
}
}
for i := range startSlc {
for i := range startSlc {
if startSlc [ i ] == endSlc [ i ] {
if startSlc [ i ] == endSlc [ i ] {
regex += string ( rune ( startSlc [ i ] + 48 ) ) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48.
tmpSt r += string ( rune ( startSlc [ i ] + 48 ) ) // '0' is ascii value 48, 1 is 49 etc. To convert the digit to its character form, we can just add 48.
} else {
} else {
regex += fmt . Sprintf ( "%c%c-%c%c" , lbracketRune , rune ( startSlc [ i ] + 48 ) , rune ( endSlc [ i ] + 48 ) , rbracketRune )
tmpSt r += fmt . Sprintf ( "%c%c-%c%c" , lbracketRune , rune ( startSlc [ i ] + 48 ) , rune ( endSlc [ i ] + 48 ) , rbracketRune )
}
}
}
}
regex += ")"
tmpStr += ")"
regexSlice = append ( regexSlice , tmpStr )
}
}
regex += ")"
// Each element of the slice represents one 'group'. Taking 0-255 as an example, the elements would be:
// 1. 0-9
// 2. 10-99
// 3. 100-199
// 4. 200-249
// 5. 250-255
//
// The slice is reversed before joining because the ascending order above is incompatible with the PCRE matching rule.
// PCRE prefers the left-most branch of an alternation, so a shorter group listed first (e.g. 0-9) would win over a
// longer group listed later (e.g. 250-255). Even though this engine uses the POSIX rule at the moment (which prefers
// the longest match regardless of the order of the alternation), reversing the slice has no downsides. It doesn't
// affect POSIX matching, and it will reduce my burden if I decide to switch to PCRE matching.
slices . Reverse ( regexSlice )
regex := string ( nonCapLparenRune ) + strings . Join ( regexSlice , "|" ) + ")"
return regex , nil
return regex , nil
}
}