package regex
import (
"slices"
"unicode"
)
// Rune sets for common character classes.
var (
	// whitespaceChars lists the whitespace runes: space, tab and newline.
	whitespaceChars = []rune{' ', '\t', '\n'}

	// digitChars lists the ASCII decimal digits '0' through '9'.
	digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}

	// wordChars lists the runes that count as part of a word: ASCII letters,
	// ASCII digits and the underscore.
	wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
)
// Sentinel runes that carry an internal meaning instead of standing for a
// literal character. Every value lies in the Unicode Supplementary Private
// Use Area-A (U+F0000..U+FFFFD).
var (
	// LBRACKET and RBRACKET are the sentinel runes for brackets.
	// NOTE(review): presumably the stand-ins for '[' and ']' - confirm against the parser.
	LBRACKET rune = 0xF0002
	RBRACKET rune = 0xF0003

	// ANY_CHAR represents any character - used for states where the allChars flag is on.
	ANY_CHAR rune = 0xF0004

	// LPAREN_CHAR is concatenated with parentheses in the regex - it acts as a
	// pseudo-parenthesis. RPAREN_CHAR is its closing counterpart.
	LPAREN_CHAR rune = 0xF0005
	RPAREN_CHAR rune = 0xF0006

	// NONCAPLPAREN_CHAR represents the opening parenthesis of a non-capturing group.
	NONCAPLPAREN_CHAR rune = 0xF0007

	// ESC_BACKSLASH represents an escaped backslash.
	ESC_BACKSLASH rune = 0xF0008

	// CHAR_RANGE represents a character range.
	CHAR_RANGE rune = 0xF0009
)
// specialChars lists the runes that are not treated as ordinary characters.
// isSpecialChar and isNormalChar both test membership in this slice.
var specialChars = []rune{
	'?', '*', '\\', '^', '$',
	'{', '}', '(', ')', '[', ']',
	'+', '|', '.', concatRune,
	'<', '>',
	LBRACKET, RBRACKET, NONCAPLPAREN_CHAR,
}
// character is a type constraint satisfied by exactly two types: rune and int.
// The two are distinct types (rune is an alias for int32); the constraint lets
// a generic helper such as genRangeInclusive be written once for both.
type character interface {
	rune | int
}
// isWordBoundary reports whether str[idx-1] and str[idx] are separated by a
// word boundary.
//
// Index 0 and any index at or past the end of str are always reported as
// boundaries. Everywhere else a boundary exists exactly when one of the two
// neighbouring runes is in wordChars and the other is not.
func isWordBoundary(str []rune, idx int) bool {
	if idx == 0 || idx >= len(str) {
		return true
	}
	prevIsWord := slices.Contains(wordChars, str[idx-1])
	currIsWord := slices.Contains(wordChars, str[idx])
	// A boundary is a change of class between the two neighbours.
	return prevIsWord != currIsWord
}
// isSpecialChar reports whether c appears in specialChars.
func isSpecialChar(c rune) bool {
	return slices.Index(specialChars, c) >= 0
}
// isSpecialCharWithMetacharReplacement reports whether c is a special
// character that has a metacharacter replacement, i.e. '[' or ']'.
//
// When such a character is encountered in its literal form it can be treated
// as a regular character. The check is two direct comparisons, so no
// temporary slice is built and scanned on each call.
func isSpecialCharWithMetacharReplacement(c rune) bool {
	return c == '[' || c == ']'
}
// isNormalChar reports whether c is absent from specialChars; it is the exact
// negation of isSpecialChar.
func isNormalChar(c rune) bool {
	return slices.Index(specialChars, c) == -1
}
// Ensure that the given elements are only appended to the given slice if they
// don't already exist. Returns the new slice, and the number of unique items appended.
func unique_append [ T comparable ] ( slc [ ] T , items ... T ) ( [ ] T , int ) {
num_appended := 0
for _ , item := range items {
if ! slices . Contains ( slc , item ) {
slc = append ( slc , item )
num_appended ++
}
}
return slc , num_appended
}
// allEqual reports whether all the given elements are equal to each other.
//
// With zero or one element there is nothing to contradict equality, so the
// result is true; in particular, calling it with no arguments does not panic.
func allEqual[T comparable](items ...T) bool {
	// Start at 1: element 0 is the reference value. The loop body never runs
	// for empty or single-element input, so items[0] is only read when it exists.
	for i := 1; i < len(items); i++ {
		if items[i] != items[0] {
			return false
		}
	}
	return true
}
// funcMap applies fn to every element of slc, in order, and returns the
// results as a new slice of the same length. The input slice is not modified.
func funcMap[T, V any](slc []T, fn func(T) V) []V {
	mapped := make([]V, 0, len(slc))
	for _, elem := range slc {
		mapped = append(mapped, fn(elem))
	}
	return mapped
}
// funcReduce folds slc into a single value from left to right: the result is
// fn(...fn(fn(slc[0], slc[1]), slc[2])..., slc[len-1]). A single-element
// slice yields that element without calling fn.
//
// The input slice is only read; intermediate results are kept in a local
// accumulator rather than written back into the caller's backing array.
//
// It panics if slc is empty, because there is no value to return.
func funcReduce[T any](slc []T, fn func(T, T) T) T {
	if len(slc) == 0 {
		panic("Reduce on empty slice.")
	}
	acc := slc[0]
	for _, next := range slc[1:] {
		acc = fn(acc, next)
	}
	return acc
}
// genRangeInclusive returns the consecutive values start, start+1, ..., end,
// with both bounds included.
//
// When end == start-1 the result is an empty slice. When end is smaller than
// that, the computed length is negative and make panics.
func genRangeInclusive[T character](start, end T) []T {
	values := make([]T, (end-start)+1)
	// pos is the slot to fill, cur is the value that goes into it.
	for pos, cur := 0, start; cur <= end; pos, cur = pos+1, cur+1 {
		values[pos] = cur
	}
	return values
}
// allCases returns the case variants of r that should be considered, depending
// on caseInsensitive.
//
// If caseInsensitive is false, the result holds only r itself, unmodified.
// If it is true, the result always has three elements, in this order:
//  1. the lower case of r
//  2. the upper case of r
//  3. the title case of r
//
// The three entries are not de-duplicated, so a rune without case variants
// appears three times.
func allCases(r rune, caseInsensitive bool) []rune {
	if !caseInsensitive {
		return []rune{r}
	}
	lower, upper, title := unicode.ToLower(r), unicode.ToUpper(r), unicode.ToTitle(r)
	return []rune{lower, upper, title}
}
// isHex reports whether c is an ASCII hexadecimal digit: 0-9, a-f or A-F.
//
// Plain range comparisons are used so that no rune slice has to be built and
// searched on every call.
func isHex(c rune) bool {
	return ('0' <= c && c <= '9') ||
		('a' <= c && c <= 'f') ||
		('A' <= c && c <= 'F')
}
// isOctal reports whether c is an ASCII octal digit: 0-7.
//
// A single range comparison is used so that no rune slice has to be built and
// searched on every call.
func isOctal(c rune) bool {
	return '0' <= c && c <= '7'
}
// replaceByValue overwrites every element of slc that equals toReplace with
// replaceWith. The replacement happens in place, so the caller's slice is
// modified; the same slice is also returned for convenience.
func replaceByValue[T comparable](slc []T, toReplace T, replaceWith T) []T {
	for idx := range slc {
		if slc[idx] != toReplace {
			continue
		}
		slc[idx] = replaceWith
	}
	return slc
}