package regex import ( "slices" "unicode" ) var whitespaceChars = []rune{' ', '\t', '\n'} var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_") var lbracketRune rune = 0xF0002 var rbracketRune rune = 0xF0003 var anyCharRune rune = 0xF0004 // Represents any character - used for states where the allChars flag is on. var lparenRune rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudio-parentheses var rparenRune rune = 0xF0006 var nonCapLparenRune rune = 0xF0007 // Represents a non-capturing group's LPAREN var escBackslashRune rune = 0xF0008 // Represents an escaped backslash var charRangeRune rune = 0xF0009 // Represents a character range var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune} // An interface for int and rune, which are identical type character interface { int | rune } // Returns true if str[idx] and str[idx-1] are separated by a word boundary. func isWordBoundary(str []rune, idx int) bool { str_runes := []rune(str) wbounded := idx == 0 || idx >= len(str) || (!slices.Contains(wordChars, str_runes[idx-1]) && slices.Contains(wordChars, str_runes[idx])) || (slices.Contains(wordChars, str_runes[idx-1]) && !slices.Contains(wordChars, str_runes[idx])) return wbounded } func isSpecialChar(c rune) bool { return slices.Contains(specialChars, c) } // Some special characters have metacharacter replacements. These characters, when encountered in their literal form, can be treated as regular characters. func isSpecialCharWithMetacharReplacement(c rune) bool { return slices.Contains([]rune{'[', ']'}, c) } func isNormalChar(c rune) bool { return !slices.Contains(specialChars, c) } // Ensure that the given elements are only appended to the given slice if they // don't already exist. 
Returns the new slice, and the number of unique items appended. func unique_append[T comparable](slc []T, items ...T) ([]T, int) { num_appended := 0 for _, item := range items { if !slices.Contains(slc, item) { slc = append(slc, item) num_appended++ } } return slc, num_appended } // Returns true only if all the given elements are equal func allEqual[T comparable](items ...T) bool { first := items[0] for _, item := range items { if item != first { return false } } return true } // Map function - convert a slice of T to a slice of V, based on a function // that maps a T to a V func funcMap[T, V any](slc []T, fn func(T) V) []V { toReturn := make([]V, len(slc)) for i, val := range slc { toReturn[i] = fn(val) } return toReturn } // Reduce function - reduces a slice of a type into a value of the type, // based on the given function. func funcReduce[T any](slc []T, fn func(T, T) T) T { if len(slc) == 0 { panic("Reduce on empty slice.") } for len(slc) > 1 { v1 := slc[0] v2 := slc[1] slc = slc[1:] slc[0] = fn(v1, v2) } return slc[0] } // Generate numbers in a range - start to end (both inclusive) func genRangeInclusive[T character](start, end T) []T { toRet := make([]T, (end-start)+1) for i := start; i <= end; i++ { toRet[i-start] = i } return toRet } // Returns a rune-slice containing all possible cases of the given rune, given the // 'caseInsensitive' boolean variable. // If this variable is false, the rune is returned as-is, without modifications. // If it is true, then we return all possible cases of the // rune. // At the moment, this includes: // 1. Upper case // 2. Lower case // 3. 
// Title case
//
// allCases returns the case variants of r. When caseInsensitive is false the
// result is a one-element slice holding r unchanged. When it is true the
// result always has three elements, in the order lower case, upper case,
// title case; runes without distinct case forms therefore appear repeated.
func allCases(r rune, caseInsensitive bool) []rune {
	if !caseInsensitive {
		return []rune{r}
	}
	return []rune{unicode.ToLower(r), unicode.ToUpper(r), unicode.ToTitle(r)}
}

// Returns true if c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func isHex(c rune) bool {
	switch {
	case '0' <= c && c <= '9',
		'a' <= c && c <= 'f',
		'A' <= c && c <= 'F':
		return true
	}
	return false
}

// Returns true if c is an ASCII octal digit (0-7).
func isOctal(c rune) bool {
	return '0' <= c && c <= '7'
}

// Replace an element in a slice with another, given both values.
// Every occurrence of toReplace is overwritten in place, so the caller's
// slice is modified; the same slice is returned for convenience.
func replaceByValue[T comparable](slc []T, toReplace T, replaceWith T) []T {
	for i := range slc {
		if slc[i] == toReplace {
			slc[i] = replaceWith
		}
	}
	return slc
}