// NOTE(review): the text originally on these lines was code-hosting page
// residue, not Go source: a repository topic-limit notice ("You cannot select
// more than 25 topics ...") and the file stats "143 lines", "4.2 KiB", "Go".

package regex
// NOTE(review): a stray blame timestamp ("3 months ago") was here; it is not Go source.
import (
"slices"
"unicode"
// NOTE(review): a stray blame timestamp ("3 months ago") was here; it is not Go source.
)
var whitespaceChars = []rune{' ', '\t', '\n'}
var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
var lbracketRune rune = 0xF0002
var rbracketRune rune = 0xF0003
var anyCharRune rune = 0xF0004 // Represents any character - used for states where the allChars flag is on.
var lparenRune rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudio-parentheses
var rparenRune rune = 0xF0006
var nonCapLparenRune rune = 0xF0007 // Represents a non-capturing group's LPAREN
var escBackslashRune rune = 0xF0008 // Represents an escaped backslash
var CHAR_RANGE rune = 0xF0009 // Represents a character range
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
// An interface for int and rune, which are identical
type character interface {
int | rune
}
// Returns true if str[idx] and str[idx-1] are separated by a word boundary.
func isWordBoundary(str []rune, idx int) bool {
str_runes := []rune(str)
wbounded := idx == 0 ||
idx >= len(str) ||
(!slices.Contains(wordChars, str_runes[idx-1]) && slices.Contains(wordChars, str_runes[idx])) ||
(slices.Contains(wordChars, str_runes[idx-1]) && !slices.Contains(wordChars, str_runes[idx]))
return wbounded
}
func isSpecialChar(c rune) bool {
return slices.Contains(specialChars, c)
}
// Some special characters have metacharacter replacements. These characters, when encountered in their literal form, can be treated as regular characters.
func isSpecialCharWithMetacharReplacement(c rune) bool {
return slices.Contains([]rune{'[', ']'}, c)
}
func isNormalChar(c rune) bool {
return !slices.Contains(specialChars, c)
3 months ago
}
// Ensure that the given elements are only appended to the given slice if they
// don't already exist. Returns the new slice, and the number of unique items appended.
func unique_append[T comparable](slc []T, items ...T) ([]T, int) {
num_appended := 0
for _, item := range items {
if !slices.Contains(slc, item) {
slc = append(slc, item)
num_appended++
}
}
return slc, num_appended
}
// Returns true only if all the given elements are equal
func allEqual[T comparable](items ...T) bool {
first := items[0]
for _, item := range items {
if item != first {
return false
}
}
return true
}
// Map function - convert a slice of T to a slice of V, based on a function
// that maps a T to a V
func funcMap[T, V any](slc []T, fn func(T) V) []V {
toReturn := make([]V, len(slc))
for i, val := range slc {
toReturn[i] = fn(val)
}
return toReturn
}
// Reduce function - reduces a slice of a type into a value of the type,
// based on the given function.
func funcReduce[T any](slc []T, fn func(T, T) T) T {
if len(slc) == 0 {
panic("Reduce on empty slice.")
}
for len(slc) > 1 {
v1 := slc[0]
v2 := slc[1]
slc = slc[1:]
slc[0] = fn(v1, v2)
}
return slc[0]
}
// Generate numbers in a range - start to end (both inclusive)
func genRangeInclusive[T character](start, end T) []T {
toRet := make([]T, (end-start)+1)
for i := start; i <= end; i++ {
toRet[i-start] = i
}
return toRet
}
// Returns a rune-slice containing all possible cases of the given rune, given the
// 'caseInsensitive' boolean variable.
// If this variable is false, the rune is returned as-is, without modifications.
// If it is true, then we return all possible cases of the
// rune.
// At the moment, this includes:
// 1. Upper case
// 2. Lower case
// 3. Title case
func allCases(r rune, caseInsensitive bool) []rune {
if caseInsensitive {
return []rune{unicode.ToLower(r), unicode.ToUpper(r), unicode.ToTitle(r)}
} else {
return []rune{r}
}
}
func isHex(c rune) bool {
return slices.Contains([]rune("0123456789abcdefABCDEF"), c)
}
func isOctal(c rune) bool {
return slices.Contains([]rune("01234567"), c)
}
// Replace an element in a slice with another, given both values
func replaceByValue[T comparable](slc []T, toReplace T, replaceWith T) []T {
for i, val := range slc {
if val == toReplace {
slc[i] = replaceWith
}
}
return slc
}