Added function and examples for ReplaceAllFunc()

Wrote function and example for ReplaceAllLiteral()
Wrote MarshalText() and UnmarshalText() to implement TextMarshaler and TextUnmarshaler
2025-02-10 21:35:51 -05:00 · 2025-02-10 21:25:49 -05:00 · 2025-02-10 12:30:48 -05:00 · 2025-02-10 12:30:17 -05:00 · 2025-02-10 12:29:54 -05:00 · 2025-02-10 09:36:00 -05:00
6 changed files with 429 additions and 72 deletions
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -14,20 +14,41 @@ var notDotChars []rune
 // the startState of the NFA representation of the regex, and the number of capturing
 // groups in the regex. It also contains the expression string.
 type Reg struct {
-	start     *nfaState
+	start         *nfaState
-	numGroups int
+	numGroups     int
-	str       string
+	str           string
 	preferLongest bool
 }
 // NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
-func (r Reg) NumSubexp() int {
+func (re Reg) NumSubexp() int {
-	return r.numGroups
+	return re.numGroups
 }
 // String returns the string used to compile the expression.
-func (r Reg) String() string {
+func (re Reg) String() string {
-	return r.str
+	return re.str
 }
 // MarshalText implements [encoding.TextMarshaler]. The output is the expression string, exactly as
 // returned by [Reg.String]. Any flags passed as arguments to [Compile], and the effect of calling
 // [Reg.Longest], are not part of the output and are therefore lost.
 //
 // The method uses a value receiver so that both Reg and *Reg have it in their method sets. With a
 // pointer receiver, a Reg held by value (for example in a non-addressable struct field) does not
 // satisfy [encoding.TextMarshaler] and encoders skip this method.
 //
 // The returned error is always nil.
 func (re Reg) MarshalText() ([]byte, error) {
 	return []byte(re.String()), nil
 }
 // UnmarshalText implements [encoding.TextUnmarshaler]. The given byte-slice is converted to a string
 // and passed to [Compile] (without any flags). If compilation succeeds, re is overwritten with the
 // compiled expression and nil is returned. If it fails, re is left untouched and the error from
 // [Compile] is returned.
 func (re *Reg) UnmarshalText(text []byte) error {
 	compiled, err := Compile(string(text))
 	if err != nil {
 		return err
 	}
 	*re = compiled
 	return nil
 }
 // Longest makes subsequent searches with re prefer the longest match: it sets the preferLongest
 // field, which the Find* methods pass down to the matcher. For example, the expression `x|xx`
 // applied to "xx" yields "x" by default and "xx" after Longest has been called.
 // It has a pointer receiver because it modifies re.
 func (re *Reg) Longest() {
 	re.preferLongest = true
 }
 const concatRune rune = 0xF0001
@@ -1135,7 +1156,7 @@ func thompson(re []postfixNode) (Reg, error) {
 	concatenate(nfa[0], &lastState)
 	// The string is empty here, because we add it in Compile()
-	return Reg{nfa[0], numGroups, ""}, nil
+	return Reg{nfa[0], numGroups, "", false}, nil
 }
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -33,7 +33,7 @@ Perl classes:
 	\d				Match any digit character ([0-9])
 	\D				Match any non-digit character ([^0-9])
 	\w				Match any word character ([a-zA-Z0-9_])
-	\W				Match any word character ([^a-zA-Z0-9_])
+	\W				Match any non-word character ([^a-zA-Z0-9_])
 	\s				Match any whitespace character ([ \t\n])
 	\S				Match any non-whitespace character ([^ \t\n])
@@ -105,23 +105,7 @@ The key differences are mentioned below.
 1. Greediness:
-This engine does not support non-greedy operators. All operators are always greedy in nature, and will try
+This engine currently does not support non-greedy operators.
 to match as much as they can, while still allowing for a successful match. For example, given the regex:
 	y*y
 The engine will match as many 'y's as it can, while still allowing the trailing 'y' to be matched.
 Another, more subtle example is the following regex:
 	x|xx
 While the stdlib implementation (and most other engines) will prefer matching the first item of the alternation,
 this engine will go for the longest possible match, regardless of the order of the alternation. Although this
 strays from the convention, it results in a nice rule-of-thumb - the engine is ALWAYS greedy.
 The stdlib implementation has a function [regexp.Regexp.Longest] which makes future searches prefer the longest match.
 That is the default (and unchangeable) behavior in this engine.
 2. Byte-slices and runes:
@@ -166,13 +150,13 @@ The following features from [regexp] are (currently) NOT supported:
 1. Named capturing groups
 2. Non-greedy operators
 3. Unicode character classes
- 4. Embedded flags (flags are passed as arguments to [Compile])
+ 4. Embedded flags (flags are instead passed as arguments to [Compile])
 5. Literal text with \Q ... \E
 The following features are not available in [regexp], but are supported in my engine:
 1. Lookarounds
 2. Numeric ranges
-The goal is to shorten the first list, and expand the second.
+I hope to shorten the first list, and expand the second.
 */
 package regex
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -2,6 +2,7 @@ package regex_test
 import (
 	"fmt"
 	"strings"
 	"gitea.twomorecents.org/Rockingcool/kleingrep/regex"
 )
@@ -32,12 +33,12 @@ func ExampleReg_FindAll() {
 }
 func ExampleReg_FindString() {
-	regexStr := `\d+`
+	regexStr := `\w+\s+(?=sheep)`
 	regexComp := regex.MustCompile(regexStr)
-	matchStr := regexComp.FindString("The year of our lord, 2025")
+	matchStr := regexComp.FindString("pink cows and yellow sheep")
 	fmt.Println(matchStr)
-	// Output: 2025
+	// Output: yellow
 }
 func ExampleReg_FindSubmatch() {
@@ -52,3 +53,129 @@ func ExampleReg_FindSubmatch() {
 	// 0	1
 	// 2	3
 }
 func ExampleReg_FindStringSubmatch() {
 	// Three capturing groups: year, month and day.
 	dateRe := regex.MustCompile(`(\d{4})-(\d{2})-(\d{2})`)
 	groups := dateRe.FindStringSubmatch(`The date is 2025-02-10`)
 	fmt.Println(groups[1]) // text of the 1st group
 	fmt.Println(groups[3]) // text of the 3rd group
 	// Output: 2025
 	// 10
 }
 func ExampleReg_FindAllSubmatch() {
 	decimalRe := regex.MustCompile(`(\d)\.(\d)(\d)`)
 	all := decimalRe.FindAllSubmatch("3.14+8.97")
 	first, second := all[0], all[1]
 	fmt.Println(first[0])  // 0-group (entire match) of the 1st match
 	fmt.Println(first[1])  // 1st group of the 1st match
 	fmt.Println(second[0]) // 0-group of the 2nd match
 	fmt.Println(second[1]) // 1st group of the 2nd match
 	// Output: 0	4
 	// 0	1
 	// 5	9
 	// 5	6
 }
 func ExampleReg_FindAllString() {
 	// Four numeric ranges separated by literal periods.
 	ipRe := regex.MustCompile(`<0-255>\.<0-255>\.<0-255>\.<0-255>`)
 	addrs := ipRe.FindAllString(`192.168.220.7 pings 9.9.9.9`)
 	fmt.Println(addrs[0])
 	fmt.Println(addrs[1])
 	// Output: 192.168.220.7
 	// 9.9.9.9
 }
 func ExampleReg_FindAllStringSubmatch() {
 	// The expression reads:
 	//   - the literal 'https://' ...
 	//   - group 1: one or more letters, digits or periods ...
 	//   - a forward slash ...
 	//   - group 2: one or more of: word character, question mark, period, equals sign.
 	urlRe := regex.MustCompile(`https://([a-z0-9\.]+)/([\w.?=]+)`, regex.RE_CASE_INSENSITIVE)
 	text := `You can find me at https://twomorecents.org/index.html and https://news.ycombinator.com/user?id=aadhavans`
 	// Each element holds the text of one match: index 0 is the whole match, 1 and 2 are the groups.
 	for _, groups := range urlRe.FindAllStringSubmatch(text)[:2] {
 		fmt.Println(groups[1]) // 1st group of this match
 		fmt.Println(groups[2]) // 2nd group of this match
 	}
 	// Output: twomorecents.org
 	// index.html
 	// news.ycombinator.com
 	// user?id=aadhavans
 }
 func ExampleReg_Expand() {
 	inputStr := `option1: value1
 	option2: value2`
 	pairRe := regex.MustCompile(`(\w+): (\w+)`, regex.RE_MULTILINE)
 	// $1 and $2 are replaced by the text of the 1st and 2nd capturing group.
 	const templateStr = "$1 = $2\n"
 	var expanded string
 	for _, m := range pairRe.FindAllSubmatch(inputStr) {
 		// Each call appends the expanded template for one match to what has been built so far.
 		expanded = pairRe.Expand(expanded, templateStr, inputStr, m)
 	}
 	fmt.Println(expanded)
 	// Output: option1 = value1
 	// option2 = value2
 }
 func ExampleReg_LiteralPrefix() {
 	re := regex.MustCompile(`a(b|c)d*`)
 	// Only the leading 'a' is literal; the alternation that follows ends the prefix.
 	literal, wholeExpr := re.LiteralPrefix()
 	fmt.Println(literal)
 	fmt.Println(wholeExpr)
 	// Output: a
 	// false
 }
 func ExampleReg_Longest() {
 	const inputStr = "xx"
 	re := regex.MustCompile(`x|xx`)
 	// By default the first alternative that matches wins.
 	fmt.Println(re.FindString(inputStr))
 	// After Longest(), the longest possible match is preferred instead.
 	re.Longest()
 	fmt.Println(re.FindString(inputStr))
 	// Output: x
 	// xx
 }
 func ExampleReg_ReplaceAll() {
 	// A digit followed by a word character; the replacement swaps the two groups.
 	re := regex.MustCompile(`(\d)(\w)`)
 	swapped := re.ReplaceAll("5d9t", `$2$1`)
 	fmt.Println(swapped)
 	// Output: d5t9
 }
 func ExampleReg_ReplaceAllLiteral() {
 	animalRe := regex.MustCompile(`fox|dog`)
 	// The replacement text is inserted as-is, without any variable expansion.
 	replaced := animalRe.ReplaceAllLiteral("the quick brown fox jumped over the lazy dog", `duck`)
 	fmt.Println(replaced)
 	// Output: the quick brown duck jumped over the lazy duck
 }
 func ExampleReg_ReplaceAllFunc() {
 	// Matches runs of five or more word characters.
 	longWordRe := regex.MustCompile(`\w{5,}`)
 	sentence := `all five or more letter words in this string are capitalized`
 	// Every match is passed to strings.ToUpper, and replaced by its result.
 	fmt.Println(longWordRe.ReplaceAllFunc(sentence, strings.ToUpper))
 	// Output: all five or more LETTER WORDS in this STRING are CAPITALIZED
 }
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,6 +2,8 @@ package regex
 import (
 	"fmt"
 	"strconv"
 	"unicode"
 )
 // A Match represents a match found by the regex in a given string.
@@ -63,8 +65,8 @@ func copyThread(to *nfaState, from nfaState) {
 // Find returns the 0-group of the leftmost match of the regex in the given string.
 // An error value != nil indicates that no match was found.
-func (regex Reg) Find(str string) (Group, error) {
+func (re Reg) Find(str string) (Group, error) {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Group{}, fmt.Errorf("no matches found")
 	}
@@ -72,15 +74,27 @@ func (regex Reg) Find(str string) (Group, error) {
 }
 // Match returns a boolean value, indicating whether the regex found a match in the given string.
-func (regex Reg) Match(str string) bool {
+func (re Reg) Match(str string) bool {
-	_, err := regex.Find(str)
+	_, err := re.Find(str)
 	return err == nil
 }
 // CompileMatch is a one-shot helper: it compiles expr with the optional flags (see [ReFlag])
 // and reports whether str contains a match of the resulting expression.
 // It plays the same role as [regexp.MatchString] in the standard library.
 // If expr fails to compile, the result is false and the compilation error is returned;
 // otherwise the error is nil.
 func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
 	compiled, compileErr := Compile(expr, flags...)
 	if compileErr == nil {
 		return compiled.Match(str), nil
 	}
 	return false, compileErr
 }
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
-func (regex Reg) FindAll(str string) []Group {
+func (re Reg) FindAll(str string) []Group {
-	indices := regex.FindAllSubmatch(str)
+	indices := re.FindAllSubmatch(str)
 	zeroGroups := funcMap(indices, getZeroGroup)
 	return zeroGroups
 }
@@ -89,8 +103,8 @@ func (regex Reg) FindAll(str string) []Group {
 // The return value will be an empty string in two situations:
 //  1. No match was found
 //  2. The match was an empty string
-func (regex Reg) FindString(str string) string {
+func (re Reg) FindString(str string) string {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return ""
 	}
@@ -103,8 +117,8 @@ func (regex Reg) FindString(str string) string {
 // number of groups. The validity of a group (whether or not it matched anything) can be determined with
 // [Group.IsValid], or by checking that both indices of the group are >= 0.
 // The second-return value is nil if no match was found.
-func (regex Reg) FindSubmatch(str string) (Match, error) {
+func (re Reg) FindSubmatch(str string) (Match, error) {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Match{}, fmt.Errorf("no match found")
 	} else {
@@ -121,9 +135,9 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 //  2. Group n found a zero-length match
 //
 // A return value of nil indicates no match.
-func (regex Reg) FindStringSubmatch(str string) []string {
+func (re Reg) FindStringSubmatch(str string) []string {
-	matchStr := make([]string, regex.numGroups+1)
+	matchStr := make([]string, re.numGroups+1)
-	match, err := regex.FindSubmatch(str)
+	match, err := re.FindSubmatch(str)
 	if err != nil {
 		return nil
 	}
@@ -145,8 +159,8 @@ func (regex Reg) FindStringSubmatch(str string) []string {
 // FindAllString is the 'all' version of [Reg.FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
-func (regex Reg) FindAllString(str string) []string {
+func (re Reg) FindAllString(str string) []string {
-	zerogroups := regex.FindAll(str)
+	zerogroups := re.FindAll(str)
 	matchStrs := funcMap(zerogroups, func(g Group) string {
 		return str[g.StartIdx:g.EndIdx]
 	})
@@ -155,14 +169,14 @@ func (regex Reg) FindAllString(str string) []string {
 // FindNthMatch returns the 'n'th match of the regex in the given string.
 // It returns an error (!= nil) if there are fewer than 'n' matches in the string.
-func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
+func (re Reg) FindNthMatch(str string, n int) (Match, error) {
 	idx := 0
 	matchNum := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			matchNum++
 		}
@@ -175,14 +189,14 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
 }
 // FindAllSubmatch returns a slice of matches in the given string.
-func (regex Reg) FindAllSubmatch(str string) []Match {
+func (re Reg) FindAllSubmatch(str string) []Match {
 	idx := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	indices := make([]Match, 0)
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			indices = append(indices, matchIdx)
 		}
@@ -191,7 +205,30 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }
-func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState) []nfaState {
+// FindAllStringSubmatch returns a double-slice of strings. Each slice contains the text of a match, including all submatches.
 // A return value of nil indicates no match.
 func (re Reg) FindAllStringSubmatch(str string) [][]string {
 	matches := re.FindAllSubmatch(str)
 	if len(matches) == 0 {
 		return nil
 	}
 	// The slices produced by funcMap are the result. No [][]string is allocated up front,
 	// because it would be discarded as soon as the funcMap result is assigned.
 	return funcMap(matches, func(m Match) []string {
 		// One string per group of this match; index 0 is the whole match.
 		return funcMap(m, func(g Group) string {
 			if !g.IsValid() {
 				// A group that did not take part in the match is reported as "".
 				return ""
 			}
 			return str[g.StartIdx:g.EndIdx]
 		})
 	})
 }
 func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
 	if stateExists(list, state) || stateExists(visited, state) {
 		return list
 	}
@@ -199,32 +236,32 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
 	if state.isKleene || state.isQuestion {
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		return list
 	}
 	if state.isAlternation {
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		return list
 	}
 	state.threadGroups = append([]Group{}, threadGroups...)
 	if state.assert != noneAssert {
-		if state.checkAssertion(str, idx) {
+		if state.checkAssertion(str, idx, preferLongest) {
 			copyThread(state.next, state)
-			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 		}
 	}
 	if state.groupBegin {
 		state.threadGroups[state.groupNum].StartIdx = idx
-		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	if state.groupEnd {
 		state.threadGroups[state.groupNum].EndIdx = idx
-		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	return append(list, state)
@@ -233,7 +270,7 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
-func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
+func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) {
 	// Base case - exit if offset exceeds string's length
 	if offset > len(str) {
 		// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
@@ -248,7 +285,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
 	if start.assert != noneAssert {
-		if start.checkAssertion(str, offset) == false {
+		if start.checkAssertion(str, offset, preferLongest) == false {
 			i++
 			return false, []Group{}, i
 		}
@@ -256,7 +293,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	start.threadGroups = newMatch(numGroups + 1)
 	start.threadGroups[0].StartIdx = i
-	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil)
+	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest)
 	var match Match = nil
 	for idx := i; idx <= len(str); idx++ {
 		if len(currentStates) == 0 {
@@ -273,10 +310,12 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			if currentState.isLast {
 				currentState.threadGroups[0].EndIdx = idx
 				match = append([]Group{}, currentState.threadGroups...)
-				break
+				if !preferLongest {
-			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
+					break
-				if currentState.contentContains(str, idx) {
+				}
-					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil)
+			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
 				if currentState.contentContains(str, idx, preferLongest) {
 					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
 				}
 			}
 		}
@@ -291,3 +330,131 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	}
 	return false, []Group{}, i + 1
 }
 // Expand appends template to dst, expanding any variables in template to the relevant capturing group,
 // and returns the result.
 //
 // A variable is of the form '$n', where 'n' is a decimal number. It is replaced by the text of the n-th
 // capturing group ($0 is the entire match). The rules are:
 //   - "$$", or a '$' that is the last character of the template, produces a single literal '$'.
 //   - A '$' that is not followed by a digit is copied as-is.
 //   - If n is not a valid index into match (including numbers that cannot be parsed), the
 //     variable is copied literally as "$n".
 //   - If group n exists but did not take part in the match, it expands to the empty string.
 //
 // src is the input string that was searched, and match must be the result of [Reg.FindSubmatch]
 // (or one element of [Reg.FindAllSubmatch]) on src. Group indices are interpreted as indices
 // into the runes of src.
 func (re Reg) Expand(dst string, template string, src string, match Match) string {
 	tmpl := []rune(template)
 	srcRunes := []rune(src)
 	// Accumulate into a byte buffer rather than concatenating strings on every step.
 	out := []byte(dst)
 	for i := 0; i < len(tmpl); {
 		c := tmpl[i]
 		i++
 		if c != '$' {
 			out = append(out, string(c)...)
 			continue
 		}
 		// The dollar sign is the last character of the template, or it is followed by another dollar sign.
 		if i >= len(tmpl) || tmpl[i] == '$' {
 			out = append(out, '$')
 			i++
 			continue
 		}
 		// Collect the run of digits that follows the dollar sign.
 		digitsStart := i
 		for i < len(tmpl) && unicode.IsDigit(tmpl[i]) {
 			i++
 		}
 		numStr := string(tmpl[digitsStart:i])
 		if numStr == "" {
 			// Not a variable: keep the '$'; the following character is handled by the next iteration.
 			out = append(out, '$')
 			continue
 		}
 		num, err := strconv.Atoi(numStr)
 		if err != nil || num >= len(match) {
 			// unicode.IsDigit also accepts non-ASCII digits that Atoi rejects, and the number may
 			// overflow or exceed the group count. In all of these cases keep the text verbatim.
 			out = append(out, '$')
 			out = append(out, numStr...)
 			continue
 		}
 		// Groups that did not participate have negative indices; slicing with them would panic,
 		// so they (and any span that does not fit src) expand to nothing.
 		g := match[num]
 		if g.IsValid() && g.StartIdx <= g.EndIdx && g.EndIdx <= len(srcRunes) {
 			out = append(out, string(srcRunes[g.StartIdx:g.EndIdx])...)
 		}
 	}
 	return string(out)
 }
 // LiteralPrefix returns a literal string that every match of the expression must start with.
 // complete reports whether the walk over the expression's states reached the final state,
 // i.e. whether the prefix makes up the entire expression.
 func (re Reg) LiteralPrefix() (prefix string, complete bool) {
 	cur := re.start
 	// Step over a leading assertion state; it contributes no characters.
 	if cur.assert != noneAssert {
 		cur = cur.next
 	}
 	for {
 		// Stop at the final state, at an alternation, at an assertion, or at any state
 		// whose content is not exactly one element.
 		if cur.isLast || cur.isAlternation || len(cur.content) != 1 || cur.assert != noneAssert {
 			break
 		}
 		// Group boundaries are passed through without adding anything to the prefix.
 		if !cur.groupBegin && !cur.groupEnd {
 			prefix += string(rune(cur.content[0]))
 		}
 		cur = cur.next
 	}
 	return prefix, cur.isLast
 }
 // ReplaceAll replaces all matches of the expression in src, with the text in repl. In repl, variables are interpreted
 // as they are in [Reg.Expand]. The resulting string is returned.
 func (re Reg) ReplaceAll(src string, repl string) string {
 	matches := re.FindAllSubmatch(src)
 	i := 0
 	currentMatch := 0
 	dst := ""
 	for i < len(src) {
 		if currentMatch < len(matches) && matches[currentMatch][0].IsValid() && i == matches[currentMatch][0].StartIdx {
 			dst += re.Expand("", repl, src, matches[currentMatch])
 			i = matches[currentMatch][0].EndIdx
 			currentMatch++
 		} else {
 			dst += string(src[i])
 			i++
 		}
 	}
 	return dst
 }
 // ReplaceAllLiteral replaces all matches of the expression in src, with the text in repl. The text is replaced directly,
 // without any expansion.
 func (re Reg) ReplaceAllLiteral(src string, repl string) string {
 	zerogroups := re.FindAll(src)
 	currentMatch := 0
 	i := 0
 	dst := ""
 	for i < len(src) {
 		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
 			dst += repl
 			i = zerogroups[currentMatch].EndIdx
 			currentMatch += 1
 		} else {
 			dst += string(src[i])
 			i++
 		}
 	}
 	return dst
 }
 // ReplaceAllFunc replaces every match of the expression in src, with the return value of the function replFunc.
 // replFunc takes in the matched string. The return value is substituted in directly without expasion.
 func (re Reg) ReplaceAllFunc(src string, replFunc func(string) string) string {
 	zerogroups := re.FindAll(src)
 	currentMatch := 0
 	i := 0
 	dst := ""
 	for i < len(src) {
 		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
 			dst += replFunc(src[zerogroups[currentMatch].StartIdx:zerogroups[currentMatch].EndIdx])
 			i = zerogroups[currentMatch].EndIdx
 			currentMatch += 1
 		} else {
 			dst += string(src[i])
 			i++
 		}
 	}
 	return dst
 }
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -133,7 +133,7 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 // Checks if the given state's assertion is true. Returns true if the given
 // state doesn't have an assertion.
-func (s nfaState) checkAssertion(str []rune, idx int) bool {
+func (s nfaState) checkAssertion(str []rune, idx int, preferLongest bool) bool {
 	if s.assert == alwaysTrueAssert {
 		return true
 	}
@@ -183,7 +183,7 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 			strToMatch = string(runesToMatch)
 		}
-		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex}
+		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex, preferLongest}
 		matchIndices := regComp.FindAll(strToMatch)
 		numMatchesFound := 0
@@ -210,9 +210,9 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 }
 // Returns true if the contents of 's' contain the value at the given index of the given string
-func (s nfaState) contentContains(str []rune, idx int) bool {
+func (s nfaState) contentContains(str []rune, idx int, preferLongest bool) bool {
 	if s.assert != noneAssert {
-		return s.checkAssertion(str, idx)
+		return s.checkAssertion(str, idx, preferLongest)
 	}
 	if idx >= len(str) {
 		return false
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -25,7 +25,9 @@ var reTests = []struct {
 	{"a*b", nil, "qwqw", []Group{}},
 	{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
 	{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
-	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	// This match will only happen with Longest()
 	// {"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
 	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}},
 	{"b*a*a", nil, "bba", []Group{{0, 3}}},
 	{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
 	{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
@@ -537,7 +539,9 @@ var groupTests = []struct {
 	{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
 	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	// This match will only happen with Longest()
 	//	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
 	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 3}, {0, 3}, {-1, -1}}}},
 	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
 	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
@@ -857,6 +861,60 @@ func TestFindStringSubmatch(t *testing.T) {
 		})
 	}
 }
 func TestFindAllStringSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
 			regComp, err := Compile(test.re, test.flags...)
 			if err != nil {
 				if test.result != nil {
 					panic(err)
 				}
 			}
 			matchStrs := regComp.FindAllStringSubmatch(test.str)
 			if matchStrs == nil {
 				if len(test.result) != 0 {
 					expectedStrs := funcMap(test.result, func(m Match) []string {
 						return funcMap(m, func(g Group) string {
 							if g.IsValid() {
 								return test.str[g.StartIdx:g.EndIdx]
 							} else {
 								return ""
 							}
 						})
 					})
 					t.Errorf("Wanted %v got no match\n", expectedStrs)
 				}
 			} else if len(test.result) == 0 {
 				t.Errorf("Wanted no match got %v\n", matchStrs)
 			} else {
 				expectedStrs := funcMap(test.result, func(m Match) []string {
 					return funcMap(m, func(g Group) string {
 						if g.IsValid() {
 							return test.str[g.StartIdx:g.EndIdx]
 						} else {
 							return ""
 						}
 					})
 				})
 				for i, matchStr := range matchStrs {
 					for j, groupStr := range matchStr {
 						if groupStr == "" {
 							if j < len(expectedStrs[i]) && expectedStrs[i][j] != "" {
 								t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 							}
 						} else {
 							if expectedStrs[i][j] != groupStr {
 								t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 							}
 						}
 					}
 				}
 			}
 		})
 	}
 }
 func TestFindAllSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
Author	SHA1	Message	Date
Aadhavan Srinivasan	073f231b89	Added function and examples for ReplaceAllFunc()	2025-02-10 21:35:51 -05:00
Aadhavan Srinivasan	3b7257c921	Wrote function and example for ReplaceAllLiteral()	2025-02-10 21:25:49 -05:00
Aadhavan Srinivasan	668df8b70a	Wrote MarshalText() and UnmarshalText() to implement TextMarshaler and TextUnmarshaler	2025-02-10 12:30:48 -05:00
Aadhavan Srinivasan	214acf7e0f	Wrote example for ReplaceAll(); fixed out-of-bounds bug in Expand()	2025-02-10 12:30:17 -05:00
Aadhavan Srinivasan	50221ff4d9	Wrote ReplaceAll(), to replace all matches of the regex with a given string	2025-02-10 12:29:54 -05:00
Aadhavan Srinivasan	5ab95f512a	Updated docs	2025-02-10 09:36:00 -05:00
Aadhavan Srinivasan	e7da678408	Removed obsolete documentation	2025-02-10 09:35:16 -05:00
Aadhavan Srinivasan	ab363e2766	Rewrote test for 'FindString()' to use lookarounds	2025-02-10 09:24:47 -05:00
Aadhavan Srinivasan	c803e45415	Added example for 'FindStringSubmatch()'	2025-02-10 09:19:24 -05:00
Aadhavan Srinivasan	525296f239	Added examples for 'FindAllString()' , 'FindAllSubmatch()' and 'FindAllStringSubmatch()'	2025-02-10 09:10:39 -05:00
Aadhavan Srinivasan	eb0ab9f7ec	Wrote test for FindAllStringSubmatch()	2025-02-10 08:39:20 -05:00
Aadhavan Srinivasan	17a7dbae4c	Wrote FindAllStringSubmatch()	2025-02-10 08:39:10 -05:00
Aadhavan Srinivasan	f2279acd98	Fixed mistake in docs	2025-02-10 08:12:09 -05:00
Aadhavan Srinivasan	662527c478	Merge pull request 'Implement PCRE Matching (prefer left-branch)' (#2 ) from implementPCREMatchingRules into master Reviewed-on: #2	2025-02-09 15:24:26 -06:00
Aadhavan Srinivasan	d1958f289c	Commented out tests that would only pass with Longest()	2025-02-09 16:08:16 -05:00
Aadhavan Srinivasan	15ee49f42e	Rename method receivers from 'regex' to 're' (it's shorter)	2025-02-09 15:51:46 -05:00
Aadhavan Srinivasan	b60ded4136	Don't break when a match is found, if we are looking for the longest match	2025-02-09 15:48:33 -05:00
Aadhavan Srinivasan	9fbb99f86c	Wrote example for Longest()	2025-02-09 15:47:57 -05:00
Aadhavan Srinivasan	af15904f3b	Updated documentation	2025-02-09 15:41:13 -05:00
Aadhavan Srinivasan	d522f50b50	Wrote new example functions	2025-02-09 15:40:59 -05:00
Aadhavan Srinivasan	fb47e082eb	Wrote new methods Expand() and preferLongest(); Use new function signatures (with preferLongest); only characters should be added to next state list	2025-02-09 15:40:39 -05:00
Aadhavan Srinivasan	1f5a363539	Use new function signatures (with preferLongest)	2025-02-09 15:39:09 -05:00
Aadhavan Srinivasan	9e12f9dcb3	Added field to Reg, denoting if we prefer longest match (POSIX style) or not (perl style)	2025-02-09 15:38:26 -05:00