Added function and examples for ReplaceAllFunc()

Wrote function and example for ReplaceAllLiteral()
Wrote MarshalText() and UnmarshalText() to implement TextMarshaler and TextUnmarshaler
2025-02-10 21:35:51 -05:00 · 2025-02-10 21:25:49 -05:00 · 2025-02-10 12:30:48 -05:00 · 2025-02-10 12:30:17 -05:00 · 2025-02-10 12:29:54 -05:00 · 2025-02-10 09:36:00 -05:00
6 changed files with 429 additions and 72 deletions
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -14,20 +14,41 @@ var notDotChars []rune
 // the startState of the NFA representation of the regex, and the number of capturing
 // groups in the regex. It also contains the expression string.
 type Reg struct {
-	start     *nfaState
-	numGroups int
-	str       string
+	start         *nfaState
+	numGroups     int
+	str           string
+	preferLongest bool
 }

 // NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
-func (r Reg) NumSubexp() int {
-	return r.numGroups
+func (re Reg) NumSubexp() int {
+	return re.numGroups
 }

 // String returns the string used to compile the expression.
-func (r Reg) String() string {
-	return r.str
+func (re Reg) String() string {
+	return re.str
+}
+
+// MarshalText implements [encoding.TextMarshaler]. The output is equivalent to that of [Reg.String].
+// Any flags passed as arguments to [Compile], as well as the effect of calling [Reg.Longest], are lost.
+func (re *Reg) MarshalText() ([]byte, error) {
+	return []byte(re.String()), nil
+}
+
+// UnmarshalText implements [encoding.TextUnmarshaler]. It calls [Compile] on the given byte-slice. If compilation succeeds,
+// then the result of the compilation is stored in re. The error value returned by [Compile] is returned.
+func (re *Reg) UnmarshalText(text []byte) error {
+	newReg, err := Compile(string(text))
+	if err == nil {
+		*re = newReg
+	}
+	return err
+}
+
+func (re *Reg) Longest() {
+	re.preferLongest = true
 }

 const concatRune rune = 0xF0001
@@ -1135,7 +1156,7 @@ func thompson(re []postfixNode) (Reg, error) {
 	concatenate(nfa[0], &lastState)

 	// The string is empty here, because we add it in Compile()
-	return Reg{nfa[0], numGroups, ""}, nil
+	return Reg{nfa[0], numGroups, "", false}, nil

 }

--- a/regex/doc.go
+++ b/regex/doc.go
@@ -33,7 +33,7 @@ Perl classes:
 	\d				Match any digit character ([0-9])
 	\D				Match any non-digit character ([^0-9])
 	\w				Match any word character ([a-zA-Z0-9_])
-	\W				Match any word character ([^a-zA-Z0-9_])
+	\W				Match any non-word character ([^a-zA-Z0-9_])
 	\s				Match any whitespace character ([ \t\n])
 	\S				Match any non-whitespace character ([^ \t\n])

@@ -105,23 +105,7 @@ The key differences are mentioned below.

 1. Greediness:

-This engine does not support non-greedy operators. All operators are always greedy in nature, and will try
-to match as much as they can, while still allowing for a successful match. For example, given the regex:
-
-	y*y
-
-The engine will match as many 'y's as it can, while still allowing the trailing 'y' to be matched.
-
-Another, more subtle example is the following regex:
-
-	x|xx
-
-While the stdlib implementation (and most other engines) will prefer matching the first item of the alternation,
-this engine will go for the longest possible match, regardless of the order of the alternation. Although this
-strays from the convention, it results in a nice rule-of-thumb - the engine is ALWAYS greedy.
-
-The stdlib implementation has a function [regexp.Regexp.Longest] which makes future searches prefer the longest match.
-That is the default (and unchangable) behavior in this engine.
+This engine currently does not support non-greedy operators.

 2. Byte-slices and runes:

@@ -166,13 +150,13 @@ The following features from [regexp] are (currently) NOT supported:
 1. Named capturing groups
 2. Non-greedy operators
 3. Unicode character classes
- 4. Embedded flags (flags are passed as arguments to [Compile])
+ 4. Embedded flags (flags are instead passed as arguments to [Compile])
 5. Literal text with \Q ... \E

 The following features are not available in [regexp], but are supported in my engine:
 1. Lookarounds
 2. Numeric ranges

-The goal is to shorten the first list, and expand the second.
+I hope to shorten the first list, and expand the second.
 */
 package regex
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -2,6 +2,7 @@ package regex_test

 import (
 	"fmt"
+	"strings"

 	"gitea.twomorecents.org/Rockingcool/kleingrep/regex"
 )
@@ -32,12 +33,12 @@ func ExampleReg_FindAll() {
 }

 func ExampleReg_FindString() {
-	regexStr := `\d+`
+	regexStr := `\w+\s+(?=sheep)`
 	regexComp := regex.MustCompile(regexStr)

-	matchStr := regexComp.FindString("The year of our lord, 2025")
+	matchStr := regexComp.FindString("pink cows and yellow sheep")
 	fmt.Println(matchStr)
-	// Output: 2025
+	// Output: yellow
 }

 func ExampleReg_FindSubmatch() {
@@ -52,3 +53,129 @@ func ExampleReg_FindSubmatch() {
 	// 0	1
 	// 2	3
 }
+
+func ExampleReg_FindStringSubmatch() {
+	regexStr := `(\d{4})-(\d{2})-(\d{2})`
+	regexComp := regex.MustCompile(regexStr)
+	inputStr := `The date is 2025-02-10`
+
+	match := regexComp.FindStringSubmatch(inputStr)
+	fmt.Println(match[1])
+	fmt.Println(match[3])
+	// Output: 2025
+	// 10
+}
+
+func ExampleReg_FindAllSubmatch() {
+	regexStr := `(\d)\.(\d)(\d)`
+	regexComp := regex.MustCompile(regexStr)
+
+	matches := regexComp.FindAllSubmatch("3.14+8.97")
+	fmt.Println(matches[0][0]) // 0-group (entire match) of 1st match (0-indexed)
+	fmt.Println(matches[0][1]) // 1st group of 1st match
+	fmt.Println(matches[1][0]) // 0-group of 2nd match
+	fmt.Println(matches[1][1]) // 1st group of 2nd match
+	// Output: 0	4
+	// 0	1
+	// 5	9
+	// 5	6
+}
+
+func ExampleReg_FindAllString() {
+	regexStr := `<0-255>\.<0-255>\.<0-255>\.<0-255>`
+	inputStr := `192.168.220.7 pings 9.9.9.9`
+	regexComp := regex.MustCompile(regexStr)
+
+	matchStrs := regexComp.FindAllString(inputStr)
+
+	fmt.Println(matchStrs[0])
+	fmt.Println(matchStrs[1])
+	// Output: 192.168.220.7
+	// 9.9.9.9
+}
+
+func ExampleReg_FindAllStringSubmatch() {
+	// 'https' ...
+	// followed by 1 or more alphanumeric characters (including period) ...
+	// then a forward slash ...
+	// followed by one or more of:
+	// 		word character,
+	// 		question mark,
+	// 		period,
+	// 		equals sign
+	regexStr := `https://([a-z0-9\.]+)/([\w.?=]+)`
+	regexComp := regex.MustCompile(regexStr, regex.RE_CASE_INSENSITIVE)
+	inputStr := `You can find me at https://twomorecents.org/index.html and https://news.ycombinator.com/user?id=aadhavans`
+
+	matchIndices := regexComp.FindAllStringSubmatch(inputStr)
+	fmt.Println(matchIndices[0][1]) // 1st group of 1st match (0-indexed)
+	fmt.Println(matchIndices[0][2]) // 2nd group of 1st match
+	fmt.Println(matchIndices[1][1]) // 1st group of 2nd match
+	fmt.Println(matchIndices[1][2]) // 2nd group of 2nd match
+	// Output: twomorecents.org
+	// index.html
+	// news.ycombinator.com
+	// user?id=aadhavans
+
+}
+
+func ExampleReg_Expand() {
+	inputStr := `option1: value1
+	option2: value2`
+	regexStr := `(\w+): (\w+)`
+	templateStr := "$1 = $2\n"
+	regexComp := regex.MustCompile(regexStr, regex.RE_MULTILINE)
+	result := ""
+	for _, submatches := range regexComp.FindAllSubmatch(inputStr) {
+		result = regexComp.Expand(result, templateStr, inputStr, submatches)
+	}
+	fmt.Println(result)
+	// Output: option1 = value1
+	// option2 = value2
+
+}
+
+func ExampleReg_LiteralPrefix() {
+	regexStr := `a(b|c)d*`
+	regexComp := regex.MustCompile(regexStr)
+	prefix, complete := regexComp.LiteralPrefix()
+	fmt.Println(prefix)
+	fmt.Println(complete)
+	// Output: a
+	// false
+}
+
+func ExampleReg_Longest() {
+	regexStr := `x|xx`
+	inputStr := "xx"
+	regexComp := regex.MustCompile(regexStr)
+	fmt.Println(regexComp.FindString(inputStr))
+	regexComp.Longest()
+	fmt.Println(regexComp.FindString(inputStr))
+	// Output: x
+	// xx
+}
+
+func ExampleReg_ReplaceAll() {
+	regexStr := `(\d)(\w)`
+	inputStr := "5d9t"
+	regexComp := regex.MustCompile(regexStr)
+	fmt.Println(regexComp.ReplaceAll(inputStr, `$2$1`))
+	// Output: d5t9
+}
+
+func ExampleReg_ReplaceAllLiteral() {
+	regexStr := `fox|dog`
+	inputStr := "the quick brown fox jumped over the lazy dog"
+	regexComp := regex.MustCompile(regexStr)
+	fmt.Println(regexComp.ReplaceAllLiteral(inputStr, `duck`))
+	// Output: the quick brown duck jumped over the lazy duck
+}
+
+func ExampleReg_ReplaceAllFunc() {
+	regexStr := `\w{5,}`
+	inputStr := `all five or more letter words in this string are capitalized`
+	regexComp := regex.MustCompile(regexStr)
+	fmt.Println(regexComp.ReplaceAllFunc(inputStr, strings.ToUpper))
+	// Output: all five or more LETTER WORDS in this STRING are CAPITALIZED
+}
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,6 +2,8 @@ package regex

 import (
 	"fmt"
+	"strconv"
+	"unicode"
 )

 // A Match represents a match found by the regex in a given string.
@@ -63,8 +65,8 @@ func copyThread(to *nfaState, from nfaState) {

 // Find returns the 0-group of the leftmost match of the regex in the given string.
 // An error value != nil indicates that no match was found.
-func (regex Reg) Find(str string) (Group, error) {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) Find(str string) (Group, error) {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Group{}, fmt.Errorf("no matches found")
 	}
@@ -72,15 +74,27 @@ func (regex Reg) Find(str string) (Group, error) {
 }

 // Match returns a boolean value, indicating whether the regex found a match in the given string.
-func (regex Reg) Match(str string) bool {
-	_, err := regex.Find(str)
+func (re Reg) Match(str string) bool {
+	_, err := re.Find(str)
 	return err == nil
 }

+// CompileMatch compiles expr and returns true if str contains a match of the expression.
+// It is equivalent to [regexp.Match].
+// An optional list of flags may be provided (see [ReFlag]).
+// It returns an error (!= nil) if there was an error compiling the expression.
+func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
+	re, err := Compile(expr, flags...)
+	if err != nil {
+		return false, err
+	}
+	return re.Match(str), nil
+}
+
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
-func (regex Reg) FindAll(str string) []Group {
-	indices := regex.FindAllSubmatch(str)
+func (re Reg) FindAll(str string) []Group {
+	indices := re.FindAllSubmatch(str)
 	zeroGroups := funcMap(indices, getZeroGroup)
 	return zeroGroups
 }
@@ -89,8 +103,8 @@ func (regex Reg) FindAll(str string) []Group {
 // The return value will be an empty string in two situations:
 //  1. No match was found
 //  2. The match was an empty string
-func (regex Reg) FindString(str string) string {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) FindString(str string) string {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return ""
 	}
@@ -103,8 +117,8 @@ func (regex Reg) FindString(str string) string {
 // number of groups. The validity of a group (whether or not it matched anything) can be determined with
 // [Group.IsValid], or by checking that both indices of the group are >= 0.
 // The second-return value is nil if no match was found.
-func (regex Reg) FindSubmatch(str string) (Match, error) {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) FindSubmatch(str string) (Match, error) {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Match{}, fmt.Errorf("no match found")
 	} else {
@@ -121,9 +135,9 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 //  2. Group n found a zero-length match
 //
 // A return value of nil indicates no match.
-func (regex Reg) FindStringSubmatch(str string) []string {
-	matchStr := make([]string, regex.numGroups+1)
-	match, err := regex.FindSubmatch(str)
+func (re Reg) FindStringSubmatch(str string) []string {
+	matchStr := make([]string, re.numGroups+1)
+	match, err := re.FindSubmatch(str)
 	if err != nil {
 		return nil
 	}
@@ -145,8 +159,8 @@ func (regex Reg) FindStringSubmatch(str string) []string {
 // FindAllString is the 'all' version of [FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
-func (regex Reg) FindAllString(str string) []string {
-	zerogroups := regex.FindAll(str)
+func (re Reg) FindAllString(str string) []string {
+	zerogroups := re.FindAll(str)
 	matchStrs := funcMap(zerogroups, func(g Group) string {
 		return str[g.StartIdx:g.EndIdx]
 	})
@@ -155,14 +169,14 @@ func (regex Reg) FindAllString(str string) []string {

 // FindNthMatch return the 'n'th match of the regex in the given string.
 // It returns an error (!= nil) if there are fewer than 'n' matches in the string.
-func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
+func (re Reg) FindNthMatch(str string, n int) (Match, error) {
 	idx := 0
 	matchNum := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			matchNum++
 		}
@@ -175,14 +189,14 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
 }

 // FindAllSubmatch returns a slice of matches in the given string.
-func (regex Reg) FindAllSubmatch(str string) []Match {
+func (re Reg) FindAllSubmatch(str string) []Match {
 	idx := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	indices := make([]Match, 0)
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			indices = append(indices, matchIdx)
 		}
@@ -191,7 +205,30 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }

-func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState) []nfaState {
+// FindAllStringSubmatch returns a double-slice of strings. Each slice contains the text of a match, including all submatches.
+// A return value of nil indicates no match.
+func (re Reg) FindAllStringSubmatch(str string) [][]string {
+	match := re.FindAllSubmatch(str)
+	if len(match) == 0 {
+		return nil
+	}
+	rtv := make([][]string, len(match))
+	for i := range rtv {
+		rtv[i] = make([]string, re.numGroups+1)
+	}
+	rtv = funcMap(match, func(m Match) []string {
+		return funcMap(m, func(g Group) string {
+			if g.IsValid() {
+				return str[g.StartIdx:g.EndIdx]
+			} else {
+				return ""
+			}
+		})
+	})
+	return rtv
+}
+
+func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
 	if stateExists(list, state) || stateExists(visited, state) {
 		return list
 	}
@@ -199,32 +236,32 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread

 	if state.isKleene || state.isQuestion {
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		return list
 	}
 	if state.isAlternation {
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		return list
 	}
 	state.threadGroups = append([]Group{}, threadGroups...)
 	if state.assert != noneAssert {
-		if state.checkAssertion(str, idx) {
+		if state.checkAssertion(str, idx, preferLongest) {
 			copyThread(state.next, state)
-			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 		}
 	}
 	if state.groupBegin {
 		state.threadGroups[state.groupNum].StartIdx = idx
-		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	if state.groupEnd {
 		state.threadGroups[state.groupNum].EndIdx = idx
-		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	return append(list, state)

@@ -233,7 +270,7 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
-func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
+func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) {
 	// Base case - exit if offset exceeds string's length
 	if offset > len(str) {
 		// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
@@ -248,7 +285,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
 	if start.assert != noneAssert {
-		if start.checkAssertion(str, offset) == false {
+		if start.checkAssertion(str, offset, preferLongest) == false {
 			i++
 			return false, []Group{}, i
 		}
@@ -256,7 +293,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in

 	start.threadGroups = newMatch(numGroups + 1)
 	start.threadGroups[0].StartIdx = i
-	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil)
+	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest)
 	var match Match = nil
 	for idx := i; idx <= len(str); idx++ {
 		if len(currentStates) == 0 {
@@ -273,10 +310,12 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			if currentState.isLast {
 				currentState.threadGroups[0].EndIdx = idx
 				match = append([]Group{}, currentState.threadGroups...)
-				break
-			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
-				if currentState.contentContains(str, idx) {
-					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil)
+				if !preferLongest {
+					break
+				}
+			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
+				if currentState.contentContains(str, idx, preferLongest) {
+					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
 				}
 			}
 		}
@@ -291,3 +330,131 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	}
 	return false, []Group{}, i + 1
 }
+
+// Expand appends template to dst, expanding any variables in template to the relevant capturing group.
+//
+// A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group.
+// To insert a literal $, do not put a number after it. Alternatively, you can use $$.
+// src is the input string, and match must be the result of [Reg.FindSubmatch].
+func (re Reg) Expand(dst string, template string, src string, match Match) string {
+	templateRuneSlc := []rune(template)
+	srcRuneSlc := []rune(src)
+	i := 0
+	for i < len(templateRuneSlc) {
+		c := templateRuneSlc[i]
+		if c == '$' {
+			i += 1
+			// The dollar sign is the last character of the string, or it is followed by another dollar sign
+			if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' {
+				dst += "$"
+				i++
+			} else {
+				numStr := ""
+				for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
+					numStr += string(templateRuneSlc[i])
+					i++
+				}
+				if numStr == "" {
+					dst += "$"
+				} else {
+					num, _ := strconv.Atoi(numStr)
+					if num < len(match) {
+						dst += string(srcRuneSlc[match[num].StartIdx:match[num].EndIdx])
+					} else {
+						dst += "$" + numStr
+					}
+				}
+			}
+		} else {
+			dst += string(c)
+			i++
+		}
+	}
+	return dst
+}
+
+// LiteralPrefix returns a string that must begin any match of the given regular expression.
+// The second return value is true if the string comprises the entire expression.
+func (re Reg) LiteralPrefix() (prefix string, complete bool) {
+	state := re.start
+	if state.assert != noneAssert {
+		state = state.next
+	}
+	for !(state.isLast) && (!state.isAlternation) && len(state.content) == 1 && state.assert == noneAssert {
+		if state.groupBegin || state.groupEnd {
+			state = state.next
+			continue
+		}
+		prefix += string(rune(state.content[0]))
+		state = state.next
+	}
+	if state.isLast {
+		complete = true
+	} else {
+		complete = false
+	}
+	return prefix, complete
+}
+
+// ReplaceAll replaces all matches of the expression in src, with the text in repl. In repl, variables are interpreted
+// as they are in [Reg.Expand]. The resulting string is returned.
+func (re Reg) ReplaceAll(src string, repl string) string {
+	matches := re.FindAllSubmatch(src)
+	i := 0
+	currentMatch := 0
+	dst := ""
+	for i < len(src) {
+		if currentMatch < len(matches) && matches[currentMatch][0].IsValid() && i == matches[currentMatch][0].StartIdx {
+			dst += re.Expand("", repl, src, matches[currentMatch])
+			i = matches[currentMatch][0].EndIdx
+			currentMatch++
+		} else {
+			dst += string(src[i])
+			i++
+		}
+	}
+	return dst
+}
+
+// ReplaceAllLiteral replaces all matches of the expression in src, with the text in repl. The text is replaced directly,
+// without any expansion.
+func (re Reg) ReplaceAllLiteral(src string, repl string) string {
+	zerogroups := re.FindAll(src)
+	currentMatch := 0
+	i := 0
+	dst := ""
+
+	for i < len(src) {
+		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
+			dst += repl
+			i = zerogroups[currentMatch].EndIdx
+			currentMatch += 1
+		} else {
+			dst += string(src[i])
+			i++
+		}
+	}
+	return dst
+}
+
+// ReplaceAllFunc replaces every match of the expression in src, with the return value of the function replFunc.
+// replFunc takes in the matched string. The return value is substituted in directly without expansion.
+func (re Reg) ReplaceAllFunc(src string, replFunc func(string) string) string {
+	zerogroups := re.FindAll(src)
+	currentMatch := 0
+	i := 0
+	dst := ""
+
+	for i < len(src) {
+		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
+			dst += replFunc(src[zerogroups[currentMatch].StartIdx:zerogroups[currentMatch].EndIdx])
+			i = zerogroups[currentMatch].EndIdx
+			currentMatch += 1
+		} else {
+			dst += string(src[i])
+			i++
+		}
+	}
+	return dst
+
+}
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -133,7 +133,7 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {

 // Checks if the given state's assertion is true. Returns true if the given
 // state doesn't have an assertion.
-func (s nfaState) checkAssertion(str []rune, idx int) bool {
+func (s nfaState) checkAssertion(str []rune, idx int, preferLongest bool) bool {
 	if s.assert == alwaysTrueAssert {
 		return true
 	}
@@ -183,7 +183,7 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 			strToMatch = string(runesToMatch)
 		}

-		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex}
+		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex, preferLongest}
 		matchIndices := regComp.FindAll(strToMatch)

 		numMatchesFound := 0
@@ -210,9 +210,9 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 }

 // Returns true if the contents of 's' contain the value at the given index of the given string
-func (s nfaState) contentContains(str []rune, idx int) bool {
+func (s nfaState) contentContains(str []rune, idx int, preferLongest bool) bool {
 	if s.assert != noneAssert {
-		return s.checkAssertion(str, idx)
+		return s.checkAssertion(str, idx, preferLongest)
 	}
 	if idx >= len(str) {
 		return false
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -25,7 +25,9 @@ var reTests = []struct {
 	{"a*b", nil, "qwqw", []Group{}},
 	{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
 	{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
-	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	// This match will only happen with Longest()
+	// {"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}},
 	{"b*a*a", nil, "bba", []Group{{0, 3}}},
 	{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
 	{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
@@ -537,7 +539,9 @@ var groupTests = []struct {
 	{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
 	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	// This match will only happen with Longest()
+	//	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 3}, {0, 3}, {-1, -1}}}},
 	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
 	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
@@ -857,6 +861,60 @@ func TestFindStringSubmatch(t *testing.T) {
 		})
 	}
 }
+
+func TestFindAllStringSubmatch(t *testing.T) {
+	for _, test := range groupTests {
+		t.Run(test.re+"	"+test.str, func(t *testing.T) {
+			regComp, err := Compile(test.re, test.flags...)
+			if err != nil {
+				if test.result != nil {
+					panic(err)
+				}
+			}
+			matchStrs := regComp.FindAllStringSubmatch(test.str)
+			if matchStrs == nil {
+				if len(test.result) != 0 {
+					expectedStrs := funcMap(test.result, func(m Match) []string {
+						return funcMap(m, func(g Group) string {
+							if g.IsValid() {
+								return test.str[g.StartIdx:g.EndIdx]
+							} else {
+								return ""
+							}
+						})
+					})
+					t.Errorf("Wanted %v got no match\n", expectedStrs)
+				}
+			} else if len(test.result) == 0 {
+				t.Errorf("Wanted no match got %v\n", matchStrs)
+			} else {
+				expectedStrs := funcMap(test.result, func(m Match) []string {
+					return funcMap(m, func(g Group) string {
+						if g.IsValid() {
+							return test.str[g.StartIdx:g.EndIdx]
+						} else {
+							return ""
+						}
+					})
+				})
+				for i, matchStr := range matchStrs {
+					for j, groupStr := range matchStr {
+						if groupStr == "" {
+							if j < len(expectedStrs[i]) && expectedStrs[i][j] != "" {
+								t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
+							}
+						} else {
+							if expectedStrs[i][j] != groupStr {
+								t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
+							}
+						}
+					}
+				}
+			}
+		})
+	}
+}
+
 func TestFindAllSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
Author	SHA1	Message	Date
Aadhavan Srinivasan	073f231b89	Added function and examples for ReplaceAllFunc()	2025-02-10 21:35:51 -05:00
Aadhavan Srinivasan	3b7257c921	Wrote function and example for ReplaceAllLiteral()	2025-02-10 21:25:49 -05:00
Aadhavan Srinivasan	668df8b70a	Wrote MarshalText() and UnmarshalText() to implement TextMarshaler and TextUnmarshaler	2025-02-10 12:30:48 -05:00
Aadhavan Srinivasan	214acf7e0f	Wrote example for ReplaceAll(); fixed out-of-bounds bug in Expand()	2025-02-10 12:30:17 -05:00
Aadhavan Srinivasan	50221ff4d9	Wrote ReplaceAll(), to replace all matches of the regex with a given string	2025-02-10 12:29:54 -05:00
Aadhavan Srinivasan	5ab95f512a	Updated docs	2025-02-10 09:36:00 -05:00
Aadhavan Srinivasan	e7da678408	Removed obsolete documentation	2025-02-10 09:35:16 -05:00
Aadhavan Srinivasan	ab363e2766	Rewrote test for 'FindString()' to use lookarounds	2025-02-10 09:24:47 -05:00
Aadhavan Srinivasan	c803e45415	Added example for 'FindStringSubmatch()'	2025-02-10 09:19:24 -05:00
Aadhavan Srinivasan	525296f239	Added examples for 'FindAllString()' , 'FindAllSubmatch()' and 'FindAllStringSubmatch()'	2025-02-10 09:10:39 -05:00
Aadhavan Srinivasan	eb0ab9f7ec	Wrote test for FindAllStringSubmatch()	2025-02-10 08:39:20 -05:00
Aadhavan Srinivasan	17a7dbae4c	Wrote FindAllStringSubmatch()	2025-02-10 08:39:10 -05:00
Aadhavan Srinivasan	f2279acd98	Fixed mistake in docs	2025-02-10 08:12:09 -05:00
Aadhavan Srinivasan	662527c478	Merge pull request 'Implement PCRE Matching (prefer left-branch)' (#2 ) from implementPCREMatchingRules into master Reviewed-on: #2	2025-02-09 15:24:26 -06:00
Aadhavan Srinivasan	d1958f289c	Commented out tests that would only pass with Longest()	2025-02-09 16:08:16 -05:00
Aadhavan Srinivasan	15ee49f42e	Rename method receivers from 'regex' to 're' (it's shorter)	2025-02-09 15:51:46 -05:00
Aadhavan Srinivasan	b60ded4136	Don't break when a match is found, if we are looking for the longest match	2025-02-09 15:48:33 -05:00
Aadhavan Srinivasan	9fbb99f86c	Wrote example for Longest()	2025-02-09 15:47:57 -05:00
Aadhavan Srinivasan	af15904f3b	Updated documentation	2025-02-09 15:41:13 -05:00
Aadhavan Srinivasan	d522f50b50	Wrote new example functions	2025-02-09 15:40:59 -05:00
Aadhavan Srinivasan	fb47e082eb	Wrote new methods Expand() and preferLongest(); Use new function signatures (with preferLongest); only characters should be added to next state list	2025-02-09 15:40:39 -05:00
Aadhavan Srinivasan	1f5a363539	Use new function signatures (with preferLongest)	2025-02-09 15:39:09 -05:00
Aadhavan Srinivasan	9e12f9dcb3	Added field to Reg, denoting if we prefer longest match (POSIX style) or not (perl style)	2025-02-09 15:38:26 -05:00