Added function and examples for ReplaceAllFunc()

Wrote function and example for ReplaceAllLiteral()
Wrote MarshalText() and UnmarshalText() to implement TextMarshaler and TextUnmarshaler
2025-02-10 21:35:51 -05:00 · 2025-02-10 21:25:49 -05:00 · 2025-02-10 12:30:48 -05:00 · 2025-02-10 12:30:17 -05:00 · 2025-02-10 12:29:54 -05:00 · 2025-02-10 09:36:00 -05:00
5 changed files with 253 additions and 23 deletions
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -31,6 +31,22 @@ func (re Reg) String() string {
 	return re.str
 }
 // MarshalText implements [encoding.TextMarshaler]. The returned bytes are the text produced by [Reg.String].
 // Any flags passed as arguments (including the effect of calling [Reg.Longest]) are not part of that text and are lost.
 func (re *Reg) MarshalText() ([]byte, error) {
 	text := re.String()
 	return []byte(text), nil
 }
 // UnmarshalText implements [encoding.TextUnmarshaler]. It passes the given byte-slice, converted to a string,
 // to [Compile] (without any flags). On success the compiled expression overwrites *re; on failure re is left
 // untouched. The error returned by [Compile] is returned as-is.
 func (re *Reg) UnmarshalText(text []byte) error {
 	compiled, err := Compile(string(text))
 	if err != nil {
 		return err
 	}
 	*re = compiled
 	return nil
 }
 // Longest sets the preferLongest field of re to true.
 // NOTE(review): presumably this makes later searches prefer the longest match, like
 // [regexp.Regexp.Longest] - confirm against the matching code.
 func (re *Reg) Longest() {
 	re.preferLongest = true
 }
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -33,7 +33,7 @@ Perl classes:
 	\d				Match any digit character ([0-9])
 	\D				Match any non-digit character ([^0-9])
 	\w				Match any word character ([a-zA-Z0-9_])
-	\W				Match any word character ([^a-zA-Z0-9_])
+	\W				Match any non-word character ([^a-zA-Z0-9_])
 	\s				Match any whitespace character ([ \t\n])
 	\S				Match any non-whitespace character ([^ \t\n])
@@ -105,23 +105,7 @@ The key differences are mentioned below.
 1. Greediness:
-This engine does not support non-greedy operators. All operators are always greedy in nature, and will try
+This engine currently does not support non-greedy operators.
 to match as much as they can, while still allowing for a successful match. For example, given the regex:
 	y*y
 The engine will match as many 'y's as it can, while still allowing the trailing 'y' to be matched.
 Another, more subtle example is the following regex:
 	x|xx
 While the stdlib implementation (and most other engines) will prefer matching the first item of the alternation,
 this engine will go for the longest possible match, regardless of the order of the alternation. Although this
 strays from the convention, it results in a nice rule-of-thumb - the engine is ALWAYS greedy.
 The stdlib implementation has a function [regexp.Regexp.Longest] which makes future searches prefer the longest match.
 That is the default (and unchangeable) behavior in this engine.
 2. Byte-slices and runes:
@@ -166,7 +150,7 @@ The following features from [regexp] are (currently) NOT supported:
 1. Named capturing groups
 2. Non-greedy operators
 3. Unicode character classes
- 4. Embedded flags (flags are passed as arguments to [Compile])
+ 4. Embedded flags (flags are instead passed as arguments to [Compile])
 5. Literal text with \Q ... \E
 The following features are not available in [regexp], but are supported in my engine:
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -2,6 +2,7 @@ package regex_test
 import (
 	"fmt"
 	"strings"
 	"gitea.twomorecents.org/Rockingcool/kleingrep/regex"
 )
@@ -32,12 +33,12 @@ func ExampleReg_FindAll() {
 }
 func ExampleReg_FindString() {
-	regexStr := `\d+`
+	regexStr := `\w+\s+(?=sheep)`
 	regexComp := regex.MustCompile(regexStr)
-	matchStr := regexComp.FindString("The year of our lord, 2025")
+	matchStr := regexComp.FindString("pink cows and yellow sheep")
 	fmt.Println(matchStr)
-	// Output: 2025
+	// Output: yellow
 }
 func ExampleReg_FindSubmatch() {
@@ -53,6 +54,71 @@ func ExampleReg_FindSubmatch() {
 	// 2	3
 }
 func ExampleReg_FindStringSubmatch() {
 	// Three capturing groups: year, month and day.
 	dateRe := regex.MustCompile(`(\d{4})-(\d{2})-(\d{2})`)
 	groups := dateRe.FindStringSubmatch(`The date is 2025-02-10`)

 	fmt.Println(groups[1])
 	fmt.Println(groups[3])
 	// Output: 2025
 	// 10
 }
 func ExampleReg_FindAllSubmatch() {
 	decimalRe := regex.MustCompile(`(\d)\.(\d)(\d)`)
 	found := decimalRe.FindAllSubmatch("3.14+8.97")

 	// Print the first two groups of the first two matches (matches and groups are 0-indexed).
 	for _, m := range found[:2] {
 		fmt.Println(m[0]) // 0-group (entire match)
 		fmt.Println(m[1]) // 1st group
 	}
 	// Output: 0	4
 	// 0	1
 	// 5	9
 	// 5	6
 }
 func ExampleReg_FindAllString() {
 	ipRe := regex.MustCompile(`<0-255>\.<0-255>\.<0-255>\.<0-255>`)
 	addrs := ipRe.FindAllString(`192.168.220.7 pings 9.9.9.9`)

 	for _, addr := range addrs[:2] {
 		fmt.Println(addr)
 	}
 	// Output: 192.168.220.7
 	// 9.9.9.9
 }
 func ExampleReg_FindAllStringSubmatch() {
 	// The expression reads as:
 	//   - the literal text 'https://'
 	//   - group 1: one or more letters, digits or periods
 	//   - a forward slash
 	//   - group 2: one or more of: word character, period, question mark, equals sign
 	urlRe := regex.MustCompile(`https://([a-z0-9\.]+)/([\w.?=]+)`, regex.RE_CASE_INSENSITIVE)
 	text := `You can find me at https://twomorecents.org/index.html and https://news.ycombinator.com/user?id=aadhavans`
 	matches := urlRe.FindAllStringSubmatch(text)

 	// Index 0 of every match is the entire match; the groups start at index 1.
 	for _, m := range matches[:2] {
 		fmt.Println(m[1]) // 1st group of this match
 		fmt.Println(m[2]) // 2nd group of this match
 	}
 	// Output: twomorecents.org
 	// index.html
 	// news.ycombinator.com
 	// user?id=aadhavans
 }
 func ExampleReg_Expand() {
 	inputStr := `option1: value1
 	option2: value2`
@@ -89,3 +155,27 @@ func ExampleReg_Longest() {
 	// Output: x
 	// xx
 }
 func ExampleReg_ReplaceAll() {
 	// Swap every digit with the word character that follows it.
 	pairRe := regex.MustCompile(`(\d)(\w)`)
 	swapped := pairRe.ReplaceAll("5d9t", `$2$1`)
 	fmt.Println(swapped)
 	// Output: d5t9
 }
 func ExampleReg_ReplaceAllLiteral() {
 	animalRe := regex.MustCompile(`fox|dog`)
 	sentence := "the quick brown fox jumped over the lazy dog"
 	replaced := animalRe.ReplaceAllLiteral(sentence, `duck`)
 	fmt.Println(replaced)
 	// Output: the quick brown duck jumped over the lazy duck
 }
 func ExampleReg_ReplaceAllFunc() {
 	// Every run of five or more word characters is handed to strings.ToUpper.
 	longWordRe := regex.MustCompile(`\w{5,}`)
 	sentence := `all five or more letter words in this string are capitalized`
 	shouted := longWordRe.ReplaceAllFunc(sentence, strings.ToUpper)
 	fmt.Println(shouted)
 	// Output: all five or more LETTER WORDS in this STRING are CAPITALIZED
 }
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -205,6 +205,29 @@ func (re Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }
 // FindAllStringSubmatch returns a double-slice of strings. Each inner slice contains the text of one match:
 // the text of the 0-group (the entire match) first, followed by the text of every submatch.
 // A group for which [Group.IsValid] reports false is represented by the empty string.
 // A return value of nil indicates no match.
 func (re Reg) FindAllStringSubmatch(str string) [][]string {
 	matches := re.FindAllSubmatch(str)
 	if len(matches) == 0 {
 		return nil
 	}
 	// funcMap builds the result slices itself, so nothing needs to be allocated up front.
 	return funcMap(matches, func(m Match) []string {
 		return funcMap(m, func(g Group) string {
 			if g.IsValid() {
 				return str[g.StartIdx:g.EndIdx]
 			}
 			return ""
 		})
 	})
 }
 func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
 	if stateExists(list, state) || stateExists(visited, state) {
 		return list
@@ -327,7 +350,7 @@ func (re Reg) Expand(dst string, template string, src string, match Match) strin
 				i++
 			} else {
 				numStr := ""
-				for unicode.IsDigit(templateRuneSlc[i]) {
+				for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
 					numStr += string(templateRuneSlc[i])
 					i++
 				}
@@ -372,3 +395,66 @@ func (re Reg) LiteralPrefix() (prefix string, complete bool) {
 	}
 	return prefix, complete
 }
 // ReplaceAll replaces all matches of the expression in src, with the text in repl. In repl, variables are interpreted
 // as they are in [Reg.Expand]. The resulting string is returned. Text outside of the matches is copied
 // to the result byte-for-byte.
 //
 // NOTE(review): StartIdx and EndIdx are compared against byte offsets into src here; confirm that the
 // matcher reports byte offsets rather than rune offsets.
 func (re Reg) ReplaceAll(src string, repl string) string {
 	matches := re.FindAllSubmatch(src)
 	i := 0
 	currentMatch := 0
 	dst := ""
 	for i < len(src) {
 		if currentMatch < len(matches) && matches[currentMatch][0].IsValid() && i == matches[currentMatch][0].StartIdx {
 			// At the start of the next match: emit the expanded template and skip past the match.
 			dst += re.Expand("", repl, src, matches[currentMatch])
 			i = matches[currentMatch][0].EndIdx
 			currentMatch++
 		} else {
 			// Slice instead of converting: string(src[i]) would treat the byte as a code point and
 			// re-encode it, corrupting every multi-byte UTF-8 sequence in the unmatched text.
 			dst += src[i : i+1]
 			i++
 		}
 	}
 	return dst
 }
 // ReplaceAllLiteral replaces all matches of the expression in src, with the text in repl. The text is replaced directly,
 // without any expansion. Text outside of the matches is copied to the result byte-for-byte.
 //
 // NOTE(review): StartIdx and EndIdx are compared against byte offsets into src here; confirm that the
 // matcher reports byte offsets rather than rune offsets.
 func (re Reg) ReplaceAllLiteral(src string, repl string) string {
 	zerogroups := re.FindAll(src)
 	currentMatch := 0
 	i := 0
 	dst := ""
 	for i < len(src) {
 		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
 			// At the start of the next match: emit repl verbatim and skip past the match.
 			dst += repl
 			i = zerogroups[currentMatch].EndIdx
 			currentMatch++
 		} else {
 			// Slice instead of converting: string(src[i]) would treat the byte as a code point and
 			// re-encode it, corrupting every multi-byte UTF-8 sequence in the unmatched text.
 			dst += src[i : i+1]
 			i++
 		}
 	}
 	return dst
 }
 // ReplaceAllFunc replaces every match of the expression in src, with the return value of the function replFunc.
 // replFunc takes in the matched string. The return value is substituted in directly without expansion.
 // Text outside of the matches is copied to the result byte-for-byte.
 //
 // NOTE(review): StartIdx and EndIdx are used as byte offsets into src here; confirm that the
 // matcher reports byte offsets rather than rune offsets.
 func (re Reg) ReplaceAllFunc(src string, replFunc func(string) string) string {
 	zerogroups := re.FindAll(src)
 	currentMatch := 0
 	i := 0
 	dst := ""
 	for i < len(src) {
 		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
 			// At the start of the next match: substitute whatever replFunc returns for the matched text.
 			group := zerogroups[currentMatch]
 			dst += replFunc(src[group.StartIdx:group.EndIdx])
 			i = group.EndIdx
 			currentMatch++
 		} else {
 			// Slice instead of converting: string(src[i]) would treat the byte as a code point and
 			// re-encode it, corrupting every multi-byte UTF-8 sequence in the unmatched text.
 			dst += src[i : i+1]
 			i++
 		}
 	}
 	return dst
 }
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -861,6 +861,60 @@ func TestFindStringSubmatch(t *testing.T) {
 		})
 	}
 }
 // TestFindAllStringSubmatch runs every case of groupTests through [Reg.FindAllStringSubmatch] and compares
 // the returned text with the substrings of test.str selected by the expected groups.
 func TestFindAllStringSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
 			regComp, err := Compile(test.re, test.flags...)
 			if err != nil && test.result != nil {
 				// An expression that is expected to match must compile. Fail this subtest
 				// instead of panicking, so that the remaining cases still run.
 				t.Fatalf("compiling %q: %v", test.re, err)
 			}

 			// Text of every expected group; a group that is not valid maps to "".
 			expectedStrs := funcMap(test.result, func(m Match) []string {
 				return funcMap(m, func(g Group) string {
 					if g.IsValid() {
 						return test.str[g.StartIdx:g.EndIdx]
 					}
 					return ""
 				})
 			})

 			matchStrs := regComp.FindAllStringSubmatch(test.str)
 			switch {
 			case matchStrs == nil:
 				if len(test.result) != 0 {
 					t.Errorf("Wanted %v got no match\n", expectedStrs)
 				}
 			case len(test.result) == 0:
 				t.Errorf("Wanted no match got %v\n", matchStrs)
 			default:
 				for i, matchStr := range matchStrs {
 					if i >= len(expectedStrs) {
 						// More matches than expected: report it rather than index out of range.
 						t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 						break
 					}
 					for j, groupStr := range matchStr {
 						// A group missing from the expectation is treated as the empty string.
 						want := ""
 						if j < len(expectedStrs[i]) {
 							want = expectedStrs[i][j]
 						}
 						if groupStr != want {
 							t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 						}
 					}
 				}
 			}
 		})
 	}
 }
 func TestFindAllSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
Author	SHA1	Message	Date
Aadhavan Srinivasan	073f231b89	Added function and examples for ReplaceAllFunc()	2025-02-10 21:35:51 -05:00
Aadhavan Srinivasan	3b7257c921	Wrote function and example for ReplaceAllLiteral()	2025-02-10 21:25:49 -05:00
Aadhavan Srinivasan	668df8b70a	Wrote MarshalText() and UnmarshalText() to implement TextMarshaler and TextUnmarshaler	2025-02-10 12:30:48 -05:00
Aadhavan Srinivasan	214acf7e0f	Wrote example for ReplaceAll(); fixed out-of-bounds bug in Expand()	2025-02-10 12:30:17 -05:00
Aadhavan Srinivasan	50221ff4d9	Wrote ReplaceAll(), to replace all matches of the regex with a given string	2025-02-10 12:29:54 -05:00
Aadhavan Srinivasan	5ab95f512a	Updated docs	2025-02-10 09:36:00 -05:00
Aadhavan Srinivasan	e7da678408	Removed obsolete documentation	2025-02-10 09:35:16 -05:00
Aadhavan Srinivasan	ab363e2766	Rewrote test for 'FindString()' to use lookarounds	2025-02-10 09:24:47 -05:00
Aadhavan Srinivasan	c803e45415	Added example for 'FindStringSubmatch()'	2025-02-10 09:19:24 -05:00
Aadhavan Srinivasan	525296f239	Added examples for 'FindAllString()' , 'FindAllSubmatch()' and 'FindAllStringSubmatch()'	2025-02-10 09:10:39 -05:00
Aadhavan Srinivasan	eb0ab9f7ec	Wrote test for FindAllStringSubmatch()	2025-02-10 08:39:20 -05:00
Aadhavan Srinivasan	17a7dbae4c	Wrote FindAllStringSubmatch()	2025-02-10 08:39:10 -05:00
Aadhavan Srinivasan	f2279acd98	Fixed mistake in docs	2025-02-10 08:12:09 -05:00