Wrote test for 'FindSubmatch'

Updated call to 'isValid' with call to 'IsValid'
Wrote 'reg.FindSubmatch()' which returns the leftmost match with submatches, renamed 'isValid' to 'IsValid' to export it, renamed 'ToString' to 'String'
2025-02-01 11:09:05 -05:00 · 2025-02-01 11:06:26 -05:00 · 2025-02-01 11:06:03 -05:00 · 2025-02-01 11:04:49 -05:00 · 2025-02-01 11:04:24 -05:00
5 changed files with 80 additions and 8 deletions
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -137,7 +137,7 @@ func main() {
 					fmt.Fprintf(out, "Line %d:\n", lineNum)
 				}
 				for _, m := range matchIndices {
-					fmt.Fprintf(out, "%s\n", m.ToString())
+					fmt.Fprintf(out, "%s\n", m.String())
 				}
 				err := out.Flush()
 				if err != nil {
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -18,6 +18,12 @@ type Reg struct {
 	numGroups int
 }
 // NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
 func (r Reg) NumSubexp() int {
 	return r.numGroups
 }
 const concatRune rune = 0xF0001
 // Flags for shuntingYard - control its behavior
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -121,5 +121,36 @@ this engine will _always_ go for the longest possible match, regardless of the o
 My engine does not support byte-slices. When a matching function receives a string, it converts it into a
 rune-slice to iterate through it. While this has some space overhead, the convenience of built-in unicode
 support made the tradeoff worth it.
 3. Return values
 Rather than using primitives for return values, my engine defines two types that are used as return
 values: a [Group] represents a capturing group, and a [Match] represents a list of groups.
 [regexp] specifies a regular expression that gives a list of all the matching functions that it supports. The
 equivalent expression for this engine is:
 	Find(All)?(String)?(Submatch)?
 [Reg.Find] returns the index of the leftmost match in the string.
 If a function contains 'All' it returns all matches instead of just the leftmost one.
 If a function contains 'String' it returns the matched text, rather than the indices.
 If a function contains 'Submatch' it returns the match, including all submatches found by
 capturing groups.
 The term '0-group' is used to refer to the 0th capturing group of a match (which is the entire match).
 Given the following regex:
 	x(y)
 and the input string:
 	xyz
 The 0th group would contain 'xy' and the 1st group would contain 'y'. Any matching function without 'Submatch' in its name
 returns the 0-group.
 */
 package regex
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -35,10 +35,10 @@ func (m Match) numValidGroups() int {
 }
 // Returns a string containing the indices of all (valid) groups in the match
-func (m Match) ToString() string {
+func (m Match) String() string {
 	var toRet string
 	for i, g := range m {
-		if g.isValid() {
+		if g.IsValid() {
 			toRet += fmt.Sprintf("Group %d\n", i)
 			toRet += g.toString()
 			toRet += "\n"
@@ -52,8 +52,9 @@ func (idx Group) toString() string {
 	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }
-// Returns whether a group contains valid indices
+// IsValid returns whether a group is valid (i.e. whether it matched any text). It
-func (g Group) isValid() bool {
+// simply ensures that both indices of the group are >= 0.
 func (g Group) IsValid() bool {
 	return g.StartIdx >= 0 && g.EndIdx >= 0
 }
@@ -174,6 +175,20 @@ func (regex Reg) FindString(str string) string {
 	return str[zeroGroup.StartIdx:zeroGroup.EndIdx]
 }
 // FindSubmatch returns the leftmost match of the regex in the given string, including
 // the submatches matched by capturing groups. When a match is found, the returned [Match]
 // will always contain the same number of groups. The validity of a group (whether or not
 // it matched anything) can be determined with [Group.IsValid], or by checking that both
 // indices of the group are >= 0.
 //
 // The returned error is nil if a match was found. Otherwise an empty [Match] and a
 // non-nil error are returned.
 func (regex Reg) FindSubmatch(str string) (Match, error) {
 	match, err := regex.FindNthMatch(str, 1)
 	if err != nil {
 		// Any error from FindNthMatch is reported to the caller as "no match found".
 		return Match{}, fmt.Errorf("no match found")
 	}
 	return match, nil
 }
 // FindAllString is the 'all' version of FindString.
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
@@ -372,7 +387,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		// Check if we can find a zero-length match
 		if foundPath == false {
 			if ok := zeroMatchPossible(str, i, numGroups, currentStates...); ok {
-				if tempIndices[0].isValid() == false {
+				if tempIndices[0].IsValid() == false {
 					tempIndices[0] = Group{startIdx, startIdx}
 				}
 			}
@@ -382,7 +397,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			startIdx++
 			//	i++
 			//			}
-			if tempIndices.numValidGroups() > 0 && tempIndices[0].isValid() {
+			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
 				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 					return true, tempIndices, tempIndices[0].EndIdx + 1
 				} else {
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -767,6 +767,26 @@ func TestFindAllString(t *testing.T) {
 	}
 }
 // TestFindSubmatch runs FindSubmatch on every entry of groupTests and compares
 // each valid group of the returned match against the first expected match
 // (test.result[0]).
 func TestFindSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
 			regComp, err := Compile(test.re, test.flags...)
 			if err != nil {
 				if test.result != nil {
 					// Fail only this subtest rather than aborting the whole test binary.
 					t.Fatalf("unexpected compile error: %v", err)
 				}
 				// Compilation failed and no result was expected; there is nothing to match with.
 				return
 			}
 			match, err := regComp.FindSubmatch(test.str)
 			if err != nil {
 				// FindSubmatch reports a missing match through its error value.
 				if len(test.result) != 0 {
 					t.Errorf("Wanted %v	Got no match\n", test.result[0])
 				}
 				return
 			}
 			if len(test.result) == 0 {
 				// A match was found although none was expected; report it instead of
 				// indexing into an empty expectation below.
 				t.Errorf("Wanted no match	Got %v\n", match)
 				return
 			}
 			for i := range match {
 				if match[i].IsValid() {
 					// Guard the index so a surplus valid group is reported, not a panic.
 					if i >= len(test.result[0]) || test.result[0][i] != match[i] {
 						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 					}
 				}
 			}
 		})
 	}
 }
 func TestFindAllSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
@@ -779,7 +799,7 @@ func TestFindAllSubmatch(t *testing.T) {
 			matchIndices := regComp.FindAllSubmatch(test.str)
 			for i := range matchIndices {
 				for j := range matchIndices[i] {
-					if matchIndices[i][j].isValid() {
+					if matchIndices[i][j].IsValid() {
 						if test.result[i][j] != matchIndices[i][j] {
 							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 						}
Author	SHA1	Message	Date
Aadhavan Srinivasan	2a9ae0b68a	Wrote test for 'FindSubmatch'	2025-02-01 11:09:05 -05:00
Aadhavan Srinivasan	783ae2ad10	Updated call to 'isValid' with call to 'IsValid'	2025-02-01 11:06:26 -05:00
Aadhavan Srinivasan	b5e6bc112c	Wrote 'reg.FindSubmatch()' which returns the leftmost match with submatches, renamed 'isValid' to 'IsValid' to export it, renamed 'ToString' to 'String'	2025-02-01 11:06:03 -05:00
Aadhavan Srinivasan	206fea34cd	Added function to return the number of subexpressions in the group	2025-02-01 11:04:49 -05:00
Aadhavan Srinivasan	fcdb23524a	Added more documentation	2025-02-01 11:04:24 -05:00