Added Match() and FindStringSubmatch(); removed old code; updated comments

2025-02-09 08:58:09 -05:00
parent f15a5cae34
commit d4e3942d27
1 changed files with 39 additions and 60 deletions
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -14,7 +14,7 @@ import (
 // See [Reg.FindSubmatch] for an example.
 type Match []Group

-// a Group represents a group. It contains the start index and end index of the match
+// a Group represents a capturing group. It contains the start and index of the group.
 type Group struct {
 	StartIdx int
 	EndIdx   int
@@ -58,7 +58,7 @@ func (idx Group) String() string {
 	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }

-// Returns whether a group is valid (ie. whether it matched any text). It
+// IsValid returns whether a group is valid (ie. whether it matched any text). It
 // simply ensures that both indices of the group are >= 0.
 func (g Group) IsValid() bool {
 	return g.StartIdx >= 0 && g.EndIdx >= 0
@@ -69,63 +69,6 @@ func getZeroGroup(m Match) Group {
 	return m[0]
 }

-// takeZeroState takes the 0-state (if such a transition exists) for all states in the
-// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
-// the second ret val is true.
-// If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
-//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
-//	for _, state := range states {
-//		if len(state.transitions[epsilon]) > 0 {
-//			for _, s := range state.transitions[epsilon] {
-//				if s.threadGroups == nil {
-//					s.threadGroups = newMatch(numGroups + 1)
-//				}
-//				copy(s.threadGroups, state.threadGroups)
-//				if s.groupBegin {
-//					s.threadGroups[s.groupNum].StartIdx = idx
-//					//					openParenGroups = append(openParenGroups, s.groupNum)
-//				}
-//				if s.groupEnd {
-//					s.threadGroups[s.groupNum].EndIdx = idx
-//					//					closeParenGroups = append(closeParenGroups, s.groupNum)
-//				}
-//			}
-//			rtv = append(rtv, state.transitions[epsilon]...)
-//		}
-//	}
-//	for _, state := range rtv {
-//		if len(state.transitions[epsilon]) > 0 {
-//			return rtv, true
-//		}
-//	}
-//	return rtv, false
-//}
-
-// zeroMatchPossible returns true if a zero-length match is possible
-// from any of the given states, given the string and our position in it.
-// It uses the same algorithm to find zero-states as the one inside the loop,
-// so I should probably put it in a function.
-//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
-//	zeroStates, isZero := takeZeroState(states, numGroups, idx)
-//	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
-//	tempstates = append(tempstates, states...)
-//	tempstates = append(tempstates, zeroStates...)
-//	num_appended := 0 // number of unique states addded to tempstates
-//	for isZero == true {
-//		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
-//		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
-//		if num_appended == 0 { // break if we haven't appended any more unique values
-//			break
-//		}
-//	}
-//	for _, state := range tempstates {
-//		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
-//			return true
-//		}
-//	}
-//	return false
-//}
-
 // Prunes the slice by removing overlapping indices.
 func pruneIndices(indices []Match) []Match {
 	// First, sort the slice by the start indices
@@ -164,6 +107,12 @@ func (regex Reg) Find(str string) (Group, error) {
 	return getZeroGroup(match), nil
 }

+// Match returns a boolean value, indicating whether the regex found a match in the given string.
+func (regex Reg) Match(str string) bool {
+	_, err := regex.Find(str)
+	return err == nil
+}
+
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
 func (regex Reg) FindAll(str string) []Group {
@@ -199,7 +148,37 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 	}
 }

-// FindAllString is the 'all' version of FindString.
+// FindStringSubmatch is the 'string' version of [FindSubmatch]. It returns a slice of strings,
+// where the string at index i contains the text matched by the i-th capturing group.
+// The 0-th index represents the entire match.
+// An empty string at index n could mean:
+// ,
+//  1. Group n did not find a match
+//  2. Group n found a zero-length match
+//
+// A return value of nil indicates no match.
+func (regex Reg) FindStringSubmatch(str string) []string {
+	matchStr := make([]string, regex.numGroups+1)
+	match, err := regex.FindSubmatch(str)
+	if err != nil {
+		return nil
+	}
+	nonEmptyMatchFound := false
+	for i := range match {
+		if match[i].IsValid() {
+			matchStr[i] = str[match[i].StartIdx:match[i].EndIdx]
+			nonEmptyMatchFound = true
+		} else {
+			matchStr[i] = ""
+		}
+	}
+	if nonEmptyMatchFound == false {
+		return nil
+	}
+	return matchStr
+}
+
+// FindAllString is the 'all' version of [FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
 func (regex Reg) FindAllString(str string) []string {