2025-02-09 15:24:29 -06:00
6 changed files with 629 additions and 400 deletions
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -14,7 +14,7 @@ import (
 // See [Reg.FindSubmatch] for an example.
 type Match []Group
-// a Group represents a group. It contains the start index and end index of the match
+// A Group represents a capturing group. It contains the start index and end index of the group.
 type Group struct {
 	StartIdx int
 	EndIdx   int
@@ -58,7 +58,7 @@ func (idx Group) String() string {
 	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }
-// Returns whether a group is valid (ie. whether it matched any text). It
+// IsValid returns whether a group is valid (ie. whether it matched any text). It
 // simply ensures that both indices of the group are >= 0.
 func (g Group) IsValid() bool {
 	return g.StartIdx >= 0 && g.EndIdx >= 0
@@ -69,63 +69,6 @@ func getZeroGroup(m Match) Group {
 	return m[0]
 }
 // takeZeroState takes the 0-state (if such a transition exists) for all states in the
 // given slice. It returns the resulting states. If any of the resulting states is a 0-state,
 // the second ret val is true.
 // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
 //func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
 //	for _, state := range states {
 //		if len(state.transitions[epsilon]) > 0 {
 //			for _, s := range state.transitions[epsilon] {
 //				if s.threadGroups == nil {
 //					s.threadGroups = newMatch(numGroups + 1)
 //				}
 //				copy(s.threadGroups, state.threadGroups)
 //				if s.groupBegin {
 //					s.threadGroups[s.groupNum].StartIdx = idx
 //					//					openParenGroups = append(openParenGroups, s.groupNum)
 //				}
 //				if s.groupEnd {
 //					s.threadGroups[s.groupNum].EndIdx = idx
 //					//					closeParenGroups = append(closeParenGroups, s.groupNum)
 //				}
 //			}
 //			rtv = append(rtv, state.transitions[epsilon]...)
 //		}
 //	}
 //	for _, state := range rtv {
 //		if len(state.transitions[epsilon]) > 0 {
 //			return rtv, true
 //		}
 //	}
 //	return rtv, false
 //}
 // zeroMatchPossible returns true if a zero-length match is possible
 // from any of the given states, given the string and our position in it.
 // It uses the same algorithm to find zero-states as the one inside the loop,
 // so I should probably put it in a function.
 //func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
 //	zeroStates, isZero := takeZeroState(states, numGroups, idx)
 //	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
 //	tempstates = append(tempstates, states...)
 //	tempstates = append(tempstates, zeroStates...)
 //	num_appended := 0 // number of unique states added to tempstates
 //	for isZero == true {
 //		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
 //		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
 //		if num_appended == 0 { // break if we haven't appended any more unique values
 //			break
 //		}
 //	}
 //	for _, state := range tempstates {
 //		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
 //			return true
 //		}
 //	}
 //	return false
 //}
 // Prunes the slice by removing overlapping indices.
 func pruneIndices(indices []Match) []Match {
 	// First, sort the slice by the start indices
@@ -164,6 +107,12 @@ func (regex Reg) Find(str string) (Group, error) {
 	return getZeroGroup(match), nil
 }
 // Match reports whether the regex finds a match in the given string.
 // It returns true exactly when [Reg.Find] returns a nil error.
 func (regex Reg) Match(str string) bool {
 	if _, err := regex.Find(str); err != nil {
 		return false
 	}
 	return true
 }
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
 func (regex Reg) FindAll(str string) []Group {
@@ -199,7 +148,37 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 	}
 }
-// FindAllString is the 'all' version of FindString.
+// FindStringSubmatch is the 'string' version of [Reg.FindSubmatch]. It returns a slice of strings,
 // where the string at index i contains the text matched by the i-th capturing group.
 // The 0-th index represents the entire match.
 // An empty string at index n could mean:
 //
 //  1. Group n did not find a match
 //  2. Group n found a zero-length match
 //
 // A return value of nil indicates no match.
 func (regex Reg) FindStringSubmatch(str string) []string {
 	match, err := regex.FindSubmatch(str)
 	if err != nil {
 		// An error from FindSubmatch is surfaced to callers as "no match".
 		return nil
 	}
 	// One slot per capturing group, plus slot 0 for the entire match.
 	// Allocated only after the error check so the failure path does no work.
 	// Slots of groups that did not participate keep their zero value ("").
 	matchStr := make([]string, regex.numGroups+1)
 	anyValid := false
 	for i, g := range match {
 		if !g.IsValid() {
 			continue
 		}
 		// NOTE(review): str is sliced by byte offset here; confirm that Group
 		// indices are byte offsets into str (and not rune offsets).
 		matchStr[i] = str[g.StartIdx:g.EndIdx]
 		anyValid = true
 	}
 	// If not a single group (including group 0) is valid, report no match.
 	if !anyValid {
 		return nil
 	}
 	return matchStr
 }
 // FindAllString is the 'all' version of [Reg.FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
 func (regex Reg) FindAllString(str string) []string {