From d4e3942d27a0af01620eda97a7d37925873e9487 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Sun, 9 Feb 2025 08:58:09 -0500 Subject: [PATCH] Added Match() and FindStringSubmatch(); removed old code; updated comments --- regex/matching.go | 99 +++++++++++++++++++---------------------------- 1 file changed, 39 insertions(+), 60 deletions(-) diff --git a/regex/matching.go b/regex/matching.go index 6a5e0e7..7864084 100644 --- a/regex/matching.go +++ b/regex/matching.go @@ -14,7 +14,7 @@ import ( // See [Reg.FindSubmatch] for an example. type Match []Group -// a Group represents a group. It contains the start index and end index of the match +// a Group represents a capturing group. It contains the start and end index of the group. type Group struct { StartIdx int EndIdx int @@ -58,7 +58,7 @@ func (idx Group) String() string { return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx) } -// Returns whether a group is valid (ie. whether it matched any text). It +// IsValid returns whether a group is valid (ie. whether it matched any text). It // simply ensures that both indices of the group are >= 0. func (g Group) IsValid() bool { return g.StartIdx >= 0 && g.EndIdx >= 0 @@ -69,63 +69,6 @@ func getZeroGroup(m Match) Group { return m[0] } -// takeZeroState takes the 0-state (if such a transition exists) for all states in the -// given slice. It returns the resulting states. If any of the resulting states is a 0-state, -// the second ret val is true. -// If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index. 
-//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) { -// for _, state := range states { -// if len(state.transitions[epsilon]) > 0 { -// for _, s := range state.transitions[epsilon] { -// if s.threadGroups == nil { -// s.threadGroups = newMatch(numGroups + 1) -// } -// copy(s.threadGroups, state.threadGroups) -// if s.groupBegin { -// s.threadGroups[s.groupNum].StartIdx = idx -// // openParenGroups = append(openParenGroups, s.groupNum) -// } -// if s.groupEnd { -// s.threadGroups[s.groupNum].EndIdx = idx -// // closeParenGroups = append(closeParenGroups, s.groupNum) -// } -// } -// rtv = append(rtv, state.transitions[epsilon]...) -// } -// } -// for _, state := range rtv { -// if len(state.transitions[epsilon]) > 0 { -// return rtv, true -// } -// } -// return rtv, false -//} - -// zeroMatchPossible returns true if a zero-length match is possible -// from any of the given states, given the string and our position in it. -// It uses the same algorithm to find zero-states as the one inside the loop, -// so I should probably put it in a function. -//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool { -// zeroStates, isZero := takeZeroState(states, numGroups, idx) -// tempstates := make([]*nfaState, 0, len(zeroStates)+len(states)) -// tempstates = append(tempstates, states...) -// tempstates = append(tempstates, zeroStates...) -// num_appended := 0 // number of unique states addded to tempstates -// for isZero == true { -// zeroStates, isZero = takeZeroState(tempstates, numGroups, idx) -// tempstates, num_appended = uniqueAppend(tempstates, zeroStates...) 
-// if num_appended == 0 { // break if we haven't appended any more unique values -// break -// } -// } -// for _, state := range tempstates { -// if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast { -// return true -// } -// } -// return false -//} - // Prunes the slice by removing overlapping indices. func pruneIndices(indices []Match) []Match { // First, sort the slice by the start indices @@ -164,6 +107,12 @@ func (regex Reg) Find(str string) (Group, error) { return getZeroGroup(match), nil } +// Match returns a boolean value, indicating whether the regex found a match in the given string. +func (regex Reg) Match(str string) bool { + _, err := regex.Find(str) + return err == nil +} + // FindAll returns a slice containing all the 0-groups of the regex in the given string. // A 0-group represents the match without any submatches. func (regex Reg) FindAll(str string) []Group { @@ -199,7 +148,37 @@ func (regex Reg) FindSubmatch(str string) (Match, error) { } } -// FindAllString is the 'all' version of FindString. +// FindStringSubmatch is the 'string' version of [Reg.FindSubmatch]. It returns a slice of strings, +// where the string at index i contains the text matched by the i-th capturing group. +// The 0-th index represents the entire match. +// An empty string at index n could mean: +// +// 1. Group n did not find a match +// 2. Group n found a zero-length match +// +// A return value of nil indicates no match. 
+func (regex Reg) FindStringSubmatch(str string) []string { + matchStr := make([]string, regex.numGroups+1) + match, err := regex.FindSubmatch(str) + if err != nil { + return nil + } + nonEmptyMatchFound := false + for i := range match { + if match[i].IsValid() { + matchStr[i] = str[match[i].StartIdx:match[i].EndIdx] + nonEmptyMatchFound = true + } else { + matchStr[i] = "" + } + } + if nonEmptyMatchFound == false { + return nil + } + return matchStr +} + +// FindAllString is the 'all' version of [Reg.FindString]. // It returns a slice of strings containing the text of all matches of // the regex in the given string. func (regex Reg) FindAllString(str string) []string {