Fixed typo

Removed capitalization for error message (staticcheck)
Removed unused function
2025-02-09 15:14:17 -05:00 · 2025-02-09 09:14:45 -05:00 · 2025-02-09 09:13:52 -05:00 · 2025-02-09 09:13:29 -05:00 · 2025-02-09 09:13:03 -05:00 · 2025-02-09 09:12:55 -05:00
9 changed files with 142 additions and 543 deletions
--- a/cmd/unique_array.go
+++ b/cmd/unique_array.go
@@ -16,7 +16,6 @@ func (s *uniq_arr[T]) add(vals ...T) {
 			s.backingMap[item] = struct{}{}
 		}
 	}
 	return
 }
 func (s uniq_arr[T]) contains(val T) bool {
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -12,18 +12,24 @@ var notDotChars []rune
 // A Reg represents the result of compiling a regular expression. It contains
 // the startState of the NFA representation of the regex, and the number of capturing
-// groups in the regex.
+// groups in the regex. It also contains the expression string.
 type Reg struct {
 	// start is the start state of the NFA representation of the expression.
 	start     *nfaState
 	// numGroups is the number of capturing groups in the expression.
 	numGroups int
 	// str is the expression string; it is what String returns.
 	str       string
 }
-// numSubexp eturns the number of sub-expressions in the given [Reg]. This is equivalent
+// NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
 func (r Reg) NumSubexp() int {
 	// The count is stored on the Reg itself; nothing is recomputed here.
 	return r.numGroups
 }
 // String returns the string used to compile the expression, i.e. the value
 // held in the str field. The field is returned as-is, so it is empty for a
 // Reg whose str was never set.
 func (r Reg) String() string {
 	return r.str
 }
 // concatRune is the rune value 0xF0001.
 // NOTE(review): the name suggests it stands in for an implicit concatenation
 // operator while the expression is parsed; confirm against its uses.
 const concatRune rune = 0xF0001
 // Flags for shuntingYard - control its behavior
@@ -1128,7 +1134,8 @@ func thompson(re []postfixNode) (Reg, error) {
 	concatenate(nfa[0], &lastState)
-	return Reg{nfa[0], numGroups}, nil
+	// The string is empty here, because we add it in Compile()
 	return Reg{nfa[0], numGroups, ""}, nil
 }
@@ -1146,10 +1153,11 @@ func Compile(re string, flags ...ReFlag) (Reg, error) {
 	if err != nil {
 		return Reg{}, fmt.Errorf("error compiling regex: %w", err)
 	}
 	reg.str = re
 	return reg, nil
 }
-// MustCompile panicks if Compile returns an error. They are identical in all other respects.
+// MustCompile panics if Compile returns an error. They are identical in all other respects.
 func MustCompile(re string, flags ...ReFlag) Reg {
 	reg, err := Compile(re, flags...)
 	if err != nil {
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -4,6 +4,8 @@ Package regex implements regular expression search, using a custom non-bracktrac
 The engine relies completely on UTF-8 codepoints. As such, it is capable of matching characters
 from other languages, emojis and symbols.
 The API and regex syntax are largely compatible with that of the stdlib's [regexp], with a few key differences (see 'Key Differences with regexp').
 The full syntax is specified below.
 # Syntax
@@ -55,8 +57,8 @@ POSIX classes (inside normal character classes):
 Composition:
 	def				Match d, followed by e, followed by f
-	x|y				Match x or y (prefer longer one)
+	x|y				Match x or y (prefer x)
-	xy|z			Match xy or z
+	xy|z			Match xy or z (prefer xy)
 Repetition (always greedy, preferring more):
@@ -94,10 +96,11 @@ Lookarounds:
 Numeric ranges:
 	<x-y>			Match any number from x to y (inclusive) (x and y must be positive numbers)
 	\<x				Match a literal '<' followed by x
 # Key Differences with regexp
-The engine and the API differ from [regexp] in a number of ways, some of them very subtle.
+The engine and the API differ from [regexp] in a few ways, some of them very subtle.
 The key differences are mentioned below.
 1. Greediness:
@@ -132,7 +135,7 @@ Rather than using primitives for return values, my engine defines two types that
 values: a [Group] represents a capturing group, and a [Match] represents a list of groups.
 [regexp] specifies a regular expression that gives a list of all the matching functions that it supports. The
-equivalent expression for this engine is:
+equivalent expression for this engine is shown below. Note that 'Index' is the default.
 	Find(All)?(String)?(Submatch)?
@@ -140,7 +143,7 @@ equivalent expression for this engine is:
 If a function contains 'All' it returns all matches instead of just the leftmost one.
-If a function contains 'String' it returns the matched text, rather than the indices.
+If a function contains 'String' it returns the matched text, rather than the index in the string.
 If a function contains 'Submatch' it returns the match, including all submatches found by
 capturing groups.
@@ -156,5 +159,20 @@ and the input string:
 The 0th group would contain 'xy' and the 1st group would contain 'y'. Any matching function without 'Submatch' in its name
 returns the 0-group.
 # Feature Differences
 The following features from [regexp] are (currently) NOT supported:
 1. Named capturing groups
 2. Non-greedy operators
 3. Unicode character classes
 4. Embedded flags (flags are passed as arguments to [Compile])
 5. Literal text with \Q ... \E
 The following features are not available in [regexp], but are supported in my engine:
 1. Lookarounds
 2. Numeric ranges
 The goal is to shorten the first list, and expand the second.
 */
 package regex
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,7 +2,6 @@ package regex
 import (
 	"fmt"
 	"sort"
 )
 // A Match represents a match found by the regex in a given string.
@@ -14,7 +13,7 @@ import (
 // See [Reg.FindSubmatch] for an example.
 type Match []Group
-// a Group represents a group. It contains the start index and end index of the match
+// A Group represents a capturing group. It contains the start and end index of the group.
 type Group struct {
 	StartIdx int
 	EndIdx   int
@@ -29,17 +28,6 @@ func newMatch(size int) Match {
 	return toRet
 }
 // numValidGroups counts the groups in m whose start and end indices are both
 // non-negative.
 func (m Match) numValidGroups() int {
 	count := 0
 	for i := range m {
 		if m[i].StartIdx < 0 || m[i].EndIdx < 0 {
 			continue
 		}
 		count++
 	}
 	return count
 }
 // Returns a string containing the indices of all (valid) groups in the match
 func (m Match) String() string {
 	var toRet string
@@ -58,7 +46,7 @@ func (idx Group) String() string {
 	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }
-// Returns whether a group is valid (ie. whether it matched any text). It
+// IsValid returns whether a group is valid (ie. whether it matched any text). It
 // simply ensures that both indices of the group are >= 0.
 func (g Group) IsValid() bool {
 	return g.StartIdx >= 0 && g.EndIdx >= 0
@@ -69,87 +57,6 @@ func getZeroGroup(m Match) Group {
 	return m[0]
 }
 // takeZeroState takes the 0-state (if such a transition exists) for all states in the
 // given slice. It returns the resulting states. If any of the resulting states is a 0-state,
 // the second ret val is true.
 // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
 //func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
 //	for _, state := range states {
 //		if len(state.transitions[epsilon]) > 0 {
 //			for _, s := range state.transitions[epsilon] {
 //				if s.threadGroups == nil {
 //					s.threadGroups = newMatch(numGroups + 1)
 //				}
 //				copy(s.threadGroups, state.threadGroups)
 //				if s.groupBegin {
 //					s.threadGroups[s.groupNum].StartIdx = idx
 //					//					openParenGroups = append(openParenGroups, s.groupNum)
 //				}
 //				if s.groupEnd {
 //					s.threadGroups[s.groupNum].EndIdx = idx
 //					//					closeParenGroups = append(closeParenGroups, s.groupNum)
 //				}
 //			}
 //			rtv = append(rtv, state.transitions[epsilon]...)
 //		}
 //	}
 //	for _, state := range rtv {
 //		if len(state.transitions[epsilon]) > 0 {
 //			return rtv, true
 //		}
 //	}
 //	return rtv, false
 //}
 // zeroMatchPossible returns true if a zero-length match is possible
 // from any of the given states, given the string and our position in it.
 // It uses the same algorithm to find zero-states as the one inside the loop,
 // so I should probably put it in a function.
 //func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
 //	zeroStates, isZero := takeZeroState(states, numGroups, idx)
 //	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
 //	tempstates = append(tempstates, states...)
 //	tempstates = append(tempstates, zeroStates...)
 //	num_appended := 0 // number of unique states addded to tempstates
 //	for isZero == true {
 //		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
 //		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
 //		if num_appended == 0 { // break if we haven't appended any more unique values
 //			break
 //		}
 //	}
 //	for _, state := range tempstates {
 //		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
 //			return true
 //		}
 //	}
 //	return false
 //}
 // pruneIndices removes overlapping matches from indices and returns the
 // survivors ordered by the start index of their 0-group.
 //
 // The input slice is sorted in place. The matches are then scanned from left
 // to right while tracking a current candidate: a match that starts at or after
 // the candidate's end emits the candidate and becomes the new candidate; a
 // match that overlaps the candidate but ends later replaces it without
 // emitting it; any other overlapping match is dropped. Every Match is expected
 // to contain a 0-group (element 0).
 //
 // An empty input is returned unchanged rather than indexing out of range.
 func pruneIndices(indices []Match) []Match {
 	if len(indices) == 0 {
 		return indices
 	}
 	// First, sort the slice by the start indices
 	sort.Slice(indices, func(i, j int) bool {
 		return indices[i][0].StartIdx < indices[j][0].StartIdx
 	})
 	toRet := make([]Match, 0, len(indices))
 	current := indices[0]
 	for _, idx := range indices[1:] {
 		switch {
 		case idx[0].StartIdx >= current[0].EndIdx:
 			// idx starts after current ends, so current is final: emit it
 			// and continue with idx as the new candidate.
 			toRet = append(toRet, current)
 			current = idx
 		case idx[0].EndIdx > current[0].EndIdx:
 			// idx overlaps current but reaches further, so it takes over.
 			current = idx
 		}
 	}
 	// The last candidate has not been emitted by the loop.
 	toRet = append(toRet, current)
 	return toRet
 }
 // copyThread gives to its own copy of from's thread groups, so the two states
 // no longer share a backing array.
 func copyThread(to *nfaState, from nfaState) {
 	groups := make([]Group, len(from.threadGroups))
 	copy(groups, from.threadGroups)
 	to.threadGroups = groups
 }
@@ -164,6 +71,12 @@ func (regex Reg) Find(str string) (Group, error) {
 	return getZeroGroup(match), nil
 }
 // Match reports whether Find locates a match of the regex in str: it returns
 // true exactly when Find returns a nil error.
 func (regex Reg) Match(str string) bool {
 	if _, err := regex.Find(str); err != nil {
 		return false
 	}
 	return true
 }
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
 func (regex Reg) FindAll(str string) []Group {
@@ -199,7 +112,37 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 	}
 }
-// FindAllString is the 'all' version of FindString.
+// FindStringSubmatch is the 'string' version of [FindSubmatch]. It returns a slice of strings,
 // where the string at index i contains the text matched by the i-th capturing group.
 // The 0-th index represents the entire match.
 // An empty string at index n could mean:
 //
 //  1. Group n did not find a match
 //  2. Group n found a zero-length match
 //
 // A return value of nil indicates no match.
 func (regex Reg) FindStringSubmatch(str string) []string {
 	match, err := regex.FindSubmatch(str)
 	if err != nil {
 		// An error from FindSubmatch is treated as "no match".
 		return nil
 	}
 	// Allocate only once a match exists. Entries for groups that did not
 	// participate keep the zero value "", so no explicit reset is needed.
 	matchStr := make([]string, regex.numGroups+1)
 	anyValid := false
 	for i, g := range match {
 		if !g.IsValid() {
 			continue
 		}
 		// NOTE(review): str is sliced with the group's indices directly, which
 		// is a byte-offset operation. If the indices are rune offsets this is
 		// wrong for multi-byte input - confirm against FindSubmatch.
 		matchStr[i] = str[g.StartIdx:g.EndIdx]
 		anyValid = true
 	}
 	if !anyValid {
 		// No group (not even the 0-group) was valid.
 		return nil
 	}
 	return matchStr
 }
 // FindAllString is the 'all' version of [FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
 func (regex Reg) FindAllString(str string) []string {
@@ -244,9 +187,6 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 			indices = append(indices, matchIdx)
 		}
 	}
 	if len(indices) > 0 {
 		return pruneIndices(indices)
 	}
 	return indices
 }
@@ -293,8 +233,6 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
 //
 //	Might return duplicates or overlapping indices, so care must be taken to prune the resulting array.
 func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
 	// Base case - exit if offset exceeds string's length
 	if offset > len(str) {
@@ -303,21 +241,9 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	}
 	resetThreads(start)
 	// Hold a list of match indices for the current run. When we
 	// can no longer find a match, the match with the largest range is
 	// chosen as the match for the entire string.
 	// This allows us to pick the longest possible match (which is how greedy matching works).
 	// COMMENT ABOVE IS CURRENTLY NOT UP-TO-DATE
 	//	tempIndices := newMatch(numGroups + 1)
 	//	foundPath := false
 	//startIdx := offset
 	//endIdx := offset
 	currentStates := make([]nfaState, 0)
 	nextStates := make([]nfaState, 0)
 	//	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
 	i := offset // Index in string
 	//startingFrom := i                  // Store starting index
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
@@ -327,29 +253,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			return false, []Group{}, i
 		}
 	}
 	// Increment until we hit a character matching the start state (assuming not 0-state)
 	//	if start.isEmpty == false {
 	//		for i < len(str) && !start.contentContains(str, i) {
 	//			i++
 	//		}
 	//		startIdx = i
 	//		startingFrom = i
 	//		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
 	//	}
 	//	start.threadGroups = newMatch(numGroups + 1)
 	// Check if the start state begins a group - if so, add the start index to our list
 	//if start.groupBegin {
 	//		start.threadGroups[start.groupNum].StartIdx = i
 	//		tempIndices[start.groupNum].startIdx = i
 	//}
 	start.threadGroups = newMatch(numGroups + 1)
 	start.threadGroups[0].StartIdx = i
 	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil)
 	var match Match = nil
 	//	var isEmptyAndNoAssertion bool
 	// Main loop
 	for idx := i; idx <= len(str); idx++ {
 		if len(currentStates) == 0 {
 			break
@@ -371,76 +279,6 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil)
 				}
 			}
 			//			if currentState.groupBegin {
 			//				currentState.threadGroups[currentState.groupNum].StartIdx = idx
 			//			}
 			//			if currentState.groupEnd {
 			//				currentState.threadGroups[currentState.groupNum].EndIdx = idx
 			//			}
 			// Alternation - enqueue left then right state, and continue
 			//			if currentState.isAlternation {
 			//				if currentState.isKleene { // Reverse order of adding things
 			//					rightState := currentState.splitState
 			//					copyThread(rightState, currentState)
 			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
 			//					leftState := currentState.next
 			//					copyThread(leftState, currentState)
 			//					currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
 			//				} else {
 			//					leftState := currentState.next
 			//					copyThread(leftState, currentState)
 			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
 			//					rightState := currentState.splitState
 			//					copyThread(rightState, currentState)
 			//					currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
 			//				}
 			//				continue
 			//			}
 			// Empty state - enqueue next state, do _not_ increment the SP
 			//			if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
 			//				isEmptyAndNoAssertion = true
 			//			}
 			//
 			//			if currentState.contentContains(str, idx) {
 			//				foundMatch = true
 			//			}
 			//
 			//			if isEmptyAndNoAssertion || foundMatch {
 			//				nextMatch := *(currentState.next)
 			//				copyThread(&nextMatch, currentState)
 			//				if currentState.groupBegin {
 			//					//	if !stateExists(currentStates, nextMatch) {
 			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
 			//					//}
 			//				} else if currentState.groupEnd {
 			//					if !stateExists(currentStates, nextMatch) {
 			//						currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
 			//					}
 			//				} else if currentState.assert != noneAssert {
 			//					if !stateExists(currentStates, nextMatch) {
 			//						currentStates = append(currentStates, nextMatch)
 			//					}
 			//				} else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
 			//					if !stateExists(currentStates, nextMatch) {
 			//						currentStates = append(currentStates, nextMatch)
 			//					}
 			//				} else {
 			//					if !stateExists(nextStates, nextMatch) {
 			//						nextStates = append(nextStates, nextMatch)
 			//					}
 			//				}
 			//			}
 			//
 			//			if currentState.isLast && len(nextStates) == 0 { // Last state reached
 			//				currentState.threadGroups[0].EndIdx = idx
 			//				if idx == currentState.threadGroups[0].StartIdx {
 			//					idx += 1
 			//				}
 			//				return true, currentState.threadGroups, idx
 			//			}
 		}
 		currentStates = append([]nfaState{}, nextStates...)
 		nextStates = nil
@@ -452,196 +290,4 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		return true, match, match[0].EndIdx
 	}
 	return false, []Group{}, i + 1
 	//		zeroStates := make([]*nfaState, 0)
 	//		// Keep taking zero-states, until there are no more left to take
 	//		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
 	//		topStateItem := currentStates.peek()
 	//		topState := topStateItem.(*priorQueueItem).state
 	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
 	//		tempStates = append(tempStates, zeroStates...)
 	//		num_appended := 0
 	//		for isZero == true {
 	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 	//			if num_appended == 0 { // Break if we haven't appended any more unique values
 	//				break
 	//			}
 	//		}
 	//		if isZero == true {
 	//			currentStates.Pop()
 	//		}
 	//
 	//		for _, state := range tempStates {
 	//			heap.Push(currentStates, newPriorQueueItem(state))
 	//		}
 	//		tempStates = nil
 	//
 	//		// Take any transitions corresponding to current character
 	//		numStatesMatched := 0            // The number of states which had at least 1 match for this round
 	//		assertionFailed := false         // Whether or not an assertion failed for this round
 	//		lastStateInList := false         // Whether or not a last state was in our list of states
 	//		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
 	//		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
 	//		for numStatesMatched == 0 && lastStateInList == false {
 	//			if currentStates.Len() == 0 {
 	//				break
 	//			}
 	//			stateItem := heap.Pop(currentStates)
 	//			state := stateItem.(*priorQueueItem).state
 	//			matches, numMatches := state.matchesFor(str, i)
 	//			if numMatches > 0 {
 	//				numStatesMatched++
 	//				tempStates = append([]*nfaState(nil), matches...)
 	//				foundPath = true
 	//				for _, m := range matches {
 	//					if m.threadGroups == nil {
 	//						m.threadGroups = newMatch(numGroups + 1)
 	//					}
 	//					m.threadSP = state.threadSP + 1
 	//					copy(m.threadGroups, state.threadGroups)
 	//				}
 	//			}
 	//			if numMatches < 0 {
 	//				assertionFailed = true
 	//			}
 	//			if state.isLast {
 	//				if state.isLookaround() {
 	//					lastLookaroundInList = true
 	//				}
 	//				lastStateInList = true
 	//				lastStatePtr = state
 	//			}
 	//		}
 	//
 	//		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
 	//			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
 	//			// state. The explanation below is my attempt to explain this behavior.
 	//			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
 	//			//
 	//			// One of the states in our list was a last state and a lookaround. In this case, we
 	//			// don't abort upon failure of the assertion, because we have found
 	//			// another path to a final state.
 	//			// Even if the last state _was_ an assertion, we can use the previously
 	//			// saved indices to find a match.
 	//			if lastLookaroundInList {
 	//				break
 	//			} else {
 	//				if i == startingFrom {
 	//					i++
 	//				}
 	//				return false, []Group{}, i
 	//			}
 	//		}
 	//		// Check if we can find a state in our list that is:
 	//		// 	a. A last-state
 	//		// 	b. Empty
 	//		// 	c. Doesn't assert anything
 	//		for _, stateItem := range *currentStates {
 	//			s := stateItem.state
 	//			if s.isLast && s.isEmpty && s.assert == noneAssert {
 	//				lastStatePtr = s
 	//				lastStateInList = true
 	//			}
 	//		}
 	//		if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
 	//			for j := 1; j < numGroups+1; j++ {
 	//				tempIndices[j] = lastStatePtr.threadGroups[j]
 	//			}
 	//			endIdx = i
 	//			tempIndices[0] = Group{startIdx, endIdx}
 	//			if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
 	//				return true, tempIndices, tempIndices[0].EndIdx + 1
 	//			} else {
 	//				return true, tempIndices, tempIndices[0].EndIdx
 	//			}
 	//		}
 	//
 	//		// Check if we can find a zero-length match
 	//		if foundPath == false {
 	//			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
 	//				return item.state
 	//			})
 	//			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
 	//				if tempIndices[0].IsValid() == false {
 	//					tempIndices[0] = Group{startIdx, startIdx}
 	//				}
 	//			}
 	//			// If we haven't moved in the string, increment the counter by 1
 	//			// to ensure we don't keep trying the same string over and over.
 	//			//			if i == startingFrom {
 	//			startIdx++
 	//			//	i++
 	//			//			}
 	//			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
 	//				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 	//					return true, tempIndices, tempIndices[0].EndIdx + 1
 	//				} else {
 	//					return true, tempIndices, tempIndices[0].EndIdx
 	//				}
 	//			}
 	//			return false, []Group{}, startIdx
 	//		}
 	//		currentStates = &priorityQueue{}
 	//		slices.Reverse(tempStates)
 	//		for _, state := range tempStates {
 	//			heap.Push(currentStates, newPriorQueueItem(state))
 	//		}
 	//		tempStates = nil
 	//
 	//		i++
 	//	}
 	//
 	// // End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
 	// // This is the exact same algorithm used inside the loop, so I should probably put it in a function.
 	//
 	//	if currentStates.Len() > 0 {
 	//		topStateItem := currentStates.peek()
 	//		topState := topStateItem.(*priorQueueItem).state
 	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
 	//		tempStates = append(tempStates, zeroStates...)
 	//		num_appended := 0 // Number of unique states addded to tempStates
 	//		for isZero == true {
 	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 	//			if num_appended == 0 { // Break if we haven't appended any more unique values
 	//				break
 	//			}
 	//		}
 	//	}
 	//
 	//	for _, state := range tempStates {
 	//		heap.Push(currentStates, newPriorQueueItem(state))
 	//	}
 	//
 	// tempStates = nil
 	//
 	//	for _, stateItem := range *currentStates {
 	//		state := stateItem.state
 	//		// Only add the match if the start index is in bounds. If the state has an assertion,
 	//		// make sure the assertion checks out.
 	//		if state.isLast && i <= len(str) {
 	//			if state.assert == noneAssert || state.checkAssertion(str, i) {
 	//				for j := 1; j < numGroups+1; j++ {
 	//					tempIndices[j] = state.threadGroups[j]
 	//				}
 	//				endIdx = i
 	//				tempIndices[0] = Group{startIdx, endIdx}
 	//			}
 	//		}
 	//	}
 	//
 	//	if tempIndices.numValidGroups() > 0 {
 	//		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 	//			return true, tempIndices, tempIndices[0].EndIdx + 1
 	//		} else {
 	//			return true, tempIndices, tempIndices[0].EndIdx
 	//		}
 	//	}
 	//
 	// if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
 	//
 	//		startIdx++
 	//	}
 	//
 	// return false, []Group{}, startIdx
 }
--- a/regex/misc.go
+++ b/regex/misc.go
@@ -48,49 +48,6 @@ func isNormalChar(c rune) bool {
 	return !slices.Contains(specialChars, c)
 }
 // uniqueAppend appends each of items to slc unless an equal element is already
 // present, including one appended earlier in the same call. It returns the
 // resulting slice together with the number of items that were appended.
 func uniqueAppend[T comparable](slc []T, items ...T) ([]T, int) {
 	before := len(slc)
 	for i := range items {
 		if slices.Index(slc, items[i]) >= 0 {
 			continue
 		}
 		slc = append(slc, items[i])
 	}
 	return slc, len(slc) - before
 }
 // uniqueAppendFunc is the predicate-based counterpart of uniqueAppend. It
 // returns a new slice holding the elements of slc followed by every item for
 // which fn(item, existing) is false for all elements already in the result,
 // plus the number of items appended. slc itself is never modified.
 //
 // Each item is compared against the growing result, not just the original
 // slc, so an item repeated within items is appended only once.
 func uniqueAppendFunc[T any](slc []T, fn func(T, T) bool, items ...T) ([]T, int) {
 	// Reserve room for the worst case so append never reallocates.
 	toRet := make([]T, len(slc), len(slc)+len(items))
 	copy(toRet, slc)
 	numAppended := 0
 	for _, item := range items {
 		exists := false
 		for _, val := range toRet {
 			if fn(item, val) {
 				exists = true
 				break // one hit is enough
 			}
 		}
 		if !exists {
 			toRet = append(toRet, item)
 			numAppended++
 		}
 	}
 	return toRet, numAppended
 }
 // allEqual reports whether every element of items equals the first one.
 // With zero or one element there is nothing to disagree, so it returns true.
 func allEqual[T comparable](items ...T) bool {
 	if len(items) == 0 {
 		// Guard: indexing items[0] below would panic on an empty call.
 		return true
 	}
 	first := items[0]
 	for _, item := range items[1:] {
 		if item != first {
 			return false
 		}
 	}
 	return true
 }
 // Map function - convert a slice of T to a slice of V, based on a function
 // that maps a T to a V
 func funcMap[T, V any](slc []T, fn func(T) V) []V {
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -183,7 +183,7 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 			strToMatch = string(runesToMatch)
 		}
-		regComp := Reg{startState, s.lookaroundNumCaptureGroups}
+		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex}
 		matchIndices := regComp.FindAll(strToMatch)
 		numMatchesFound := 0
--- a/regex/priorityQueue.go
+++ b/regex/priorityQueue.go
@@ -1,89 +0,0 @@
 package regex
 import "container/heap"
 // Implement a priority queue using container/heap
 // Priority levels, in ascending order (iota starts at 0). priorityQueue.Less
 // ranks a larger value ahead of a smaller one.
 // NOTE(review): getPriority only ever returns zerostate_priority,
 // alternation_priority or char_priority; Kleene states get zerostate_priority,
 // not kleene_priority. Confirm whether that is intended.
 const (
 	min_priority int = iota
 	zerostate_priority
 	alternation_priority
 	kleene_priority
 	char_priority
 	max_priority
 )
 // getPriority maps a state to its queue priority. The checks are made in
 // order: Kleene states and (non-alternation) empty states both rank as
 // zerostate_priority, alternation states as alternation_priority, and
 // everything else as char_priority.
 func getPriority(state *nfaState) int {
 	switch {
 	case state.isKleene:
 		return zerostate_priority
 	case state.isAlternation:
 		return alternation_priority
 	case state.isEmpty:
 		return zerostate_priority
 	default:
 		return char_priority
 	}
 }
 // priorQueueItem is one entry of a priorityQueue.
 type priorQueueItem struct {
 	// state is the NFA state carried by this entry.
 	state    *nfaState
 	// priority is the rank used by Less; larger values come first.
 	priority int
 	// index is the entry's position in the queue's slice, kept current by
 	// Push and Swap. It is -1 for an entry that is not in a queue (freshly
 	// created or popped).
 	index    int
 }
 // newPriorQueueItem wraps state in a queue entry whose priority comes from
 // getPriority. The index starts at -1 because the entry is not in a queue yet.
 func newPriorQueueItem(state *nfaState) *priorQueueItem {
 	item := new(priorQueueItem)
 	item.state = state
 	item.priority = getPriority(state)
 	item.index = -1
 	return item
 }
 // priorityQueue is a slice of queue entries. Together with Less, Swap, Push
 // and Pop it provides the methods container/heap operates on.
 type priorityQueue []*priorQueueItem
 // Len returns the number of entries in the queue.
 func (pq priorityQueue) Len() int {
 	return len(pq)
 }
 // Less orders entries for the heap. The entry with the larger priority sorts
 // first (max-heap behaviour); entries with equal priority are ordered by
 // their index, smaller first.
 func (pq priorityQueue) Less(i, j int) bool {
 	a, b := pq[i], pq[j]
 	if a.priority != b.priority {
 		return a.priority > b.priority
 	}
 	return a.index < b.index
 }
 // Swap exchanges the entries at positions i and j and then rewrites their
 // index fields so each entry records its new position.
 func (pq priorityQueue) Swap(i, j int) {
 	pq[j], pq[i] = pq[i], pq[j]
 	pq[i].index, pq[j].index = i, j
 }
 // Push appends x, which must be a *priorQueueItem, to the end of the queue and
 // records that position in the item's index. It panics if x has another type.
 func (pq *priorityQueue) Push(x any) {
 	item := x.(*priorQueueItem)
 	item.index = len(*pq)
 	*pq = append(*pq, item)
 }
 // Pop removes the last entry of the slice and returns it, marking it as no
 // longer queued by setting its index to -1. It panics on an empty queue.
 func (pq *priorityQueue) Pop() any {
 	items := *pq
 	last := len(items) - 1
 	item := items[last]
 	items[last] = nil // clear the slot so the backing array drops its reference
 	item.index = -1
 	*pq = items[:last]
 	return item
 }
 // peek returns the entry at the top of the heap without removing it, or nil
 // when the queue is empty.
 //
 // container/heap keeps the root - the entry that sorts first according to
 // Less, and therefore the one heap.Pop would return - at index 0 of the slice,
 // so that is the element reported here. The last slice element is just an
 // arbitrary leaf.
 func (pq *priorityQueue) peek() any {
 	queue := *pq
 	if len(queue) == 0 {
 		return nil
 	}
 	return queue[0]
 }
 // update replaces the state and priority of item and then calls heap.Fix at
 // the item's recorded index so the queue is re-ordered for the new priority.
 func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
 	item.priority, item.state = priority, value
 	heap.Fix(pq, item.index)
 }
--- a/regex/range2regex.go
+++ b/regex/range2regex.go
@@ -109,7 +109,7 @@ func range2regex(start int, end int) (string, error) {
 		startSlc := intToSlc(rg.start)
 		endSlc := intToSlc(rg.end)
 		if len(startSlc) != len(endSlc) {
-			return "", fmt.Errorf("Error parsing numeric range")
+			return "", fmt.Errorf("error parsing numeric range")
 		}
 		for i := range startSlc {
 			if startSlc[i] == endSlc[i] {
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -528,7 +528,7 @@ var groupTests = []struct {
 }{
 	{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
 	{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
-	{"(0)", nil, "ab", []Match{[]Group{}}},
+	{"(0)", nil, "ab", []Match{}},
 	{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
 	{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
 	{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
@@ -538,9 +538,8 @@ var groupTests = []struct {
 	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
 	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
 	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
 	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
+	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
 	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
 	{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
 	{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
@@ -578,7 +577,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `bcdd`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, nil, `a`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, nil, `a`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\41`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
@@ -633,7 +632,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
 	{`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}},
@@ -743,7 +742,7 @@ func TestFindString(t *testing.T) {
 				foundString := regComp.FindString(test.str)
 				if len(test.result) == 0 {
 					if foundString != "" {
-						t.Errorf("Expected no match got %v\n", foundString)
+						t.Errorf("Wanted no match got %v\n", foundString)
 					}
 				} else {
 					expectedString := test.str[test.result[0].StartIdx:test.result[0].EndIdx]
@@ -791,11 +790,68 @@ func TestFindSubmatch(t *testing.T) {
 				}
 			}
 			match, err := regComp.FindSubmatch(test.str)
 			if err != nil {
 				if len(test.result) != 0 {
 					t.Errorf("Wanted %v got no match\n", test.result[0])
 				}
 			} else if len(test.result) == 0 {
 				t.Errorf("Wanted no match got %v\n", match)
 			}
 			for i := range match {
 				if match[i].IsValid() {
 					if test.result[0][i] != match[i] {
 						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 					}
 				} else {
 					if i < len(test.result) && test.result[0][i].IsValid() {
 						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 					}
 				}
 			}
 		})
 	}
 }
 func TestFindStringSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
 			regComp, err := Compile(test.re, test.flags...)
 			if err != nil {
 				if test.result != nil {
 					panic(err)
 				}
 			}
 			matchStr := regComp.FindStringSubmatch(test.str)
 			if matchStr == nil {
 				if len(test.result) != 0 {
 					expectedStr := funcMap(test.result[0], func(g Group) string {
 						if g.IsValid() {
 							return test.str[g.StartIdx:g.EndIdx]
 						} else {
 							return ""
 						}
 					})
 					t.Errorf("Wanted %v got no match\n", expectedStr)
 				}
 			} else if len(test.result) == 0 {
 				t.Errorf("Wanted no match got %v\n", matchStr)
 			} else {
 				expectedStr := funcMap(test.result[0], func(g Group) string {
 					if g.IsValid() {
 						return test.str[g.StartIdx:g.EndIdx]
 					} else {
 						return ""
 					}
 				})
 				// Groups missing from the expectation are treated as the empty string,
 				// which is also what a non-participating group is expected to yield.
 				// Checking the index up front means an unexpected extra group is
 				// reported as a failure rather than panicking on expectedStr[i].
 				for i, groupStr := range matchStr {
 					wantStr := ""
 					if i < len(expectedStr) {
 						wantStr = expectedStr[i]
 					}
 					if wantStr != groupStr {
 						t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
 					}
 				}
 			}
 		})
@@ -817,6 +873,10 @@ func TestFindAllSubmatch(t *testing.T) {
 						if test.result[i][j] != matchIndices[i][j] {
 							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 						}
 					} else {
 						if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
 							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 						}
 					}
 				}
 			}
Author	SHA1	Message	Date
Aadhavan Srinivasan	47f88c817f	Fixed typo	2025-02-09 15:14:17 -05:00
Aadhavan Srinivasan	835d495990	Removed capitalization for error message (staticcheck)	2025-02-09 09:14:45 -05:00
Aadhavan Srinivasan	76e0170cb9	Removed unused function	2025-02-09 09:13:52 -05:00
Aadhavan Srinivasan	d172a58258	Throw error if match isn't found but test.result has >0 elements	2025-02-09 09:13:29 -05:00
Aadhavan Srinivasan	7231169270	Removed unused functions	2025-02-09 09:13:03 -05:00
Aadhavan Srinivasan	e546f01c20	Removed redundant return (staticcheck)	2025-02-09 09:12:55 -05:00
Aadhavan Srinivasan	b7467a00f1	Removed priorityQueue (unused)	2025-02-09 09:07:43 -05:00
Aadhavan Srinivasan	c6ad4caa0d	Removed a bunch of unused code (let's go!!!)	2025-02-09 09:06:40 -05:00
Aadhavan Srinivasan	6334435b83	Updated tests since the engine uses Perl matching instead of POSIX matching; added tests for FindStringSubmatch	2025-02-09 09:01:42 -05:00
Aadhavan Srinivasan	78fb5606dd	Use new definition of Reg	2025-02-09 08:59:16 -05:00
Aadhavan Srinivasan	eddd2ae700	Updated documentation	2025-02-09 08:58:58 -05:00
Aadhavan Srinivasan	c577064977	Added string field to Reg, that contains the expression string; wrote method to return the string	2025-02-09 08:58:46 -05:00
Aadhavan Srinivasan	d4e3942d27	Added Match() and FindStringSubmatch(); removed old code; updated comments	2025-02-09 08:58:09 -05:00