From fbc9dfcc95c50b067e8b1ae71b969418ebcdb4f3 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Mon, 3 Feb 2025 16:47:53 -0500
Subject: [PATCH 01/48] Experiment: retune priority queue ordering, add
 newPriorQueueItem and peek helpers

---
 regex/priorityQueue.go | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/regex/priorityQueue.go b/regex/priorityQueue.go
index 59592a9..ae43e86 100644
--- a/regex/priorityQueue.go
+++ b/regex/priorityQueue.go
@@ -15,8 +15,8 @@ const (
 
 func getPriority(state *nfaState) int {
 	if state.isKleene {
-		return kleene_priority
-	} else if state.isQuestion || state.isAlternation {
+		return zerostate_priority
+	} else if state.isAlternation {
 		return alternation_priority
 	} else {
 		if state.isEmpty {
@@ -33,6 +33,14 @@ type priorQueueItem struct {
 	index    int
 }
 
+func newPriorQueueItem(state *nfaState) *priorQueueItem {
+	return &priorQueueItem{
+		state:    state,
+		index:    -1,
+		priority: getPriority(state),
+	}
+}
+
 type priorityQueue []*priorQueueItem
 
 func (pq priorityQueue) Len() int {
@@ -41,7 +49,7 @@ func (pq priorityQueue) Len() int {
 
 func (pq priorityQueue) Less(i, j int) bool {
 	if pq[i].priority == pq[j].priority {
-		return pq[i].index > pq[j].index
+		return pq[i].index < pq[j].index
 	}
 	return pq[i].priority > pq[j].priority // We want max-heap, so we use greater-than
 }
@@ -68,6 +76,11 @@ func (pq *priorityQueue) Pop() any {
 	*pq = old[0 : n-1]
 	return item
 }
+func (pq *priorityQueue) peek() any {
+	queue := *pq
+	n := len(queue)
+	return queue[n-1]
+}
 
 func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
 	item.state = value
-- 
2.30.2


From 09812956ac147f8d5a44959700d5602574e87d8e Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Mon, 3 Feb 2025 16:48:09 -0500
Subject: [PATCH 02/48] Disable all optimizations

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 252549f..4a309a9 100644
--- a/Makefile
+++ b/Makefile
@@ -6,8 +6,8 @@ fmt:
 vet: fmt
 	go vet ./...
 buildLib: vet
-	go build -gcflags="-N -l" ./...
+	go build -gcflags="all=-N -l" ./...
 buildCmd: buildLib
-	go build -C cmd/ -gcflags="-N -l" -o re ./...
+	go build -C cmd/ -gcflags="all=-N -l" -o re ./...
 test: buildCmd
 	go test -v ./...
-- 
2.30.2


From 1fd48ae6143e57e829e7cb93608356890c9c8606 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Mon, 3 Feb 2025 16:49:10 -0500
Subject: [PATCH 03/48] Store the current string pointer as a 'thread variable'
 (allows us to simulate backtracking)

---
 regex/nfa.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/regex/nfa.go b/regex/nfa.go
index 8f63eb0..0ceea1b 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -45,6 +45,7 @@ type nfaState struct {
 	// The following properties depend on the current match - I should think about resetting them for every match.
 	zeroMatchFound bool    // Whether or not the state has been used for a zero-length match - only relevant for zero states
 	threadGroups   []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
+	threadSP       int     // The string pointer of the thread - where it is in the input string
 }
 
 // Clones the NFA starting from the given state.
@@ -120,6 +121,7 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	}
 	// Assuming it hasn't been visited
 	state.threadGroups = nil
+	state.threadSP = 0
 	visitedMap[state] = true
 	for _, v := range state.transitions {
 		for _, nextState := range v {
-- 
2.30.2


From e167cdb2cbac9c48ced4370151ebe848e5196012 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Mon, 3 Feb 2025 16:49:30 -0500
Subject: [PATCH 04/48] Fixed mistake in test output

---
 regex/re_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/regex/re_test.go b/regex/re_test.go
index 8d24304..2cccc72 100644
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -701,7 +701,7 @@ func TestFind(t *testing.T) {
 					if len(test.result) == 0 {
 						return // Manually pass the test, because this is the expected behavior
 					} else {
-						t.Errorf("Wanted no match Got %v\n", groupIndex)
+						t.Errorf("Wanted %v Got no matches\n", test.result)
 					}
 				} else {
 					if groupIndex != test.result[0] {
-- 
2.30.2


From ad273b0c682c63d080da3b30da504b67bd53d482 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Mon, 3 Feb 2025 16:50:11 -0500
Subject: [PATCH 05/48] Trying to emulate backtracking by using string pointers
 within threads (something similar to rsc's 2nd regexp article)

---
 regex/matching.go | 70 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 22 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 17c2bcb..af2ede3 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -1,6 +1,7 @@
 package regex
 
 import (
+	"container/heap"
 	"fmt"
 	"slices"
 	"sort"
@@ -271,7 +272,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	foundPath := false
 	startIdx := offset
 	endIdx := offset
-	currentStates := make([]*nfaState, 0)
+	currentStates := &priorityQueue{}
+	heap.Init(currentStates)
 	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
 	i := offset                        // Index in string
 	startingFrom := i                  // Store starting index
@@ -301,16 +303,19 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		//		tempIndices[start.groupNum].startIdx = i
 	}
 
-	currentStates = append(currentStates, start)
-
+	start.threadSP = i
+	heap.Push(currentStates, newPriorQueueItem(start))
 	// Main loop
-	for i < len(str) {
+	for currentStates.Len() > 0 {
+		currentState := heap.Pop(currentStates)
 		foundPath = false
 
 		zeroStates := make([]*nfaState, 0)
 		// Keep taking zero-states, until there are no more left to take
 		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
-		zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
+		topStateItem := currentStates.peek()
+		topState := topStateItem.(*priorQueueItem).state
+		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
 		tempStates = append(tempStates, zeroStates...)
 		num_appended := 0
 		for isZero == true {
@@ -320,8 +325,13 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 				break
 			}
 		}
+		if isZero == true {
+			currentStates.Pop()
+		}
 
-		currentStates = slices.Concat(currentStates, tempStates)
+		for _, state := range tempStates {
+			heap.Push(currentStates, newPriorQueueItem(state))
+		}
 		tempStates = nil
 
 		// Take any transitions corresponding to current character
@@ -331,10 +341,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
 		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
 		for numStatesMatched == 0 && lastStateInList == false {
-			if len(currentStates) == 0 {
+			if currentStates.Len() == 0 {
 				break
 			}
-			state, _ := pop(&currentStates)
+			stateItem := heap.Pop(currentStates)
+			state := stateItem.(*priorQueueItem).state
 			matches, numMatches := state.matchesFor(str, i)
 			if numMatches > 0 {
 				numStatesMatched++
@@ -344,6 +355,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 					if m.threadGroups == nil {
 						m.threadGroups = newMatch(numGroups + 1)
 					}
+					m.threadSP = state.threadSP + 1
 					copy(m.threadGroups, state.threadGroups)
 				}
 			}
@@ -382,7 +394,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		// 	a. A last-state
 		// 	b. Empty
 		// 	c. Doesn't assert anything
-		for _, s := range currentStates {
+		for _, stateItem := range *currentStates {
+			s := stateItem.state
 			if s.isLast && s.isEmpty && s.assert == noneAssert {
 				lastStatePtr = s
 				lastStateInList = true
@@ -403,7 +416,10 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 		// Check if we can find a zero-length match
 		if foundPath == false {
-			if ok := zeroMatchPossible(str, i, numGroups, currentStates...); ok {
+			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
+				return item.state
+			})
+			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
 				if tempIndices[0].IsValid() == false {
 					tempIndices[0] = Group{startIdx, startIdx}
 				}
@@ -423,8 +439,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			}
 			return false, []Group{}, startIdx
 		}
-		currentStates = make([]*nfaState, len(tempStates))
-		copy(currentStates, tempStates)
+		currentStates = &priorityQueue{}
+		slices.Reverse(tempStates)
+		for _, state := range tempStates {
+			heap.Push(currentStates, newPriorQueueItem(state))
+		}
 		tempStates = nil
 
 		i++
@@ -432,21 +451,28 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 	// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
 	// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
-	zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
-	tempStates = append(tempStates, zeroStates...)
-	num_appended := 0 // Number of unique states addded to tempStates
-	for isZero == true {
-		zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
-		tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
-		if num_appended == 0 { // Break if we haven't appended any more unique values
-			break
+	if currentStates.Len() > 0 {
+		topStateItem := currentStates.peek()
+		topState := topStateItem.(*priorQueueItem).state
+		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
+		tempStates = append(tempStates, zeroStates...)
+		num_appended := 0 // Number of unique states addded to tempStates
+		for isZero == true {
+			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
+			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
+			if num_appended == 0 { // Break if we haven't appended any more unique values
+				break
+			}
 		}
 	}
 
-	currentStates = append(currentStates, tempStates...)
+	for _, state := range tempStates {
+		heap.Push(currentStates, newPriorQueueItem(state))
+	}
 	tempStates = nil
 
-	for _, state := range currentStates {
+	for _, stateItem := range *currentStates {
+		state := stateItem.state
 		// Only add the match if the start index is in bounds. If the state has an assertion,
 		// make sure the assertion checks out.
 		if state.isLast && i <= len(str) {
-- 
2.30.2


From de0d7345a8792180d05823067b92dc7934b927eb Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Mon, 3 Feb 2025 21:59:05 -0500
Subject: [PATCH 06/48] Store left and right branches of alternation separately

---
 regex/nfa.go | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/regex/nfa.go b/regex/nfa.go
index 0ceea1b..f03edab 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -33,6 +33,8 @@ type nfaState struct {
 	isKleene                   bool                // Identifies whether current node is a 0-state representing Kleene star
 	isQuestion                 bool                // Identifies whether current node is a 0-state representing the question operator
 	isAlternation              bool                // Identifies whether current node is a 0-state representing an alternation
+	leftState                  *nfaState           // Only for alternation states - the 'left' branch of the alternation
+	rightState                 *nfaState           // Only for alternation states - the 'right' branch of the alternation
 	assert                     assertType          // Type of assertion of current node - NONE means that the node doesn't assert anything
 	allChars                   bool                // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
 	except                     []rune              // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
@@ -106,6 +108,15 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 		clone.lookaroundNFA = clone
 	}
 	clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
+	if stateToClone.leftState == stateToClone {
+		clone.leftState = clone
+	}
+	clone.leftState = cloneStateHelper(stateToClone.leftState, cloneMap)
+	if stateToClone.rightState == stateToClone {
+		clone.rightState = clone
+	}
+	clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap)
+
 	return clone
 }
 
@@ -213,6 +224,9 @@ func (s nfaState) contentContains(str []rune, idx int) bool {
 	if s.assert != noneAssert {
 		return s.checkAssertion(str, idx)
 	}
+	if idx >= len(str) {
+		return false
+	}
 	if s.allChars {
 		return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the index isn't a 'notDotChar', or isn't one of the exception characters for the current node.
 	}
@@ -348,6 +362,8 @@ func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isAlternation = true
+	toReturn.leftState = s1
+	toReturn.rightState = s2
 
 	return toReturn
 }
@@ -358,7 +374,7 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
 	s2.content = newContents(epsilon)
 	s2.output = append(s2.output, s2)
 	s2.isEmpty = true
-	s2.isQuestion = true
+	s2.isAlternation = true
 	s3 := alternate(s1, s2)
 	return s3
 }
-- 
2.30.2


From 5563a70568a645c959d798598ea5c3e66c224d16 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Mon, 3 Feb 2025 21:59:41 -0500
Subject: [PATCH 07/48] Reverse the order in which I pop states for
 alternation, because the old order mixed up the left and right branches

---
 regex/compile.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/regex/compile.go b/regex/compile.go
index b40c371..1068966 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -1059,8 +1059,8 @@ func thompson(re []postfixNode) (Reg, error) {
 			// 	'|a'
 			// 	'^a|'
 			// 	'^|a'
-			s1, err1 := pop(&nfa)
-			s2, err2 := pop(&nfa)
+			s2, err1 := pop(&nfa)
+			s1, err2 := pop(&nfa)
 			if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
 				if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
 					nfa = append(nfa, s2)
-- 
2.30.2


From 753e973d82f512719336a8217f0fff0493eafb84 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Mon, 3 Feb 2025 22:00:04 -0500
Subject: [PATCH 08/48] Started rewrite of matching algorithm, got
 concatenation and alternation done, kleene and zero-state stuff is next

---
 regex/matching.go | 458 +++++++++++++++++++++++++---------------------
 1 file changed, 254 insertions(+), 204 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index af2ede3..ad7d15b 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -1,7 +1,6 @@
 package regex
 
 import (
-	"container/heap"
 	"fmt"
 	"slices"
 	"sort"
@@ -267,16 +266,15 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	// chosen as the match for the entire string.
 	// This allows us to pick the longest possible match (which is how greedy matching works).
 	// COMMENT ABOVE IS CURRENTLY NOT UP-TO-DATE
-	tempIndices := newMatch(numGroups + 1)
+	//	tempIndices := newMatch(numGroups + 1)
 
-	foundPath := false
-	startIdx := offset
-	endIdx := offset
-	currentStates := &priorityQueue{}
-	heap.Init(currentStates)
-	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
-	i := offset                        // Index in string
-	startingFrom := i                  // Store starting index
+	//	foundPath := false
+	//startIdx := offset
+	//endIdx := offset
+	currentStates := make([]*nfaState, 0)
+	//	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
+	i := offset // Index in string
+	//startingFrom := i                  // Store starting index
 
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
@@ -287,214 +285,266 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		}
 	}
 	// Increment until we hit a character matching the start state (assuming not 0-state)
-	if start.isEmpty == false {
-		for i < len(str) && !start.contentContains(str, i) {
-			i++
-		}
-		startIdx = i
-		startingFrom = i
-		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
-	}
+	//	if start.isEmpty == false {
+	//		for i < len(str) && !start.contentContains(str, i) {
+	//			i++
+	//		}
+	//		startIdx = i
+	//		startingFrom = i
+	//		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
+	//	}
 
-	start.threadGroups = newMatch(numGroups + 1)
+	//	start.threadGroups = newMatch(numGroups + 1)
 	// Check if the start state begins a group - if so, add the start index to our list
-	if start.groupBegin {
-		start.threadGroups[start.groupNum].StartIdx = i
-		//		tempIndices[start.groupNum].startIdx = i
-	}
+	//if start.groupBegin {
+	//		start.threadGroups[start.groupNum].StartIdx = i
+	//		tempIndices[start.groupNum].startIdx = i
+	//}
 
 	start.threadSP = i
-	heap.Push(currentStates, newPriorQueueItem(start))
+	currentStates = append(currentStates, start)
+	var foundMatch bool
 	// Main loop
-	for currentStates.Len() > 0 {
-		currentState := heap.Pop(currentStates)
-		foundPath = false
+	for len(currentStates) > 0 {
+		currentState, _ := pop(&currentStates)
+		idx := currentState.threadSP
+		foundMatch = false
 
-		zeroStates := make([]*nfaState, 0)
-		// Keep taking zero-states, until there are no more left to take
-		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
-		topStateItem := currentStates.peek()
-		topState := topStateItem.(*priorQueueItem).state
-		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
-		tempStates = append(tempStates, zeroStates...)
-		num_appended := 0
-		for isZero == true {
-			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
-			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
-			if num_appended == 0 { // Break if we haven't appended any more unique values
-				break
-			}
+		if currentState.threadGroups == nil {
+			currentState.threadGroups = newMatch(numGroups + 1)
+			currentState.threadGroups[0].StartIdx = idx
 		}
-		if isZero == true {
-			currentStates.Pop()
-		}
-
-		for _, state := range tempStates {
-			heap.Push(currentStates, newPriorQueueItem(state))
-		}
-		tempStates = nil
-
-		// Take any transitions corresponding to current character
-		numStatesMatched := 0            // The number of states which had at least 1 match for this round
-		assertionFailed := false         // Whether or not an assertion failed for this round
-		lastStateInList := false         // Whether or not a last state was in our list of states
-		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
-		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
-		for numStatesMatched == 0 && lastStateInList == false {
-			if currentStates.Len() == 0 {
-				break
+		if currentState.groupBegin {
+			currentState.threadGroups[currentState.groupNum].StartIdx = idx
+		} else if currentState.groupEnd {
+			currentState.threadGroups[currentState.groupNum].EndIdx = idx
+		} else if currentState.isKleene {
+			// Append the
+		} else if currentState.isAlternation {
+			rightState := currentState.rightState
+			rightState.threadGroups = currentState.threadGroups
+			rightState.threadSP = currentState.threadSP
+			currentStates = append(currentStates, currentState.rightState)
+			leftState := currentState.leftState
+			leftState.threadGroups = currentState.threadGroups
+			leftState.threadSP = currentState.threadSP
+			currentStates = append(currentStates, currentState.leftState)
+			continue
+		} else if currentState.contentContains(str, idx) {
+			foundMatch = true
+			allMatches := make([]*nfaState, 0)
+			for _, v := range currentState.transitions {
+				allMatches = append(allMatches, v...)
 			}
-			stateItem := heap.Pop(currentStates)
-			state := stateItem.(*priorQueueItem).state
-			matches, numMatches := state.matchesFor(str, i)
-			if numMatches > 0 {
-				numStatesMatched++
-				tempStates = append([]*nfaState(nil), matches...)
-				foundPath = true
-				for _, m := range matches {
-					if m.threadGroups == nil {
-						m.threadGroups = newMatch(numGroups + 1)
-					}
-					m.threadSP = state.threadSP + 1
-					copy(m.threadGroups, state.threadGroups)
-				}
-			}
-			if numMatches < 0 {
-				assertionFailed = true
-			}
-			if state.isLast {
-				if state.isLookaround() {
-					lastLookaroundInList = true
-				}
-				lastStateInList = true
-				lastStatePtr = state
-			}
-		}
-
-		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
-			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
-			// state. The explanation below is my attempt to explain this behavior.
-			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
-			//
-			// One of the states in our list was a last state and a lookaround. In this case, we
-			// don't abort upon failure of the assertion, because we have found
-			// another path to a final state.
-			// Even if the last state _was_ an assertion, we can use the previously
-			// saved indices to find a match.
-			if lastLookaroundInList {
-				break
-			} else {
-				if i == startingFrom {
-					i++
-				}
-				return false, []Group{}, i
-			}
-		}
-		// Check if we can find a state in our list that is:
-		// 	a. A last-state
-		// 	b. Empty
-		// 	c. Doesn't assert anything
-		for _, stateItem := range *currentStates {
-			s := stateItem.state
-			if s.isLast && s.isEmpty && s.assert == noneAssert {
-				lastStatePtr = s
-				lastStateInList = true
-			}
-		}
-		if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
-			for j := 1; j < numGroups+1; j++ {
-				tempIndices[j] = lastStatePtr.threadGroups[j]
-			}
-			endIdx = i
-			tempIndices[0] = Group{startIdx, endIdx}
-			if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
-				return true, tempIndices, tempIndices[0].EndIdx + 1
-			} else {
-				return true, tempIndices, tempIndices[0].EndIdx
-			}
-		}
-
-		// Check if we can find a zero-length match
-		if foundPath == false {
-			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
-				return item.state
-			})
-			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
-				if tempIndices[0].IsValid() == false {
-					tempIndices[0] = Group{startIdx, startIdx}
-				}
-			}
-			// If we haven't moved in the string, increment the counter by 1
-			// to ensure we don't keep trying the same string over and over.
-			//			if i == startingFrom {
-			startIdx++
-			//	i++
-			//			}
-			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
-				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
-					return true, tempIndices, tempIndices[0].EndIdx + 1
+			slices.Reverse(allMatches)
+			for _, m := range allMatches {
+				m.threadGroups = currentState.threadGroups
+				if currentState.assert == noneAssert {
+					m.threadSP = idx + 1
 				} else {
-					return true, tempIndices, tempIndices[0].EndIdx
+					m.threadSP = idx
 				}
 			}
-			return false, []Group{}, startIdx
+			currentStates = append(currentStates, allMatches...)
 		}
-		currentStates = &priorityQueue{}
-		slices.Reverse(tempStates)
-		for _, state := range tempStates {
-			heap.Push(currentStates, newPriorQueueItem(state))
-		}
-		tempStates = nil
 
-		i++
-	}
+		if currentState.isLast && foundMatch { // Last state reached
+			currentState.threadGroups[0].EndIdx = idx + 1
+			return true, currentState.threadGroups, idx + 1
 
-	// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
-	// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
-	if currentStates.Len() > 0 {
-		topStateItem := currentStates.peek()
-		topState := topStateItem.(*priorQueueItem).state
-		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
-		tempStates = append(tempStates, zeroStates...)
-		num_appended := 0 // Number of unique states addded to tempStates
-		for isZero == true {
-			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
-			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
-			if num_appended == 0 { // Break if we haven't appended any more unique values
-				break
-			}
 		}
 	}
-
-	for _, state := range tempStates {
-		heap.Push(currentStates, newPriorQueueItem(state))
-	}
-	tempStates = nil
-
-	for _, stateItem := range *currentStates {
-		state := stateItem.state
-		// Only add the match if the start index is in bounds. If the state has an assertion,
-		// make sure the assertion checks out.
-		if state.isLast && i <= len(str) {
-			if state.assert == noneAssert || state.checkAssertion(str, i) {
-				for j := 1; j < numGroups+1; j++ {
-					tempIndices[j] = state.threadGroups[j]
-				}
-				endIdx = i
-				tempIndices[0] = Group{startIdx, endIdx}
-			}
-		}
-	}
-
-	if tempIndices.numValidGroups() > 0 {
-		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
-			return true, tempIndices, tempIndices[0].EndIdx + 1
-		} else {
-			return true, tempIndices, tempIndices[0].EndIdx
-		}
-	}
-	if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
-		startIdx++
-	}
-	return false, []Group{}, startIdx
+	return false, []Group{}, i + 1
+	//		zeroStates := make([]*nfaState, 0)
+	//		// Keep taking zero-states, until there are no more left to take
+	//		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
+	//		topStateItem := currentStates.peek()
+	//		topState := topStateItem.(*priorQueueItem).state
+	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
+	//		tempStates = append(tempStates, zeroStates...)
+	//		num_appended := 0
+	//		for isZero == true {
+	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
+	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
+	//			if num_appended == 0 { // Break if we haven't appended any more unique values
+	//				break
+	//			}
+	//		}
+	//		if isZero == true {
+	//			currentStates.Pop()
+	//		}
+	//
+	//		for _, state := range tempStates {
+	//			heap.Push(currentStates, newPriorQueueItem(state))
+	//		}
+	//		tempStates = nil
+	//
+	//		// Take any transitions corresponding to current character
+	//		numStatesMatched := 0            // The number of states which had at least 1 match for this round
+	//		assertionFailed := false         // Whether or not an assertion failed for this round
+	//		lastStateInList := false         // Whether or not a last state was in our list of states
+	//		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
+	//		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
+	//		for numStatesMatched == 0 && lastStateInList == false {
+	//			if currentStates.Len() == 0 {
+	//				break
+	//			}
+	//			stateItem := heap.Pop(currentStates)
+	//			state := stateItem.(*priorQueueItem).state
+	//			matches, numMatches := state.matchesFor(str, i)
+	//			if numMatches > 0 {
+	//				numStatesMatched++
+	//				tempStates = append([]*nfaState(nil), matches...)
+	//				foundPath = true
+	//				for _, m := range matches {
+	//					if m.threadGroups == nil {
+	//						m.threadGroups = newMatch(numGroups + 1)
+	//					}
+	//					m.threadSP = state.threadSP + 1
+	//					copy(m.threadGroups, state.threadGroups)
+	//				}
+	//			}
+	//			if numMatches < 0 {
+	//				assertionFailed = true
+	//			}
+	//			if state.isLast {
+	//				if state.isLookaround() {
+	//					lastLookaroundInList = true
+	//				}
+	//				lastStateInList = true
+	//				lastStatePtr = state
+	//			}
+	//		}
+	//
+	//		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
+	//			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
+	//			// state. The explanation below is my attempt to explain this behavior.
+	//			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
+	//			//
+	//			// One of the states in our list was a last state and a lookaround. In this case, we
+	//			// don't abort upon failure of the assertion, because we have found
+	//			// another path to a final state.
+	//			// Even if the last state _was_ an assertion, we can use the previously
+	//			// saved indices to find a match.
+	//			if lastLookaroundInList {
+	//				break
+	//			} else {
+	//				if i == startingFrom {
+	//					i++
+	//				}
+	//				return false, []Group{}, i
+	//			}
+	//		}
+	//		// Check if we can find a state in our list that is:
+	//		// 	a. A last-state
+	//		// 	b. Empty
+	//		// 	c. Doesn't assert anything
+	//		for _, stateItem := range *currentStates {
+	//			s := stateItem.state
+	//			if s.isLast && s.isEmpty && s.assert == noneAssert {
+	//				lastStatePtr = s
+	//				lastStateInList = true
+	//			}
+	//		}
+	//		if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
+	//			for j := 1; j < numGroups+1; j++ {
+	//				tempIndices[j] = lastStatePtr.threadGroups[j]
+	//			}
+	//			endIdx = i
+	//			tempIndices[0] = Group{startIdx, endIdx}
+	//			if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
+	//				return true, tempIndices, tempIndices[0].EndIdx + 1
+	//			} else {
+	//				return true, tempIndices, tempIndices[0].EndIdx
+	//			}
+	//		}
+	//
+	//		// Check if we can find a zero-length match
+	//		if foundPath == false {
+	//			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
+	//				return item.state
+	//			})
+	//			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
+	//				if tempIndices[0].IsValid() == false {
+	//					tempIndices[0] = Group{startIdx, startIdx}
+	//				}
+	//			}
+	//			// If we haven't moved in the string, increment the counter by 1
+	//			// to ensure we don't keep trying the same string over and over.
+	//			//			if i == startingFrom {
+	//			startIdx++
+	//			//	i++
+	//			//			}
+	//			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
+	//				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
+	//					return true, tempIndices, tempIndices[0].EndIdx + 1
+	//				} else {
+	//					return true, tempIndices, tempIndices[0].EndIdx
+	//				}
+	//			}
+	//			return false, []Group{}, startIdx
+	//		}
+	//		currentStates = &priorityQueue{}
+	//		slices.Reverse(tempStates)
+	//		for _, state := range tempStates {
+	//			heap.Push(currentStates, newPriorQueueItem(state))
+	//		}
+	//		tempStates = nil
+	//
+	//		i++
+	//	}
+	//
+	// // End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
+	// // This is the exact same algorithm used inside the loop, so I should probably put it in a function.
+	//
+	//	if currentStates.Len() > 0 {
+	//		topStateItem := currentStates.peek()
+	//		topState := topStateItem.(*priorQueueItem).state
+	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
+	//		tempStates = append(tempStates, zeroStates...)
+	//		num_appended := 0 // Number of unique states addded to tempStates
+	//		for isZero == true {
+	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
+	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
+	//			if num_appended == 0 { // Break if we haven't appended any more unique values
+	//				break
+	//			}
+	//		}
+	//	}
+	//
+	//	for _, state := range tempStates {
+	//		heap.Push(currentStates, newPriorQueueItem(state))
+	//	}
+	//
+	// tempStates = nil
+	//
+	//	for _, stateItem := range *currentStates {
+	//		state := stateItem.state
+	//		// Only add the match if the start index is in bounds. If the state has an assertion,
+	//		// make sure the assertion checks out.
+	//		if state.isLast && i <= len(str) {
+	//			if state.assert == noneAssert || state.checkAssertion(str, i) {
+	//				for j := 1; j < numGroups+1; j++ {
+	//					tempIndices[j] = state.threadGroups[j]
+	//				}
+	//				endIdx = i
+	//				tempIndices[0] = Group{startIdx, endIdx}
+	//			}
+	//		}
+	//	}
+	//
+	//	if tempIndices.numValidGroups() > 0 {
+	//		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
+	//			return true, tempIndices, tempIndices[0].EndIdx + 1
+	//		} else {
+	//			return true, tempIndices, tempIndices[0].EndIdx
+	//		}
+	//	}
+	//
+	// if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
+	//
+	//		startIdx++
+	//	}
+	//
+	// return false, []Group{}, startIdx
 }
-- 
2.30.2


From e0253dfaf3333d9873c497b5c444d384b4abb183 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Tue, 4 Feb 2025 14:09:04 -0500
Subject: [PATCH 09/48] Change kleene() to an alternation-style construct

---
 regex/nfa.go | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/regex/nfa.go b/regex/nfa.go
index f03edab..a9c1ec6 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -116,7 +116,6 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 		clone.rightState = clone
 	}
 	clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap)
-
 	return clone
 }
 
@@ -326,12 +325,16 @@ func kleene(s1 nfaState) (*nfaState, error) {
 		return nil, fmt.Errorf("previous token is not quantifiable")
 	}
 
-	toReturn := &nfaState{}
-	toReturn.transitions = make(map[int][]*nfaState)
-	toReturn.content = newContents(epsilon)
+	emptyState := zeroLengthMatchState()
+	emptyState.assert = noneAssert
+	toReturn := alternate(&s1, &emptyState)
+
+	//	toReturn := &nfaState{}
+	//	toReturn.transitions = make(map[int][]*nfaState)
+	//	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isKleene = true
-	toReturn.output = append(toReturn.output, toReturn)
+	toReturn.output = []*nfaState{&emptyState}
 	for i := range s1.output {
 		for _, c := range toReturn.content {
 			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn)
@@ -340,6 +343,7 @@ func kleene(s1 nfaState) (*nfaState, error) {
 	for _, c := range s1.content {
 		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
 	}
+	//toReturn.kleeneState = &s1
 	return toReturn, nil
 }
 
@@ -374,7 +378,6 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
 	s2.content = newContents(epsilon)
 	s2.output = append(s2.output, s2)
 	s2.isEmpty = true
-	s2.isAlternation = true
 	s3 := alternate(s1, s2)
 	return s3
 }
-- 
2.30.2


From 3ce611d12185471e4e760d2954bf942ea7a661e7 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Tue, 4 Feb 2025 14:09:24 -0500
Subject: [PATCH 10/48] More work towards implementing PCRE matching

---
 regex/matching.go | 62 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 8 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index ad7d15b..1263e37 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -314,13 +314,36 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			currentState.threadGroups = newMatch(numGroups + 1)
 			currentState.threadGroups[0].StartIdx = idx
 		}
+
 		if currentState.groupBegin {
 			currentState.threadGroups[currentState.groupNum].StartIdx = idx
-		} else if currentState.groupEnd {
+		}
+
+		if currentState.groupEnd {
 			currentState.threadGroups[currentState.groupNum].EndIdx = idx
-		} else if currentState.isKleene {
-			// Append the
-		} else if currentState.isAlternation {
+		}
+
+		//		if currentState.isKleene {
+		//			// Append the next-state (after the kleene), then append the kleene state
+		//			allMatches := make([]*nfaState, 0)
+		//			for _, v := range currentState.transitions {
+		//				allMatches = append(allMatches, v...)
+		//			}
+		//			slices.Reverse(allMatches)
+		//			for _, m := range allMatches {
+		//				m.threadGroups = currentState.threadGroups
+		//				m.threadSP = idx
+		//			}
+		//			currentStates = append(currentStates, allMatches...)
+		//
+		//			//	kleeneState := currentState.kleeneState
+		//			//	kleeneState.threadGroups = currentState.threadGroups
+		//			//	kleeneState.threadSP = currentState.threadSP
+		//			//	currentStates = append(currentStates, kleeneState)
+		//			continue
+		//		}
+
+		if currentState.isAlternation {
 			rightState := currentState.rightState
 			rightState.threadGroups = currentState.threadGroups
 			rightState.threadSP = currentState.threadSP
@@ -330,7 +353,22 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			leftState.threadSP = currentState.threadSP
 			currentStates = append(currentStates, currentState.leftState)
 			continue
-		} else if currentState.contentContains(str, idx) {
+		}
+
+		if currentState.isEmpty && currentState.assert == noneAssert {
+			allMatches := make([]*nfaState, 0)
+			for _, v := range currentState.transitions {
+				allMatches = append(allMatches, v...)
+			}
+			slices.Reverse(allMatches)
+			for _, m := range allMatches {
+				m.threadGroups = currentState.threadGroups
+				m.threadSP = idx
+			}
+			currentStates = append(currentStates, allMatches...)
+		}
+
+		if currentState.contentContains(str, idx) {
 			foundMatch = true
 			allMatches := make([]*nfaState, 0)
 			for _, v := range currentState.transitions {
@@ -348,9 +386,17 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			currentStates = append(currentStates, allMatches...)
 		}
 
-		if currentState.isLast && foundMatch { // Last state reached
-			currentState.threadGroups[0].EndIdx = idx + 1
-			return true, currentState.threadGroups, idx + 1
+		if currentState.isLast { // Last state reached
+			if foundMatch {
+				currentState.threadGroups[0].EndIdx = idx + 1
+				return true, currentState.threadGroups, idx + 1
+			} else if currentState.isEmpty && currentState.assert == noneAssert {
+				currentState.threadGroups[0].EndIdx = idx
+				if idx == currentState.threadGroups[0].StartIdx {
+					idx++
+				}
+				return true, currentState.threadGroups, idx
+			}
 
 		}
 	}
-- 
2.30.2


From d4e8cb74fdc8efe78a92ee3ef72f0ba9b7566893 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Wed, 5 Feb 2025 11:32:20 -0500
Subject: [PATCH 11/48] Replaced pointer to nfaState with nfaState

---
 regex/matching.go | 65 +++++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 1263e37..23f8317 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -151,6 +151,11 @@ func pruneIndices(indices []Match) []Match {
 	return toRet
 }
 
+func copyThread(to *nfaState, from nfaState) {
+	to.threadSP = from.threadSP
+	to.threadGroups = from.threadGroups
+}
+
 // Find returns the 0-group of the leftmost match of the regex in the given string.
 // An error value != nil indicates that no match was found.
 func (regex Reg) Find(str string) (Group, error) {
@@ -271,7 +276,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	//	foundPath := false
 	//startIdx := offset
 	//endIdx := offset
-	currentStates := make([]*nfaState, 0)
+	currentStates := make([]nfaState, 0)
 	//	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
 	i := offset // Index in string
 	//startingFrom := i                  // Store starting index
@@ -302,13 +307,15 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	//}
 
 	start.threadSP = i
-	currentStates = append(currentStates, start)
+	currentStates = append(currentStates, *start)
 	var foundMatch bool
+	var isEmptyAndNoAssertion bool
 	// Main loop
 	for len(currentStates) > 0 {
 		currentState, _ := pop(&currentStates)
 		idx := currentState.threadSP
 		foundMatch = false
+		isEmptyAndNoAssertion = false
 
 		if currentState.threadGroups == nil {
 			currentState.threadGroups = newMatch(numGroups + 1)
@@ -343,44 +350,39 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		//			continue
 		//		}
 
+		// Alternation - enqueue left then right state, and continue
 		if currentState.isAlternation {
 			rightState := currentState.rightState
-			rightState.threadGroups = currentState.threadGroups
-			rightState.threadSP = currentState.threadSP
-			currentStates = append(currentStates, currentState.rightState)
+			copyThread(rightState, currentState)
+			currentStates = append(currentStates, *currentState.rightState)
 			leftState := currentState.leftState
-			leftState.threadGroups = currentState.threadGroups
-			leftState.threadSP = currentState.threadSP
-			currentStates = append(currentStates, currentState.leftState)
+			copyThread(leftState, currentState)
+			currentStates = append(currentStates, *currentState.leftState)
 			continue
 		}
 
+		// Empty state - enqueue next state, do _not_ increment the SP
 		if currentState.isEmpty && currentState.assert == noneAssert {
-			allMatches := make([]*nfaState, 0)
-			for _, v := range currentState.transitions {
-				allMatches = append(allMatches, v...)
-			}
-			slices.Reverse(allMatches)
-			for _, m := range allMatches {
-				m.threadGroups = currentState.threadGroups
-				m.threadSP = idx
-			}
-			currentStates = append(currentStates, allMatches...)
+			isEmptyAndNoAssertion = true
 		}
 
 		if currentState.contentContains(str, idx) {
 			foundMatch = true
-			allMatches := make([]*nfaState, 0)
+		}
+
+		if isEmptyAndNoAssertion || foundMatch {
+			allMatches := make([]nfaState, 0)
 			for _, v := range currentState.transitions {
-				allMatches = append(allMatches, v...)
+				dereferenced := funcMap(v, func(s *nfaState) nfaState {
+					return *s
+				})
+				allMatches = append(allMatches, dereferenced...)
 			}
 			slices.Reverse(allMatches)
-			for _, m := range allMatches {
-				m.threadGroups = currentState.threadGroups
-				if currentState.assert == noneAssert {
-					m.threadSP = idx + 1
-				} else {
-					m.threadSP = idx
+			for i := range allMatches {
+				copyThread(&allMatches[i], currentState)
+				if foundMatch && currentState.assert == noneAssert {
+					allMatches[i].threadSP += 1
 				}
 			}
 			currentStates = append(currentStates, allMatches...)
@@ -388,8 +390,15 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 		if currentState.isLast { // Last state reached
 			if foundMatch {
-				currentState.threadGroups[0].EndIdx = idx + 1
-				return true, currentState.threadGroups, idx + 1
+				if currentState.assert != noneAssert {
+					currentState.threadGroups[0].EndIdx = idx
+				} else {
+					currentState.threadGroups[0].EndIdx = idx + 1
+				}
+				if idx == currentState.threadGroups[0].StartIdx {
+					idx += 1
+				}
+				return true, currentState.threadGroups, idx
 			} else if currentState.isEmpty && currentState.assert == noneAssert {
 				currentState.threadGroups[0].EndIdx = idx
 				if idx == currentState.threadGroups[0].StartIdx {
-- 
2.30.2


From 7c62ba6bfdf9db60de31e61d49445fbc3e66e5da Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Wed, 5 Feb 2025 12:21:04 -0500
Subject: [PATCH 12/48] Started implementing Thompson's algorithm for matching,
 because the old one was completely backtracking (so it would enter infinite
 loops on something like '(a*)*')

The git diff claims that a ton of code was changed, but most of it was just indentation changes.
---
 regex/matching.go | 180 +++++++++++++++++++++++-----------------------
 1 file changed, 92 insertions(+), 88 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 23f8317..760f7c1 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -277,6 +277,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	//startIdx := offset
 	//endIdx := offset
 	currentStates := make([]nfaState, 0)
+	nextStates := make([]nfaState, 0)
 	//	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
 	i := offset // Index in string
 	//startingFrom := i                  // Store starting index
@@ -311,103 +312,106 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	var foundMatch bool
 	var isEmptyAndNoAssertion bool
 	// Main loop
-	for len(currentStates) > 0 {
-		currentState, _ := pop(&currentStates)
-		idx := currentState.threadSP
-		foundMatch = false
-		isEmptyAndNoAssertion = false
+	for idx := i; idx <= len(str); idx++ {
+		for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
+			currentState := currentStates[currentStateIdx]
+			foundMatch = false
+			isEmptyAndNoAssertion = false
 
-		if currentState.threadGroups == nil {
-			currentState.threadGroups = newMatch(numGroups + 1)
-			currentState.threadGroups[0].StartIdx = idx
-		}
-
-		if currentState.groupBegin {
-			currentState.threadGroups[currentState.groupNum].StartIdx = idx
-		}
-
-		if currentState.groupEnd {
-			currentState.threadGroups[currentState.groupNum].EndIdx = idx
-		}
-
-		//		if currentState.isKleene {
-		//			// Append the next-state (after the kleene), then append the kleene state
-		//			allMatches := make([]*nfaState, 0)
-		//			for _, v := range currentState.transitions {
-		//				allMatches = append(allMatches, v...)
-		//			}
-		//			slices.Reverse(allMatches)
-		//			for _, m := range allMatches {
-		//				m.threadGroups = currentState.threadGroups
-		//				m.threadSP = idx
-		//			}
-		//			currentStates = append(currentStates, allMatches...)
-		//
-		//			//	kleeneState := currentState.kleeneState
-		//			//	kleeneState.threadGroups = currentState.threadGroups
-		//			//	kleeneState.threadSP = currentState.threadSP
-		//			//	currentStates = append(currentStates, kleeneState)
-		//			continue
-		//		}
-
-		// Alternation - enqueue left then right state, and continue
-		if currentState.isAlternation {
-			rightState := currentState.rightState
-			copyThread(rightState, currentState)
-			currentStates = append(currentStates, *currentState.rightState)
-			leftState := currentState.leftState
-			copyThread(leftState, currentState)
-			currentStates = append(currentStates, *currentState.leftState)
-			continue
-		}
-
-		// Empty state - enqueue next state, do _not_ increment the SP
-		if currentState.isEmpty && currentState.assert == noneAssert {
-			isEmptyAndNoAssertion = true
-		}
-
-		if currentState.contentContains(str, idx) {
-			foundMatch = true
-		}
-
-		if isEmptyAndNoAssertion || foundMatch {
-			allMatches := make([]nfaState, 0)
-			for _, v := range currentState.transitions {
-				dereferenced := funcMap(v, func(s *nfaState) nfaState {
-					return *s
-				})
-				allMatches = append(allMatches, dereferenced...)
+			if currentState.threadGroups == nil {
+				currentState.threadGroups = newMatch(numGroups + 1)
+				currentState.threadGroups[0].StartIdx = idx
 			}
-			slices.Reverse(allMatches)
-			for i := range allMatches {
-				copyThread(&allMatches[i], currentState)
-				if foundMatch && currentState.assert == noneAssert {
-					allMatches[i].threadSP += 1
+
+			if currentState.groupBegin {
+				currentState.threadGroups[currentState.groupNum].StartIdx = idx
+			}
+
+			if currentState.groupEnd {
+				currentState.threadGroups[currentState.groupNum].EndIdx = idx
+			}
+
+			//		if currentState.isKleene {
+			//			// Append the next-state (after the kleene), then append the kleene state
+			//			allMatches := make([]*nfaState, 0)
+			//			for _, v := range currentState.transitions {
+			//				allMatches = append(allMatches, v...)
+			//			}
+			//			slices.Reverse(allMatches)
+			//			for _, m := range allMatches {
+			//				m.threadGroups = currentState.threadGroups
+			//				m.threadSP = idx
+			//			}
+			//			currentStates = append(currentStates, allMatches...)
+			//
+			//			//	kleeneState := currentState.kleeneState
+			//			//	kleeneState.threadGroups = currentState.threadGroups
+			//			//	kleeneState.threadSP = currentState.threadSP
+			//			//	currentStates = append(currentStates, kleeneState)
+			//			continue
+			//		}
+
+			// Alternation - enqueue left then right state, and continue
+			if currentState.isAlternation {
+				leftState := currentState.leftState
+				copyThread(leftState, currentState)
+				currentStates = append(currentStates, *currentState.leftState)
+				rightState := currentState.rightState
+				copyThread(rightState, currentState)
+				currentStates = append(currentStates, *currentState.rightState)
+				continue
+			}
+
+			// Empty state - enqueue next state, do _not_ increment the SP
+			if currentState.isEmpty && currentState.assert == noneAssert {
+				isEmptyAndNoAssertion = true
+			}
+
+			if currentState.contentContains(str, idx) {
+				foundMatch = true
+			}
+
+			if isEmptyAndNoAssertion || foundMatch {
+				allMatches := make([]nfaState, 0)
+				for _, v := range currentState.transitions {
+					dereferenced := funcMap(v, func(s *nfaState) nfaState {
+						return *s
+					})
+					allMatches = append(allMatches, dereferenced...)
 				}
+				slices.Reverse(allMatches)
+				for i := range allMatches {
+					copyThread(&allMatches[i], currentState)
+					if foundMatch && currentState.assert == noneAssert {
+						allMatches[i].threadSP += 1
+					}
+				}
+				nextStates = append(nextStates, allMatches...)
 			}
-			currentStates = append(currentStates, allMatches...)
-		}
 
-		if currentState.isLast { // Last state reached
-			if foundMatch {
-				if currentState.assert != noneAssert {
+			if currentState.isLast { // Last state reached
+				if foundMatch {
+					if currentState.assert != noneAssert {
+						currentState.threadGroups[0].EndIdx = idx
+					} else {
+						currentState.threadGroups[0].EndIdx = idx + 1
+					}
+					if idx == currentState.threadGroups[0].StartIdx {
+						idx += 1
+					}
+					return true, currentState.threadGroups, idx
+				} else if currentState.isEmpty && currentState.assert == noneAssert {
 					currentState.threadGroups[0].EndIdx = idx
-				} else {
-					currentState.threadGroups[0].EndIdx = idx + 1
+					if idx == currentState.threadGroups[0].StartIdx {
+						idx++
+					}
+					return true, currentState.threadGroups, idx
 				}
-				if idx == currentState.threadGroups[0].StartIdx {
-					idx += 1
-				}
-				return true, currentState.threadGroups, idx
-			} else if currentState.isEmpty && currentState.assert == noneAssert {
-				currentState.threadGroups[0].EndIdx = idx
-				if idx == currentState.threadGroups[0].StartIdx {
-					idx++
-				}
-				return true, currentState.threadGroups, idx
-			}
 
+			}
 		}
+		copy(currentStates, nextStates)
+		nextStates = nil
 	}
 	return false, []Group{}, i + 1
 	//		zeroStates := make([]*nfaState, 0)
-- 
2.30.2


From 858e535fba88846dc8ecda50010834c27762ce92 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Wed, 5 Feb 2025 18:01:36 -0500
Subject: [PATCH 13/48] Continued implementing Thompson's algorithm

---
 regex/matching.go | 43 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 760f7c1..3252742 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -153,7 +153,7 @@ func pruneIndices(indices []Match) []Match {
 
 func copyThread(to *nfaState, from nfaState) {
 	to.threadSP = from.threadSP
-	to.threadGroups = from.threadGroups
+	to.threadGroups = append([]Group{}, from.threadGroups...)
 }
 
 // Find returns the 0-group of the leftmost match of the regex in the given string.
@@ -325,10 +325,33 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 			if currentState.groupBegin {
 				currentState.threadGroups[currentState.groupNum].StartIdx = idx
+				//		allMatches := make([]nfaState, 0)
+				//		for _, v := range currentState.transitions {
+				//			dereferenced := funcMap(v, func(s *nfaState) nfaState {
+				//				return *s
+				//			})
+				//			allMatches = append(allMatches, dereferenced...)
+				//		}
+				//		slices.Reverse(allMatches)
+				//		for i := range allMatches {
+				//			copyThread(&allMatches[i], currentState)
+				//		}
+				//		currentStates = append(currentStates, allMatches...)
 			}
-
 			if currentState.groupEnd {
 				currentState.threadGroups[currentState.groupNum].EndIdx = idx
+				//			allMatches := make([]nfaState, 0)
+				//			for _, v := range currentState.transitions {
+				//				dereferenced := funcMap(v, func(s *nfaState) nfaState {
+				//					return *s
+				//				})
+				//				allMatches = append(allMatches, dereferenced...)
+				//			}
+				//			slices.Reverse(allMatches)
+				//			for i := range allMatches {
+				//				copyThread(&allMatches[i], currentState)
+				//			}
+				//			currentStates = append(currentStates, allMatches...)
 			}
 
 			//		if currentState.isKleene {
@@ -363,7 +386,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			}
 
 			// Empty state - enqueue next state, do _not_ increment the SP
-			if currentState.isEmpty && currentState.assert == noneAssert {
+			if currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
 				isEmptyAndNoAssertion = true
 			}
 
@@ -386,10 +409,16 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 						allMatches[i].threadSP += 1
 					}
 				}
-				nextStates = append(nextStates, allMatches...)
+				if currentState.groupBegin {
+					currentStates = slices.Insert(currentStates, currentStateIdx+1, allMatches...)
+				} else if currentState.groupEnd {
+					currentStates = append(currentStates, allMatches...)
+				} else {
+					nextStates = append(nextStates, allMatches...)
+				}
 			}
 
-			if currentState.isLast { // Last state reached
+			if currentState.isLast && len(nextStates) == 0 { // Last state reached
 				if foundMatch {
 					if currentState.assert != noneAssert {
 						currentState.threadGroups[0].EndIdx = idx
@@ -400,7 +429,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 						idx += 1
 					}
 					return true, currentState.threadGroups, idx
-				} else if currentState.isEmpty && currentState.assert == noneAssert {
+				} else if isEmptyAndNoAssertion {
 					currentState.threadGroups[0].EndIdx = idx
 					if idx == currentState.threadGroups[0].StartIdx {
 						idx++
@@ -410,7 +439,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 			}
 		}
-		copy(currentStates, nextStates)
+		currentStates = append([]nfaState{}, nextStates...)
 		nextStates = nil
 	}
 	return false, []Group{}, i + 1
-- 
2.30.2


From cca8c7cda2df7f221bc3430ab8bfad2190572b98 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Wed, 5 Feb 2025 22:20:28 -0500
Subject: [PATCH 14/48] Got rid of transitions parameter, changed how kleene
 state is processed

I removed the 'transitions' parameter of nfaState, replacing it with a
single nfaState pointer. This is because any non-alternation state will
only have one next state, so the map was just added complexity.

I changed alternation processing - instead of having their own dedicated
fields, they just use the new 'next' parameter, and another one called
'splitState'.

I also changed the kleene state processing to remove the unnecessary
empty state in the right-side alternation (it actually messed up my
matching).
---
 regex/compile.go |  19 ++--
 regex/nfa.go     | 271 ++++++++++++++++++++++++-----------------------
 2 files changed, 147 insertions(+), 143 deletions(-)

diff --git a/regex/compile.go b/regex/compile.go
index 1068966..0429c37 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -822,7 +822,6 @@ func thompson(re []postfixNode) (Reg, error) {
 	for _, c := range re {
 		if c.nodetype == characterNode || c.nodetype == assertionNode {
 			stateToAdd := nfaState{}
-			stateToAdd.transitions = make(map[int][]*nfaState)
 			if c.allChars {
 				stateToAdd.allChars = true
 				if len(c.except) != 0 {
@@ -934,7 +933,6 @@ func thompson(re []postfixNode) (Reg, error) {
 			s.isEmpty = true
 			s.output = make([]*nfaState, 0)
 			s.output = append(s.output, s)
-			s.transitions = make(map[int][]*nfaState)
 			// LPAREN nodes are just added normally
 			if c.nodetype == lparenNode {
 				numGroups++
@@ -966,7 +964,7 @@ func thompson(re []postfixNode) (Reg, error) {
 					s.groupNum = lparenNode.groupNum
 					to_add := concatenate(lparenNode, s)
 					nfa = append(nfa, to_add)
-				} else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
+				} else if middleNode.groupBegin && middleNode.numTransitions() == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
 					nfa = append(nfa, lparenNode)    // I shouldn't have popped this out, because it is not involved in the current capturing group
 					s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually an lparen
 					to_add := concatenate(middleNode, s)
@@ -1030,14 +1028,14 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying kleene star")
 			}
-			stateToAdd, err := kleene(*s1)
+			stateToAdd, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
 			nfa = append(nfa, stateToAdd)
 		case plusNode: // a+ is equivalent to aa*
 			s1 := mustPop(&nfa)
-			s2, err := kleene(*s1)
+			s2, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
@@ -1061,14 +1059,14 @@ func thompson(re []postfixNode) (Reg, error) {
 			// 	'^|a'
 			s2, err1 := pop(&nfa)
 			s1, err2 := pop(&nfa)
-			if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err2 != nil || (s2.groupBegin && s2.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
 					nfa = append(nfa, s2)
 				}
 				tmp := zeroLengthMatchState()
 				s2 = &tmp
 			}
-			if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err1 == nil { // See above for explanation
 					nfa = append(nfa, s1)
 				}
@@ -1100,7 +1098,7 @@ func thompson(re []postfixNode) (Reg, error) {
 				stateToAdd = concatenate(stateToAdd, cloneState(poppedState))
 			}
 			if c.endReps == infinite_reps { // Case 3
-				s2, err := kleene(*poppedState)
+				s2, err := kleene(poppedState)
 				if err != nil {
 					return Reg{}, err
 				}
@@ -1117,7 +1115,10 @@ func thompson(re []postfixNode) (Reg, error) {
 		return Reg{}, fmt.Errorf("invalid regex")
 	}
 
-	verifyLastStates(nfa)
+	lastState := newState()
+	lastState.isLast = true
+
+	concatenate(nfa[0], &lastState)
 
 	return Reg{nfa[0], numGroups}, nil
 
diff --git a/regex/nfa.go b/regex/nfa.go
index a9c1ec6..79daaf6 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -25,25 +25,25 @@ const (
 )
 
 type nfaState struct {
-	content                    stateContents       // Contents of current state
-	isEmpty                    bool                // If it is empty - Union operator and Kleene star states will be empty
-	isLast                     bool                // If it is the last state (acept state)
-	output                     []*nfaState         // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
-	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
-	isKleene                   bool                // Identifies whether current node is a 0-state representing Kleene star
-	isQuestion                 bool                // Identifies whether current node is a 0-state representing the question operator
-	isAlternation              bool                // Identifies whether current node is a 0-state representing an alternation
-	leftState                  *nfaState           // Only for alternation states - the 'left' branch of the alternation
-	rightState                 *nfaState           // Only for alternation states - the 'right' branch of the alternation
-	assert                     assertType          // Type of assertion of current node - NONE means that the node doesn't assert anything
-	allChars                   bool                // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
-	except                     []rune              // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
-	lookaroundRegex            string              // Only for lookaround states - Contents of the regex that the lookaround state holds
-	lookaroundNFA              *nfaState           // Holds the NFA of the lookaroundRegex - if it exists
-	lookaroundNumCaptureGroups int                 // Number of capturing groups in lookaround regex if current node is a lookaround
-	groupBegin                 bool                // Whether or not the node starts a capturing group
-	groupEnd                   bool                // Whether or not the node ends a capturing group
-	groupNum                   int                 // Which capturing group the node starts / ends
+	content stateContents // Contents of current state
+	isEmpty bool          // If it is empty - Union operator and Kleene star states will be empty
+	isLast  bool          // If it is the last state (acept state)
+	output  []*nfaState   // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
+	//	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
+	next                       *nfaState  // The next state (not for alternation or kleene states)
+	isKleene                   bool       // Identifies whether current node is a 0-state representing Kleene star
+	isQuestion                 bool       // Identifies whether current node is a 0-state representing the question operator
+	isAlternation              bool       // Identifies whether current node is a 0-state representing an alternation
+	splitState                 *nfaState  // Only for alternation states - the 'other' branch of the alternation ('next' is the first)
+	assert                     assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
+	allChars                   bool       // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
+	except                     []rune     // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
+	lookaroundRegex            string     // Only for lookaround states - Contents of the regex that the lookaround state holds
+	lookaroundNFA              *nfaState  // Holds the NFA of the lookaroundRegex - if it exists
+	lookaroundNumCaptureGroups int        // Number of capturing groups in lookaround regex if current node is a lookaround
+	groupBegin                 bool       // Whether or not the node starts a capturing group
+	groupEnd                   bool       // Whether or not the node ends a capturing group
+	groupNum                   int        // Which capturing group the node starts / ends
 	// The following properties depend on the current match - I should think about resetting them for every match.
 	zeroMatchFound bool    // Whether or not the state has been used for a zero-length match - only relevant for zero states
 	threadGroups   []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
@@ -73,7 +73,6 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 		isEmpty:         stateToClone.isEmpty,
 		isLast:          stateToClone.isLast,
 		output:          make([]*nfaState, len(stateToClone.output)),
-		transitions:     make(map[int][]*nfaState),
 		isKleene:        stateToClone.isKleene,
 		isQuestion:      stateToClone.isQuestion,
 		isAlternation:   stateToClone.isAlternation,
@@ -94,28 +93,18 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 			clone.output[i] = cloneStateHelper(s, cloneMap)
 		}
 	}
-	for k, v := range stateToClone.transitions {
-		clone.transitions[k] = make([]*nfaState, len(v))
-		for i, s := range v {
-			if s == stateToClone {
-				clone.transitions[k][i] = clone
-			} else {
-				clone.transitions[k][i] = cloneStateHelper(s, cloneMap)
-			}
-		}
-	}
 	if stateToClone.lookaroundNFA == stateToClone {
 		clone.lookaroundNFA = clone
 	}
 	clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
-	if stateToClone.leftState == stateToClone {
-		clone.leftState = clone
+	if stateToClone.splitState == stateToClone {
+		clone.splitState = clone
 	}
-	clone.leftState = cloneStateHelper(stateToClone.leftState, cloneMap)
-	if stateToClone.rightState == stateToClone {
-		clone.rightState = clone
+	clone.splitState = cloneStateHelper(stateToClone.splitState, cloneMap)
+	if stateToClone.next == stateToClone {
+		clone.next = clone
 	}
-	clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap)
+	clone.next = cloneStateHelper(stateToClone.next, cloneMap)
 	return clone
 }
 
@@ -126,6 +115,9 @@ func resetThreads(start *nfaState) {
 }
 
 func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
+	if state == nil {
+		return
+	}
 	if _, ok := visitedMap[state]; ok {
 		return
 	}
@@ -133,10 +125,11 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	state.threadGroups = nil
 	state.threadSP = 0
 	visitedMap[state] = true
-	for _, v := range state.transitions {
-		for _, nextState := range v {
-			resetThreadsHelper(nextState, visitedMap)
-		}
+	if state.isAlternation {
+		resetThreadsHelper(state.next, visitedMap)
+		resetThreadsHelper(state.splitState, visitedMap)
+	} else {
+		resetThreadsHelper(state.next, visitedMap)
 	}
 }
 
@@ -237,74 +230,84 @@ func (s nfaState) isLookaround() bool {
 	return s.assert == plaAssert || s.assert == plbAssert || s.assert == nlaAssert || s.assert == nlbAssert
 }
 
+func (s nfaState) numTransitions() int {
+	if s.next == nil && s.splitState == nil {
+		return 0
+	}
+	if s.next == nil || s.splitState == nil {
+		return 1
+	}
+	return 2
+}
+
 // Returns the matches for the character at the given index of the given string.
 // Also returns the number of matches. Returns -1 if an assertion failed.
-func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
-	// Assertions can be viewed as 'checks'. If the check fails, we return
-	// an empty array and 0.
-	// If it passes, we treat it like any other state, and return all the transitions.
-	if s.assert != noneAssert {
-		if s.checkAssertion(str, idx) == false {
-			return make([]*nfaState, 0), -1
-		}
-	}
-	listTransitions := s.transitions[int(str[idx])]
-	for _, dest := range s.transitions[int(anyCharRune)] {
-		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
-			// Add an allChar state to the list of matches if:
-			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
-			// 		b. The current character isn't the state's exception list.
-			listTransitions = append(listTransitions, dest)
-		}
-	}
-	numTransitions := len(listTransitions)
-	return listTransitions, numTransitions
-}
+//func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
+//	// Assertions can be viewed as 'checks'. If the check fails, we return
+//	// an empty array and 0.
+//	// If it passes, we treat it like any other state, and return all the transitions.
+//	if s.assert != noneAssert {
+//		if s.checkAssertion(str, idx) == false {
+//			return make([]*nfaState, 0), -1
+//		}
+//	}
+//	listTransitions := s.transitions[int(str[idx])]
+//	for _, dest := range s.transitions[int(anyCharRune)] {
+//		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
+//			// Add an allChar state to the list of matches if:
+//			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
+//			// 		b. The current character isn't the state's exception list.
+//			listTransitions = append(listTransitions, dest)
+//		}
+//	}
+//	numTransitions := len(listTransitions)
+//	return listTransitions, numTransitions
+//}
 
 // verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
-func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
-	if len(st.transitions) == 0 {
-		st.isLast = true
-		return
-	}
-	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
-	if len(st.transitions) == 1 { // Eg. a*
-		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
-		for _, c := range st.content {
-			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
-				moreThanOneTrans = true
-			}
-		}
-		st.isLast = !moreThanOneTrans
-	}
-
-	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
-		transitionDests := make([]*nfaState, 0)
-		for _, v := range st.transitions {
-			transitionDests = append(transitionDests, v...)
-		}
-		if allEqual(transitionDests...) {
-			st.isLast = true
-			return
-		}
-	}
-	if visited[st] == true {
-		return
-	}
-	visited[st] = true
-	for _, states := range st.transitions {
-		for i := range states {
-			if states[i] != st {
-				verifyLastStatesHelper(states[i], visited)
-			}
-		}
-	}
-}
+//func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
+//	if st.numTransitions() == 0 {
+//		st.isLast = true
+//		return
+//	}
+//	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
+//	if st.numTransitions() == 1 { // Eg. a*
+//		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
+//		for _, c := range st.content {
+//			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
+//				moreThanOneTrans = true
+//			}
+//		}
+//		st.isLast = !moreThanOneTrans
+//	}
+//
+//	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
+//		transitionDests := make([]*nfaState, 0)
+//		for _, v := range st.transitions {
+//			transitionDests = append(transitionDests, v...)
+//		}
+//		if allEqual(transitionDests...) {
+//			st.isLast = true
+//			return
+//		}
+//	}
+//	if visited[st] == true {
+//		return
+//	}
+//	visited[st] = true
+//	for _, states := range st.transitions {
+//		for i := range states {
+//			if states[i] != st {
+//				verifyLastStatesHelper(states[i], visited)
+//			}
+//		}
+//	}
+//}
 
 // verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
-func verifyLastStates(start []*nfaState) {
-	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
-}
+//func verifyLastStates(start []*nfaState) {
+//	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
+//}
 
 // Concatenates s1 and s2, returns the start of the concatenation.
 func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -312,69 +315,69 @@ func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
 		return s2
 	}
 	for i := range s1.output {
-		for _, c := range s2.content { // Create transitions for every element in s1's content to s2'
-			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], s2)
-		}
+		s1.output[i].next = s2
 	}
 	s1.output = s2.output
 	return s1
 }
 
-func kleene(s1 nfaState) (*nfaState, error) {
+func kleene(s1 *nfaState) (*nfaState, error) {
 	if s1.isEmpty && s1.assert != noneAssert {
 		return nil, fmt.Errorf("previous token is not quantifiable")
 	}
 
-	emptyState := zeroLengthMatchState()
-	emptyState.assert = noneAssert
-	toReturn := alternate(&s1, &emptyState)
+	toReturn := &nfaState{}
+	toReturn.isEmpty = true
+	toReturn.isAlternation = true
+	toReturn.content = newContents(epsilon)
+	toReturn.splitState = s1
+	for i := range s1.output {
+		s1.output[i].next = toReturn
+	}
 
 	//	toReturn := &nfaState{}
 	//	toReturn.transitions = make(map[int][]*nfaState)
 	//	toReturn.content = newContents(epsilon)
-	toReturn.isEmpty = true
 	toReturn.isKleene = true
-	toReturn.output = []*nfaState{&emptyState}
+	toReturn.output = append([]*nfaState{}, toReturn)
 	for i := range s1.output {
-		for _, c := range toReturn.content {
-			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn)
-		}
-	}
-	for _, c := range s1.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
+		s1.output[i].next = toReturn
 	}
+	//	for _, c := range s1.content {
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
+	//	}
 	//toReturn.kleeneState = &s1
 	return toReturn, nil
 }
 
 func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
 	toReturn := &nfaState{}
-	toReturn.transitions = make(map[int][]*nfaState)
+	//	toReturn.transitions = make(map[int][]*nfaState)
 	toReturn.output = append(toReturn.output, s1.output...)
 	toReturn.output = append(toReturn.output, s2.output...)
-	// Unique append is used here (and elsewhere) to ensure that,
-	// for any given transition, a state can only be mentioned once.
-	// For example, given the transition 'a', the state 's1' can only be mentioned once.
-	// This would lead to multiple instances of the same set of match indices, since both
-	// 's1' states would be considered to match.
-	for _, c := range s1.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
-	}
-	for _, c := range s2.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
-	}
+	//	// Unique append is used here (and elsewhere) to ensure that,
+	//	// for any given transition, a state can only be mentioned once.
+	//	// For example, given the transition 'a', the state 's1' can only be mentioned once.
+	//	// This would lead to multiple instances of the same set of match indices, since both
+	//	// 's1' states would be considered to match.
+	//	for _, c := range s1.content {
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
+	//	}
+	//	for _, c := range s2.content {
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
+	//	}
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isAlternation = true
-	toReturn.leftState = s1
-	toReturn.rightState = s2
+	toReturn.next = s1
+	toReturn.splitState = s2
 
 	return toReturn
 }
 
 func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
 	s2 := &nfaState{}
-	s2.transitions = make(map[int][]*nfaState)
+	//	s2.transitions = make(map[int][]*nfaState)
 	s2.content = newContents(epsilon)
 	s2.output = append(s2.output, s2)
 	s2.isEmpty = true
@@ -385,8 +388,8 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
 // Creates and returns a new state with the 'default' values.
 func newState() nfaState {
 	ret := nfaState{
-		output:          make([]*nfaState, 0),
-		transitions:     make(map[int][]*nfaState),
+		output: make([]*nfaState, 0),
+		//		transitions:     make(map[int][]*nfaState),
 		assert:          noneAssert,
 		except:          append([]rune{}, 0),
 		lookaroundRegex: "",
-- 
2.30.2


From fbc9bea9fb78beeefb5bf8602e7ed13c0591d10b Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Wed, 5 Feb 2025 22:23:31 -0500
Subject: [PATCH 15/48] Commented out unused functions; use new nfaState
 parameters

---
 regex/matching.go | 145 ++++++++++++++++++++++------------------------
 1 file changed, 68 insertions(+), 77 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 3252742..d504801 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -74,58 +74,58 @@ func getZeroGroup(m Match) Group {
 // given slice. It returns the resulting states. If any of the resulting states is a 0-state,
 // the second ret val is true.
 // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
-func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
-	for _, state := range states {
-		if len(state.transitions[epsilon]) > 0 {
-			for _, s := range state.transitions[epsilon] {
-				if s.threadGroups == nil {
-					s.threadGroups = newMatch(numGroups + 1)
-				}
-				copy(s.threadGroups, state.threadGroups)
-				if s.groupBegin {
-					s.threadGroups[s.groupNum].StartIdx = idx
-					//					openParenGroups = append(openParenGroups, s.groupNum)
-				}
-				if s.groupEnd {
-					s.threadGroups[s.groupNum].EndIdx = idx
-					//					closeParenGroups = append(closeParenGroups, s.groupNum)
-				}
-			}
-			rtv = append(rtv, state.transitions[epsilon]...)
-		}
-	}
-	for _, state := range rtv {
-		if len(state.transitions[epsilon]) > 0 {
-			return rtv, true
-		}
-	}
-	return rtv, false
-}
+//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
+//	for _, state := range states {
+//		if len(state.transitions[epsilon]) > 0 {
+//			for _, s := range state.transitions[epsilon] {
+//				if s.threadGroups == nil {
+//					s.threadGroups = newMatch(numGroups + 1)
+//				}
+//				copy(s.threadGroups, state.threadGroups)
+//				if s.groupBegin {
+//					s.threadGroups[s.groupNum].StartIdx = idx
+//					//					openParenGroups = append(openParenGroups, s.groupNum)
+//				}
+//				if s.groupEnd {
+//					s.threadGroups[s.groupNum].EndIdx = idx
+//					//					closeParenGroups = append(closeParenGroups, s.groupNum)
+//				}
+//			}
+//			rtv = append(rtv, state.transitions[epsilon]...)
+//		}
+//	}
+//	for _, state := range rtv {
+//		if len(state.transitions[epsilon]) > 0 {
+//			return rtv, true
+//		}
+//	}
+//	return rtv, false
+//}
 
 // zeroMatchPossible returns true if a zero-length match is possible
 // from any of the given states, given the string and our position in it.
 // It uses the same algorithm to find zero-states as the one inside the loop,
 // so I should probably put it in a function.
-func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
-	zeroStates, isZero := takeZeroState(states, numGroups, idx)
-	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
-	tempstates = append(tempstates, states...)
-	tempstates = append(tempstates, zeroStates...)
-	num_appended := 0 // number of unique states addded to tempstates
-	for isZero == true {
-		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
-		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
-		if num_appended == 0 { // break if we haven't appended any more unique values
-			break
-		}
-	}
-	for _, state := range tempstates {
-		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
-			return true
-		}
-	}
-	return false
-}
+//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
+//	zeroStates, isZero := takeZeroState(states, numGroups, idx)
+//	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
+//	tempstates = append(tempstates, states...)
+//	tempstates = append(tempstates, zeroStates...)
+//	num_appended := 0 // number of unique states addded to tempstates
+//	for isZero == true {
+//		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
+//		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
+//		if num_appended == 0 { // break if we haven't appended any more unique values
+//			break
+//		}
+//	}
+//	for _, state := range tempstates {
+//		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
+//			return true
+//		}
+//	}
+//	return false
+//}
 
 // Prunes the slice by removing overlapping indices.
 func pruneIndices(indices []Match) []Match {
@@ -376,17 +376,26 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 			// Alternation - enqueue left then right state, and continue
 			if currentState.isAlternation {
-				leftState := currentState.leftState
-				copyThread(leftState, currentState)
-				currentStates = append(currentStates, *currentState.leftState)
-				rightState := currentState.rightState
-				copyThread(rightState, currentState)
-				currentStates = append(currentStates, *currentState.rightState)
+				if currentState.isKleene { // Reverse order of adding things
+					rightState := currentState.splitState
+					copyThread(rightState, currentState)
+					currentStates = append(currentStates, *currentState.splitState)
+					leftState := currentState.next
+					copyThread(leftState, currentState)
+					currentStates = append(currentStates, *currentState.next)
+				} else {
+					leftState := currentState.next
+					copyThread(leftState, currentState)
+					currentStates = append(currentStates, *currentState.next)
+					rightState := currentState.splitState
+					copyThread(rightState, currentState)
+					currentStates = append(currentStates, *currentState.splitState)
+				}
 				continue
 			}
 
 			// Empty state - enqueue next state, do _not_ increment the SP
-			if currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
+			if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
 				isEmptyAndNoAssertion = true
 			}
 
@@ -396,12 +405,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 			if isEmptyAndNoAssertion || foundMatch {
 				allMatches := make([]nfaState, 0)
-				for _, v := range currentState.transitions {
-					dereferenced := funcMap(v, func(s *nfaState) nfaState {
-						return *s
-					})
-					allMatches = append(allMatches, dereferenced...)
-				}
+				allMatches = append(allMatches, *(currentState.next))
 				slices.Reverse(allMatches)
 				for i := range allMatches {
 					copyThread(&allMatches[i], currentState)
@@ -419,24 +423,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			}
 
 			if currentState.isLast && len(nextStates) == 0 { // Last state reached
-				if foundMatch {
-					if currentState.assert != noneAssert {
-						currentState.threadGroups[0].EndIdx = idx
-					} else {
-						currentState.threadGroups[0].EndIdx = idx + 1
-					}
-					if idx == currentState.threadGroups[0].StartIdx {
-						idx += 1
-					}
-					return true, currentState.threadGroups, idx
-				} else if isEmptyAndNoAssertion {
-					currentState.threadGroups[0].EndIdx = idx
-					if idx == currentState.threadGroups[0].StartIdx {
-						idx++
-					}
-					return true, currentState.threadGroups, idx
+				currentState.threadGroups[0].EndIdx = idx
+				if idx == currentState.threadGroups[0].StartIdx {
+					idx += 1
 				}
-
+				return true, currentState.threadGroups, idx
 			}
 		}
 		currentStates = append([]nfaState{}, nextStates...)
-- 
2.30.2


From ed4ffde64e57c555d3f3fb3aa887eaeab66597de Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Wed, 5 Feb 2025 22:51:55 -0500
Subject: [PATCH 16/48] REFACTOR NEEDED: Added another special case; insert
 instead of appending into currentStates

---
 regex/matching.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index d504801..95f7a69 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -379,17 +379,17 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 				if currentState.isKleene { // Reverse order of adding things
 					rightState := currentState.splitState
 					copyThread(rightState, currentState)
-					currentStates = append(currentStates, *currentState.splitState)
+					currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
 					leftState := currentState.next
 					copyThread(leftState, currentState)
-					currentStates = append(currentStates, *currentState.next)
+					currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
 				} else {
 					leftState := currentState.next
 					copyThread(leftState, currentState)
-					currentStates = append(currentStates, *currentState.next)
+					currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
 					rightState := currentState.splitState
 					copyThread(rightState, currentState)
-					currentStates = append(currentStates, *currentState.splitState)
+					currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
 				}
 				continue
 			}
@@ -417,6 +417,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 					currentStates = slices.Insert(currentStates, currentStateIdx+1, allMatches...)
 				} else if currentState.groupEnd {
 					currentStates = append(currentStates, allMatches...)
+				} else if currentState.assert != noneAssert {
+					currentStates = append(currentStates, allMatches...)
 				} else {
 					nextStates = append(nextStates, allMatches...)
 				}
-- 
2.30.2


From 8534174ea1d83d3d8a7ed9e8e837cd075daaa5e0 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Thu, 6 Feb 2025 22:06:22 -0500
Subject: [PATCH 17/48] Use pointers instead of values

---
 regex/compile.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/regex/compile.go b/regex/compile.go
index 0429c37..fa51e0d 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -987,7 +987,8 @@ func thompson(re []postfixNode) (Reg, error) {
 		if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
 			// Map the list of nodes to a list of states, each state containing the contents of a specific node
 			states := funcMap(c.nodeContents, func(node postfixNode) *nfaState {
-				s := newState()
+				s := &nfaState{}
+				s.output = append(s.output, s)
 				nodeContents := node.contents
 				if caseInsensitive {
 					nodeContents = slices.Concat(funcMap(nodeContents, func(r rune) []rune {
@@ -1001,7 +1002,7 @@ func thompson(re []postfixNode) (Reg, error) {
 						return n.contents
 					})...)
 				}
-				return &s
+				return s
 			})
 			// Reduce the list of states down to a single state by alternating them
 			toAdd := funcReduce(states, func(s1 *nfaState, s2 *nfaState) *nfaState {
-- 
2.30.2


From 1d4f695f8f72f113528c28e850f4b4954a233930 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Thu, 6 Feb 2025 22:06:51 -0500
Subject: [PATCH 18/48] Wrote function to check if a state is in an nfaState,
 based on the Equals function

---
 regex/nfa.go | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/regex/nfa.go b/regex/nfa.go
index 79daaf6..d051a25 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -47,7 +47,6 @@ type nfaState struct {
 	// The following properties depend on the current match - I should think about resetting them for every match.
 	zeroMatchFound bool    // Whether or not the state has been used for a zero-length match - only relevant for zero states
 	threadGroups   []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
-	threadSP       int     // The string pointer of the thread - where it is in the input string
 }
 
 // Clones the NFA starting from the given state.
@@ -123,7 +122,6 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	}
 	// Assuming it hasn't been visited
 	state.threadGroups = nil
-	state.threadSP = 0
 	visitedMap[state] = true
 	if state.isAlternation {
 		resetThreadsHelper(state.next, visitedMap)
@@ -408,3 +406,32 @@ func zeroLengthMatchState() nfaState {
 	start.assert = alwaysTrueAssert
 	return start
 }
+
+func (s nfaState) equals(other nfaState) bool {
+	return slices.Equal(s.content, other.content) &&
+		s.isEmpty == other.isEmpty &&
+		s.isLast == other.isLast &&
+		slices.Equal(s.output, other.output) &&
+		s.next == other.next &&
+		s.isKleene == other.isKleene &&
+		s.isQuestion == other.isQuestion &&
+		s.isAlternation == other.isAlternation &&
+		s.splitState == other.splitState &&
+		s.assert == other.assert &&
+		s.allChars == other.allChars &&
+		slices.Equal(s.except, other.except) &&
+		s.lookaroundNFA == other.lookaroundNFA &&
+		s.groupBegin == other.groupBegin &&
+		s.groupEnd == other.groupEnd &&
+		s.groupNum == other.groupNum &&
+		slices.Equal(s.threadGroups, other.threadGroups)
+}
+
+func stateExists(list []nfaState, s nfaState) bool {
+	for i := range list {
+		if list[i].equals(s) {
+			return true
+		}
+	}
+	return false
+}
-- 
2.30.2


From ccf3b3b29964b44a5477225b93035bded96ade84 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Thu, 6 Feb 2025 22:08:56 -0500
Subject: [PATCH 19/48] More progress on implementing PCRE matching

---
 regex/matching.go | 62 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 95f7a69..06fd16b 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -152,7 +152,6 @@ func pruneIndices(indices []Match) []Match {
 }
 
 func copyThread(to *nfaState, from nfaState) {
-	to.threadSP = from.threadSP
 	to.threadGroups = append([]Group{}, from.threadGroups...)
 }
 
@@ -253,6 +252,35 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }
 
+func addStateToList(idx int, list []nfaState, state nfaState) []nfaState {
+	if stateExists(list, state) {
+		return list
+	}
+	if state.isAlternation {
+		copyThread(state.next, state)
+		list = append(list, addStateToList(idx, list, *state.next)...)
+		copyThread(state.splitState, state)
+		list = append(list, addStateToList(idx, list, *state.splitState)...)
+		return list
+	}
+	if state.isKleene {
+		copyThread(state.splitState, state)
+		list = append(list, addStateToList(idx, list, *state.splitState)...)
+		copyThread(state.next, state)
+		list = append(list, addStateToList(idx, list, *state.next)...)
+		return list
+	}
+	if state.groupBegin {
+		state.threadGroups[state.groupNum].StartIdx = idx
+	}
+	if state.groupEnd {
+		state.threadGroups[state.groupNum].StartIdx = idx
+	}
+	copyThread(state.next, state)
+	return append(list, *state.next)
+
+}
+
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
@@ -307,7 +335,6 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	//		tempIndices[start.groupNum].startIdx = i
 	//}
 
-	start.threadSP = i
 	currentStates = append(currentStates, *start)
 	var foundMatch bool
 	var isEmptyAndNoAssertion bool
@@ -404,23 +431,28 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			}
 
 			if isEmptyAndNoAssertion || foundMatch {
-				allMatches := make([]nfaState, 0)
-				allMatches = append(allMatches, *(currentState.next))
-				slices.Reverse(allMatches)
-				for i := range allMatches {
-					copyThread(&allMatches[i], currentState)
-					if foundMatch && currentState.assert == noneAssert {
-						allMatches[i].threadSP += 1
-					}
-				}
+				nextMatch := *(currentState.next)
+				copyThread(&nextMatch, currentState)
 				if currentState.groupBegin {
-					currentStates = slices.Insert(currentStates, currentStateIdx+1, allMatches...)
+					//	if !stateExists(currentStates, nextMatch) {
+					currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
+					//}
 				} else if currentState.groupEnd {
-					currentStates = append(currentStates, allMatches...)
+					if !stateExists(currentStates, nextMatch) {
+						currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
+					}
 				} else if currentState.assert != noneAssert {
-					currentStates = append(currentStates, allMatches...)
+					if !stateExists(currentStates, nextMatch) {
+						currentStates = append(currentStates, nextMatch)
+					}
+				} else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
+					if !stateExists(currentStates, nextMatch) {
+						currentStates = append(currentStates, nextMatch)
+					}
 				} else {
-					nextStates = append(nextStates, allMatches...)
+					if !stateExists(nextStates, nextMatch) {
+						nextStates = append(nextStates, nextMatch)
+					}
 				}
 			}
 
-- 
2.30.2


From d2ad0d95a88f53248bf2b2739f59d0e037a12574 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Fri, 7 Feb 2025 16:04:26 -0500
Subject: [PATCH 20/48] Modified question operator so that it doesn't create an
 unnecessary zero-state

---
 regex/nfa.go | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/regex/nfa.go b/regex/nfa.go
index d051a25..d7ac1af 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -329,9 +329,6 @@ func kleene(s1 *nfaState) (*nfaState, error) {
 	toReturn.isAlternation = true
 	toReturn.content = newContents(epsilon)
 	toReturn.splitState = s1
-	for i := range s1.output {
-		s1.output[i].next = toReturn
-	}
 
 	//	toReturn := &nfaState{}
 	//	toReturn.transitions = make(map[int][]*nfaState)
@@ -373,14 +370,20 @@ func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
 	return toReturn
 }
 
-func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
-	s2 := &nfaState{}
+func question(s1 *nfaState) (*nfaState, error) { // Use the fact that ab? == a(b|)
+	if s1.isEmpty && s1.assert != noneAssert {
+		return nil, fmt.Errorf("previous token is not quantifiable")
+	}
+	toReturn := &nfaState{}
+	toReturn.isEmpty = true
+	toReturn.isAlternation = true
+	toReturn.isQuestion = true
+	toReturn.content = newContents(epsilon)
+	toReturn.splitState = s1
+	toReturn.output = append([]*nfaState{}, toReturn)
+	toReturn.output = append(toReturn.output, s1.output...)
 	//	s2.transitions = make(map[int][]*nfaState)
-	s2.content = newContents(epsilon)
-	s2.output = append(s2.output, s2)
-	s2.isEmpty = true
-	s3 := alternate(s1, s2)
-	return s3
+	return toReturn, nil
 }
 
 // Creates and returns a new state with the 'default' values.
-- 
2.30.2


From 052de558261ce31b4603f43b1a7dde286e8890ce Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Fri, 7 Feb 2025 16:04:46 -0500
Subject: [PATCH 21/48] question() now returns 2 values

---
 regex/compile.go | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/regex/compile.go b/regex/compile.go
index fa51e0d..03e9f54 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -1047,7 +1047,10 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying question operator")
 			}
-			s2 := question(s1)
+			s2, err := question(s1)
+			if err != nil {
+				return Reg{}, err
+			}
 			nfa = append(nfa, s2)
 		case pipeNode:
 			// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
@@ -1106,7 +1109,11 @@ func thompson(re []postfixNode) (Reg, error) {
 				stateToAdd = concatenate(stateToAdd, s2)
 			} else { // Case 2
 				for i := c.startReps; i < c.endReps; i++ {
-					stateToAdd = concatenate(stateToAdd, question(cloneState(poppedState)))
+					tmp, err := question(cloneState(poppedState))
+					if err != nil {
+						return Reg{}, fmt.Errorf("error processing bounded repetition")
+					}
+					stateToAdd = concatenate(stateToAdd, tmp)
 				}
 			}
 			nfa = append(nfa, stateToAdd)
-- 
2.30.2


From 3604486a9b8195dc4fa05d496a2943597ad5e3ae Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Fri, 7 Feb 2025 16:06:45 -0500
Subject: [PATCH 22/48] Used Pike's algorithm (an extension to Thompson's
 algorithm) (see Russ Cox's 2nd article); I think I almost have a working
 PCRE-style engine

---
 regex/matching.go | 226 +++++++++++++++++++++-------------------------
 1 file changed, 102 insertions(+), 124 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 06fd16b..dab6446 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,7 +2,6 @@ package regex
 
 import (
 	"fmt"
-	"slices"
 	"sort"
 )
 
@@ -252,32 +251,36 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }
 
-func addStateToList(idx int, list []nfaState, state nfaState) []nfaState {
+func addStateToList(idx int, list []nfaState, state nfaState, threadGroups []Group) []nfaState {
 	if stateExists(list, state) {
 		return list
 	}
+	if state.isKleene || state.isQuestion {
+		copyThread(state.splitState, state)
+		list = addStateToList(idx, list, *state.splitState, threadGroups)
+		copyThread(state.next, state)
+		list = addStateToList(idx, list, *state.next, threadGroups)
+		return list
+	}
 	if state.isAlternation {
 		copyThread(state.next, state)
-		list = append(list, addStateToList(idx, list, *state.next)...)
+		list = addStateToList(idx, list, *state.next, threadGroups)
 		copyThread(state.splitState, state)
-		list = append(list, addStateToList(idx, list, *state.splitState)...)
-		return list
-	}
-	if state.isKleene {
-		copyThread(state.splitState, state)
-		list = append(list, addStateToList(idx, list, *state.splitState)...)
-		copyThread(state.next, state)
-		list = append(list, addStateToList(idx, list, *state.next)...)
+		list = addStateToList(idx, list, *state.splitState, threadGroups)
 		return list
 	}
+
+	state.threadGroups = append([]Group{}, threadGroups...)
 	if state.groupBegin {
 		state.threadGroups[state.groupNum].StartIdx = idx
+		return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...)
 	}
 	if state.groupEnd {
-		state.threadGroups[state.groupNum].StartIdx = idx
+		state.threadGroups[state.groupNum].EndIdx = idx
+		return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...)
 	}
-	copyThread(state.next, state)
-	return append(list, *state.next)
+	state.threadGroups = append([]Group{}, threadGroups...)
+	return append(list, state)
 
 }
 
@@ -335,138 +338,113 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	//		tempIndices[start.groupNum].startIdx = i
 	//}
 
-	currentStates = append(currentStates, *start)
-	var foundMatch bool
-	var isEmptyAndNoAssertion bool
+	start.threadGroups = newMatch(numGroups + 1)
+	start.threadGroups[0].StartIdx = i
+	currentStates = addStateToList(i, currentStates, *start, start.threadGroups)
+	var match Match = nil
+	//	var isEmptyAndNoAssertion bool
 	// Main loop
 	for idx := i; idx <= len(str); idx++ {
+		if len(currentStates) == 0 {
+			break
+		}
 		for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
 			currentState := currentStates[currentStateIdx]
-			foundMatch = false
-			isEmptyAndNoAssertion = false
 
 			if currentState.threadGroups == nil {
 				currentState.threadGroups = newMatch(numGroups + 1)
 				currentState.threadGroups[0].StartIdx = idx
 			}
 
-			if currentState.groupBegin {
-				currentState.threadGroups[currentState.groupNum].StartIdx = idx
-				//		allMatches := make([]nfaState, 0)
-				//		for _, v := range currentState.transitions {
-				//			dereferenced := funcMap(v, func(s *nfaState) nfaState {
-				//				return *s
-				//			})
-				//			allMatches = append(allMatches, dereferenced...)
-				//		}
-				//		slices.Reverse(allMatches)
-				//		for i := range allMatches {
-				//			copyThread(&allMatches[i], currentState)
-				//		}
-				//		currentStates = append(currentStates, allMatches...)
-			}
-			if currentState.groupEnd {
-				currentState.threadGroups[currentState.groupNum].EndIdx = idx
-				//			allMatches := make([]nfaState, 0)
-				//			for _, v := range currentState.transitions {
-				//				dereferenced := funcMap(v, func(s *nfaState) nfaState {
-				//					return *s
-				//				})
-				//				allMatches = append(allMatches, dereferenced...)
-				//			}
-				//			slices.Reverse(allMatches)
-				//			for i := range allMatches {
-				//				copyThread(&allMatches[i], currentState)
-				//			}
-				//			currentStates = append(currentStates, allMatches...)
+			if currentState.isLast {
+				currentState.threadGroups[0].EndIdx = idx
+				match = append([]Group{}, currentState.threadGroups...)
+				break
+			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
+				if currentState.contentContains(str, idx) {
+					nextStates = addStateToList(idx+1, nextStates, *currentState.next, currentState.threadGroups)
+				}
 			}
 
-			//		if currentState.isKleene {
-			//			// Append the next-state (after the kleene), then append the kleene state
-			//			allMatches := make([]*nfaState, 0)
-			//			for _, v := range currentState.transitions {
-			//				allMatches = append(allMatches, v...)
+			//			if currentState.groupBegin {
+			//				currentState.threadGroups[currentState.groupNum].StartIdx = idx
 			//			}
-			//			slices.Reverse(allMatches)
-			//			for _, m := range allMatches {
-			//				m.threadGroups = currentState.threadGroups
-			//				m.threadSP = idx
+			//			if currentState.groupEnd {
+			//				currentState.threadGroups[currentState.groupNum].EndIdx = idx
 			//			}
-			//			currentStates = append(currentStates, allMatches...)
-			//
-			//			//	kleeneState := currentState.kleeneState
-			//			//	kleeneState.threadGroups = currentState.threadGroups
-			//			//	kleeneState.threadSP = currentState.threadSP
-			//			//	currentStates = append(currentStates, kleeneState)
-			//			continue
-			//		}
 
 			// Alternation - enqueue left then right state, and continue
-			if currentState.isAlternation {
-				if currentState.isKleene { // Reverse order of adding things
-					rightState := currentState.splitState
-					copyThread(rightState, currentState)
-					currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
-					leftState := currentState.next
-					copyThread(leftState, currentState)
-					currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
-				} else {
-					leftState := currentState.next
-					copyThread(leftState, currentState)
-					currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
-					rightState := currentState.splitState
-					copyThread(rightState, currentState)
-					currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
-				}
-				continue
-			}
+			//			if currentState.isAlternation {
+			//				if currentState.isKleene { // Reverse order of adding things
+			//					rightState := currentState.splitState
+			//					copyThread(rightState, currentState)
+			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
+			//					leftState := currentState.next
+			//					copyThread(leftState, currentState)
+			//					currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
+			//				} else {
+			//					leftState := currentState.next
+			//					copyThread(leftState, currentState)
+			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
+			//					rightState := currentState.splitState
+			//					copyThread(rightState, currentState)
+			//					currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
+			//				}
+			//				continue
+			//			}
 
 			// Empty state - enqueue next state, do _not_ increment the SP
-			if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
-				isEmptyAndNoAssertion = true
-			}
-
-			if currentState.contentContains(str, idx) {
-				foundMatch = true
-			}
-
-			if isEmptyAndNoAssertion || foundMatch {
-				nextMatch := *(currentState.next)
-				copyThread(&nextMatch, currentState)
-				if currentState.groupBegin {
-					//	if !stateExists(currentStates, nextMatch) {
-					currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
-					//}
-				} else if currentState.groupEnd {
-					if !stateExists(currentStates, nextMatch) {
-						currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
-					}
-				} else if currentState.assert != noneAssert {
-					if !stateExists(currentStates, nextMatch) {
-						currentStates = append(currentStates, nextMatch)
-					}
-				} else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
-					if !stateExists(currentStates, nextMatch) {
-						currentStates = append(currentStates, nextMatch)
-					}
-				} else {
-					if !stateExists(nextStates, nextMatch) {
-						nextStates = append(nextStates, nextMatch)
-					}
-				}
-			}
-
-			if currentState.isLast && len(nextStates) == 0 { // Last state reached
-				currentState.threadGroups[0].EndIdx = idx
-				if idx == currentState.threadGroups[0].StartIdx {
-					idx += 1
-				}
-				return true, currentState.threadGroups, idx
-			}
+			//			if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
+			//				isEmptyAndNoAssertion = true
+			//			}
+			//
+			//			if currentState.contentContains(str, idx) {
+			//				foundMatch = true
+			//			}
+			//
+			//			if isEmptyAndNoAssertion || foundMatch {
+			//				nextMatch := *(currentState.next)
+			//				copyThread(&nextMatch, currentState)
+			//				if currentState.groupBegin {
+			//					//	if !stateExists(currentStates, nextMatch) {
+			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
+			//					//}
+			//				} else if currentState.groupEnd {
+			//					if !stateExists(currentStates, nextMatch) {
+			//						currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
+			//					}
+			//				} else if currentState.assert != noneAssert {
+			//					if !stateExists(currentStates, nextMatch) {
+			//						currentStates = append(currentStates, nextMatch)
+			//					}
+			//				} else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
+			//					if !stateExists(currentStates, nextMatch) {
+			//						currentStates = append(currentStates, nextMatch)
+			//					}
+			//				} else {
+			//					if !stateExists(nextStates, nextMatch) {
+			//						nextStates = append(nextStates, nextMatch)
+			//					}
+			//				}
+			//			}
+			//
+			//			if currentState.isLast && len(nextStates) == 0 { // Last state reached
+			//				currentState.threadGroups[0].EndIdx = idx
+			//				if idx == currentState.threadGroups[0].StartIdx {
+			//					idx += 1
+			//				}
+			//				return true, currentState.threadGroups, idx
+			//			}
 		}
 		currentStates = append([]nfaState{}, nextStates...)
 		nextStates = nil
 	}
+	if match != nil {
+		if offset == match[0].EndIdx {
+			return true, match, match[0].EndIdx + 1
+		}
+		return true, match, match[0].EndIdx
+	}
 	return false, []Group{}, i + 1
 	//		zeroStates := make([]*nfaState, 0)
 	//		// Keep taking zero-states, until there are no more left to take
-- 
2.30.2


From 22ead83625401c232c175da1566baf5e6652f1e6 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Fri, 7 Feb 2025 16:19:36 -0500
Subject: [PATCH 23/48] Fixed assertion matching

---
 regex/matching.go | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index dab6446..d2925bd 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -251,35 +251,39 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }
 
-func addStateToList(idx int, list []nfaState, state nfaState, threadGroups []Group) []nfaState {
+func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group) []nfaState {
 	if stateExists(list, state) {
 		return list
 	}
 	if state.isKleene || state.isQuestion {
 		copyThread(state.splitState, state)
-		list = addStateToList(idx, list, *state.splitState, threadGroups)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups)
 		copyThread(state.next, state)
-		list = addStateToList(idx, list, *state.next, threadGroups)
+		list = addStateToList(str, idx, list, *state.next, threadGroups)
 		return list
 	}
 	if state.isAlternation {
 		copyThread(state.next, state)
-		list = addStateToList(idx, list, *state.next, threadGroups)
+		list = addStateToList(str, idx, list, *state.next, threadGroups)
 		copyThread(state.splitState, state)
-		list = addStateToList(idx, list, *state.splitState, threadGroups)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups)
 		return list
 	}
-
 	state.threadGroups = append([]Group{}, threadGroups...)
+	if state.assert != noneAssert {
+		if state.checkAssertion(str, idx) {
+			copyThread(state.next, state)
+			return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
+		}
+	}
 	if state.groupBegin {
 		state.threadGroups[state.groupNum].StartIdx = idx
-		return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...)
+		return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
 	}
 	if state.groupEnd {
 		state.threadGroups[state.groupNum].EndIdx = idx
-		return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...)
+		return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
 	}
-	state.threadGroups = append([]Group{}, threadGroups...)
 	return append(list, state)
 
 }
@@ -340,7 +344,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 	start.threadGroups = newMatch(numGroups + 1)
 	start.threadGroups[0].StartIdx = i
-	currentStates = addStateToList(i, currentStates, *start, start.threadGroups)
+	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups)
 	var match Match = nil
 	//	var isEmptyAndNoAssertion bool
 	// Main loop
@@ -362,7 +366,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 				break
 			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
 				if currentState.contentContains(str, idx) {
-					nextStates = addStateToList(idx+1, nextStates, *currentState.next, currentState.threadGroups)
+					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups)
 				}
 			}
 
-- 
2.30.2


From 99230b49de346feea1e917cb3e3565310da86676 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sat, 8 Feb 2025 16:05:35 -0500
Subject: [PATCH 24/48] Use new function signature for zeroLengthMatchState()

---
 regex/compile.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/regex/compile.go b/regex/compile.go
index 03e9f54..8e010dc 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -816,7 +816,7 @@ func thompson(re []postfixNode) (Reg, error) {
 	// In these cases, we will return an NFA with 1 state, with an assertion that is always true.
 	if len(re) == 0 {
 		start := zeroLengthMatchState()
-		nfa = append(nfa, &start)
+		nfa = append(nfa, start)
 	}
 
 	for _, c := range re {
@@ -1068,14 +1068,14 @@ func thompson(re []postfixNode) (Reg, error) {
 					nfa = append(nfa, s2)
 				}
 				tmp := zeroLengthMatchState()
-				s2 = &tmp
+				s2 = tmp
 			}
 			if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err1 == nil { // See above for explanation
 					nfa = append(nfa, s1)
 				}
 				tmp := zeroLengthMatchState()
-				s1 = &tmp
+				s1 = tmp
 			}
 			s3 := alternate(s1, s2)
 			nfa = append(nfa, s3)
-- 
2.30.2


From 62ca1a872aa09772cff5a591dee9d2311a25f334 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sat, 8 Feb 2025 16:06:14 -0500
Subject: [PATCH 25/48] Made zeroLengthMatchState() return a pointer; reduced
 the number of comparisons performed by nfaState.equals

---
 regex/nfa.go | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/regex/nfa.go b/regex/nfa.go
index d7ac1af..8bd1d74 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -402,19 +402,20 @@ func newState() nfaState {
 }
 
 // Creates and returns a state that _always_ has a zero-length match.
-func zeroLengthMatchState() nfaState {
-	start := newState()
+func zeroLengthMatchState() *nfaState {
+	start := &nfaState{}
 	start.content = newContents(epsilon)
 	start.isEmpty = true
 	start.assert = alwaysTrueAssert
+	start.output = append([]*nfaState{}, start)
 	return start
 }
 
 func (s nfaState) equals(other nfaState) bool {
-	return slices.Equal(s.content, other.content) &&
-		s.isEmpty == other.isEmpty &&
+	return s.isEmpty == other.isEmpty &&
 		s.isLast == other.isLast &&
 		slices.Equal(s.output, other.output) &&
+		slices.Equal(s.content, other.content) &&
 		s.next == other.next &&
 		s.isKleene == other.isKleene &&
 		s.isQuestion == other.isQuestion &&
-- 
2.30.2


From f15a5cae348e19b31f4e50358616629a075d4d79 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sat, 8 Feb 2025 16:07:01 -0500
Subject: [PATCH 26/48] Store all states visited in a single run of
 'addStateToList()' in a slice

---
 regex/matching.go | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index d2925bd..6a5e0e7 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -251,38 +251,40 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }
 
-func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group) []nfaState {
-	if stateExists(list, state) {
+func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState) []nfaState {
+	if stateExists(list, state) || stateExists(visited, state) {
 		return list
 	}
+	visited = append(visited, state)
+
 	if state.isKleene || state.isQuestion {
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
 		return list
 	}
 	if state.isAlternation {
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
 		return list
 	}
 	state.threadGroups = append([]Group{}, threadGroups...)
 	if state.assert != noneAssert {
 		if state.checkAssertion(str, idx) {
 			copyThread(state.next, state)
-			return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
+			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
 		}
 	}
 	if state.groupBegin {
 		state.threadGroups[state.groupNum].StartIdx = idx
-		return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
 	}
 	if state.groupEnd {
 		state.threadGroups[state.groupNum].EndIdx = idx
-		return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
 	}
 	return append(list, state)
 
@@ -344,7 +346,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 	start.threadGroups = newMatch(numGroups + 1)
 	start.threadGroups[0].StartIdx = i
-	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups)
+	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil)
 	var match Match = nil
 	//	var isEmptyAndNoAssertion bool
 	// Main loop
@@ -366,7 +368,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 				break
 			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
 				if currentState.contentContains(str, idx) {
-					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups)
+					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil)
 				}
 			}
 
-- 
2.30.2


From d4e3942d27a0af01620eda97a7d37925873e9487 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 08:58:09 -0500
Subject: [PATCH 27/48] Added Match() and FindStringSubmatch(); removed old
 code; updated comments

---
 regex/matching.go | 99 +++++++++++++++++++----------------------------
 1 file changed, 39 insertions(+), 60 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 6a5e0e7..7864084 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -14,7 +14,7 @@ import (
 // See [Reg.FindSubmatch] for an example.
 type Match []Group
 
-// a Group represents a group. It contains the start index and end index of the match
+// a Group represents a capturing group. It contains the start and index of the group.
 type Group struct {
 	StartIdx int
 	EndIdx   int
@@ -58,7 +58,7 @@ func (idx Group) String() string {
 	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }
 
-// Returns whether a group is valid (ie. whether it matched any text). It
+// IsValid returns whether a group is valid (ie. whether it matched any text). It
 // simply ensures that both indices of the group are >= 0.
 func (g Group) IsValid() bool {
 	return g.StartIdx >= 0 && g.EndIdx >= 0
@@ -69,63 +69,6 @@ func getZeroGroup(m Match) Group {
 	return m[0]
 }
 
-// takeZeroState takes the 0-state (if such a transition exists) for all states in the
-// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
-// the second ret val is true.
-// If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
-//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
-//	for _, state := range states {
-//		if len(state.transitions[epsilon]) > 0 {
-//			for _, s := range state.transitions[epsilon] {
-//				if s.threadGroups == nil {
-//					s.threadGroups = newMatch(numGroups + 1)
-//				}
-//				copy(s.threadGroups, state.threadGroups)
-//				if s.groupBegin {
-//					s.threadGroups[s.groupNum].StartIdx = idx
-//					//					openParenGroups = append(openParenGroups, s.groupNum)
-//				}
-//				if s.groupEnd {
-//					s.threadGroups[s.groupNum].EndIdx = idx
-//					//					closeParenGroups = append(closeParenGroups, s.groupNum)
-//				}
-//			}
-//			rtv = append(rtv, state.transitions[epsilon]...)
-//		}
-//	}
-//	for _, state := range rtv {
-//		if len(state.transitions[epsilon]) > 0 {
-//			return rtv, true
-//		}
-//	}
-//	return rtv, false
-//}
-
-// zeroMatchPossible returns true if a zero-length match is possible
-// from any of the given states, given the string and our position in it.
-// It uses the same algorithm to find zero-states as the one inside the loop,
-// so I should probably put it in a function.
-//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
-//	zeroStates, isZero := takeZeroState(states, numGroups, idx)
-//	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
-//	tempstates = append(tempstates, states...)
-//	tempstates = append(tempstates, zeroStates...)
-//	num_appended := 0 // number of unique states addded to tempstates
-//	for isZero == true {
-//		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
-//		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
-//		if num_appended == 0 { // break if we haven't appended any more unique values
-//			break
-//		}
-//	}
-//	for _, state := range tempstates {
-//		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
-//			return true
-//		}
-//	}
-//	return false
-//}
-
 // Prunes the slice by removing overlapping indices.
 func pruneIndices(indices []Match) []Match {
 	// First, sort the slice by the start indices
@@ -164,6 +107,12 @@ func (regex Reg) Find(str string) (Group, error) {
 	return getZeroGroup(match), nil
 }
 
+// Match returns a boolean value, indicating whether the regex found a match in the given string.
+func (regex Reg) Match(str string) bool {
+	_, err := regex.Find(str)
+	return err == nil
+}
+
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
 func (regex Reg) FindAll(str string) []Group {
@@ -199,7 +148,37 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 	}
 }
 
-// FindAllString is the 'all' version of FindString.
+// FindStringSubmatch is the 'string' version of [FindSubmatch]. It returns a slice of strings,
+// where the string at index i contains the text matched by the i-th capturing group.
+// The 0-th index represents the entire match.
+// An empty string at index n could mean:
+//
+//  1. Group n did not find a match
+//  2. Group n found a zero-length match
+//
+// A return value of nil indicates no match.
+func (regex Reg) FindStringSubmatch(str string) []string {
+	matchStr := make([]string, regex.numGroups+1)
+	match, err := regex.FindSubmatch(str)
+	if err != nil {
+		return nil
+	}
+	nonEmptyMatchFound := false
+	for i := range match {
+		if match[i].IsValid() {
+			matchStr[i] = str[match[i].StartIdx:match[i].EndIdx]
+			nonEmptyMatchFound = true
+		} else {
+			matchStr[i] = ""
+		}
+	}
+	if nonEmptyMatchFound == false {
+		return nil
+	}
+	return matchStr
+}
+
+// FindAllString is the 'all' version of [FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
 func (regex Reg) FindAllString(str string) []string {
-- 
2.30.2


From c577064977b7921f0db200b17a144343ebf92162 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 08:58:46 -0500
Subject: [PATCH 28/48] Added string field to Reg, that contains the expression
 string; wrote method to return the string

---
 regex/compile.go | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/regex/compile.go b/regex/compile.go
index 8e010dc..8dbcf37 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -12,18 +12,24 @@ var notDotChars []rune
 
 // A Reg represents the result of compiling a regular expression. It contains
 // the startState of the NFA representation of the regex, and the number of capturing
-// groups in the regex.
+// groups in the regex. It also contains the expression string.
 type Reg struct {
 	start     *nfaState
 	numGroups int
+	str       string
 }
 
-// numSubexp eturns the number of sub-expressions in the given [Reg]. This is equivalent
+// NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
 func (r Reg) NumSubexp() int {
 	return r.numGroups
 }
 
+// String returns the string used to compile the expression.
+func (r Reg) String() string {
+	return r.str
+}
+
 const concatRune rune = 0xF0001
 
 // Flags for shuntingYard - control its behavior
@@ -1128,7 +1134,8 @@ func thompson(re []postfixNode) (Reg, error) {
 
 	concatenate(nfa[0], &lastState)
 
-	return Reg{nfa[0], numGroups}, nil
+	// The string is empty here, because we add it in Compile()
+	return Reg{nfa[0], numGroups, ""}, nil
 
 }
 
@@ -1146,6 +1153,7 @@ func Compile(re string, flags ...ReFlag) (Reg, error) {
 	if err != nil {
 		return Reg{}, fmt.Errorf("error compiling regex: %w", err)
 	}
+	reg.str = re
 	return reg, nil
 }
 
-- 
2.30.2


From eddd2ae7001b0995a1e895b8ced05f45abee0e73 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 08:58:58 -0500
Subject: [PATCH 29/48] Updated documentation

---
 regex/doc.go | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/regex/doc.go b/regex/doc.go
index 1b821c1..9ca1f04 100644
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -4,6 +4,8 @@ Package regex implements regular expression search, using a custom non-bracktrac
 The engine relies completely on UTF-8 codepoints. As such, it is capable of matching characters
 from other languages, emojis and symbols.
 
+The API and regex syntax are largely compatible with those of the stdlib's [regexp], with a few key differences (see 'Key Differences with regexp').
+
 The full syntax is specified below.
 
 # Syntax
@@ -55,8 +57,8 @@ POSIX classes (inside normal character classes):
 Composition:
 
 	def				Match d, followed by e, followed by f
-	x|y				Match x or y (prefer longer one)
-	xy|z			Match xy or z
+	x|y				Match x or y (prefer x)
+	xy|z			Match xy or z (prefer xy)
 
 Repitition (always greedy, preferring more):
 
@@ -94,10 +96,11 @@ Lookarounds:
 Numeric ranges:
 
 	<x-y>			Match any number from x to y (inclusive) (x and y must be positive numbers)
+	\<x				Match a literal '<' followed by x
 
 # Key Differences with regexp
 
-The engine and the API differ from [regexp] in a number of ways, some of them very subtle.
+The engine and the API differ from [regexp] in a few ways, some of them very subtle.
 The key differences are mentioned below.
 
 1. Greediness:
@@ -132,7 +135,7 @@ Rather than using primitives for return values, my engine defines two types that
 values: a [Group] represents a capturing group, and a [Match] represents a list of groups.
 
 [regexp] specifies a regular expression that gives a list of all the matching functions that it supports. The
-equivalent expression for this engine is:
+equivalent expression for this engine is shown below. Note that 'Index' is the default.
 
 	Find(All)?(String)?(Submatch)?
 
@@ -140,7 +143,7 @@ equivalent expression for this engine is:
 
 If a function contains 'All' it returns all matches instead of just the leftmost one.
 
-If a function contains 'String' it returns the matched text, rather than the indices.
+If a function contains 'String' it returns the matched text, rather than the index in the string.
 
 If a function contains 'Submatch' it returns the match, including all submatches found by
 capturing groups.
@@ -156,5 +159,20 @@ and the input string:
 
 The 0th group would contain 'xy' and the 1st group would contain 'y'. Any matching function without 'Submatch' in its name
 returns the 0-group.
+
+# Feature Differences
+
+The following features from [regexp] are (currently) NOT supported:
+ 1. Named capturing groups
+ 2. Non-greedy operators
+ 3. Unicode character classes
+ 4. Embedded flags (flags are passed as arguments to [Compile])
+ 5. Literal text with \Q ... \E
+
+The following features are not available in [regexp], but are supported in my engine:
+ 1. Lookarounds
+ 2. Numeric ranges
+
+The goal is to shorten the first list, and expand the second.
 */
 package regex
-- 
2.30.2


From 78fb5606dd970c6225b12e90e0b15c3951328bb0 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 08:59:16 -0500
Subject: [PATCH 30/48] Use new definition of Reg

---
 regex/nfa.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/regex/nfa.go b/regex/nfa.go
index 8bd1d74..db53c00 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -183,7 +183,7 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 			strToMatch = string(runesToMatch)
 		}
 
-		regComp := Reg{startState, s.lookaroundNumCaptureGroups}
+		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex}
 		matchIndices := regComp.FindAll(strToMatch)
 
 		numMatchesFound := 0
-- 
2.30.2


From 6334435b83d4963880f2a69fa309022264b1f316 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 09:01:42 -0500
Subject: [PATCH 31/48] Updated tests since the engine uses Perl matching
 instead of POSIX matching; added tests for FindStringSubmatch

---
 regex/re_test.go | 65 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 59 insertions(+), 6 deletions(-)

diff --git a/regex/re_test.go b/regex/re_test.go
index 2cccc72..b8b1381 100644
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -528,7 +528,7 @@ var groupTests = []struct {
 }{
 	{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
 	{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
-	{"(0)", nil, "ab", []Match{[]Group{}}},
+	{"(0)", nil, "ab", []Match{}},
 	{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
 	{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
 	{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
@@ -538,9 +538,8 @@ var groupTests = []struct {
 	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
 	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
 	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
+	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
 	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
 	{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
 	{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
@@ -578,7 +577,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `bcdd`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, nil, `a`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, nil, `a`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\41`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
 
@@ -633,7 +632,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
 	{`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}},
@@ -743,7 +742,7 @@ func TestFindString(t *testing.T) {
 				foundString := regComp.FindString(test.str)
 				if len(test.result) == 0 {
 					if foundString != "" {
-						t.Errorf("Expected no match got %v\n", foundString)
+						t.Errorf("Wanted no match got %v\n", foundString)
 					}
 				} else {
 					expectedString := test.str[test.result[0].StartIdx:test.result[0].EndIdx]
@@ -796,6 +795,56 @@ func TestFindSubmatch(t *testing.T) {
 					if test.result[0][i] != match[i] {
 						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 					}
+				} else {
+					if i < len(test.result) && test.result[0][i].IsValid() {
+						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
+					}
+				}
+			}
+		})
+	}
+}
+func TestFindStringSubmatch(t *testing.T) {
+	for _, test := range groupTests {
+		t.Run(test.re+"	"+test.str, func(t *testing.T) {
+			regComp, err := Compile(test.re, test.flags...)
+			if err != nil {
+				if test.result != nil {
+					panic(err)
+				}
+			}
+			matchStr := regComp.FindStringSubmatch(test.str)
+			if matchStr == nil {
+				if len(test.result) != 0 {
+					expectedStr := funcMap(test.result[0], func(g Group) string {
+						if g.IsValid() {
+							return test.str[g.StartIdx:g.EndIdx]
+						} else {
+							return ""
+						}
+					})
+					t.Errorf("Wanted %v got no match\n", expectedStr)
+				}
+			} else if len(test.result) == 0 {
+				t.Errorf("Wanted no match got %v\n", matchStr)
+			} else {
+				expectedStr := funcMap(test.result[0], func(g Group) string {
+					if g.IsValid() {
+						return test.str[g.StartIdx:g.EndIdx]
+					} else {
+						return ""
+					}
+				})
+				for i, groupStr := range matchStr {
+					if groupStr == "" {
+						if i < len(expectedStr) && expectedStr[i] != "" {
+							t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
+						}
+					} else {
+						if expectedStr[i] != groupStr {
+							t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
+						}
+					}
 				}
 			}
 		})
@@ -817,6 +866,10 @@ func TestFindAllSubmatch(t *testing.T) {
 						if test.result[i][j] != matchIndices[i][j] {
 							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 						}
+					} else {
+						if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
+							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
+						}
 					}
 				}
 			}
-- 
2.30.2


From c6ad4caa0d32f5ca828f70b3e2c1ded8dbedbcea Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 09:06:40 -0500
Subject: [PATCH 32/48] Removed a bunch of unused code (let's go!!!)

---
 regex/matching.go | 322 ----------------------------------------------
 1 file changed, 322 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 7864084..d9500ce 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,7 +2,6 @@ package regex
 
 import (
 	"fmt"
-	"sort"
 )
 
 // A Match represents a match found by the regex in a given string.
@@ -69,30 +68,6 @@ func getZeroGroup(m Match) Group {
 	return m[0]
 }
 
-// Prunes the slice by removing overlapping indices.
-func pruneIndices(indices []Match) []Match {
-	// First, sort the slice by the start indices
-	sort.Slice(indices, func(i, j int) bool {
-		return indices[i][0].StartIdx < indices[j][0].StartIdx
-	})
-	toRet := make([]Match, 0, len(indices))
-	current := indices[0]
-	for _, idx := range indices[1:] {
-		// idx doesn't overlap with current (starts after current ends), so add current to result
-		// and update the current.
-		if idx[0].StartIdx >= current[0].EndIdx {
-			toRet = append(toRet, current)
-			current = idx
-		} else if idx[0].EndIdx > current[0].EndIdx {
-			// idx overlaps, but it is longer, so update current
-			current = idx
-		}
-	}
-	// Add last state
-	toRet = append(toRet, current)
-	return toRet
-}
-
 func copyThread(to *nfaState, from nfaState) {
 	to.threadGroups = append([]Group{}, from.threadGroups...)
 }
@@ -223,9 +198,6 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 			indices = append(indices, matchIdx)
 		}
 	}
-	if len(indices) > 0 {
-		return pruneIndices(indices)
-	}
 
 	return indices
 }
@@ -272,8 +244,6 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
-//
-//	Might return duplicates or overlapping indices, so care must be taken to prune the resulting array.
 func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
 	// Base case - exit if offset exceeds string's length
 	if offset > len(str) {
@@ -282,21 +252,9 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	}
 	resetThreads(start)
 
-	// Hold a list of match indices for the current run. When we
-	// can no longer find a match, the match with the largest range is
-	// chosen as the match for the entire string.
-	// This allows us to pick the longest possible match (which is how greedy matching works).
-	// COMMENT ABOVE IS CURRENTLY NOT UP-TO-DATE
-	//	tempIndices := newMatch(numGroups + 1)
-
-	//	foundPath := false
-	//startIdx := offset
-	//endIdx := offset
 	currentStates := make([]nfaState, 0)
 	nextStates := make([]nfaState, 0)
-	//	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
 	i := offset // Index in string
-	//startingFrom := i                  // Store starting index
 
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
@@ -306,29 +264,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			return false, []Group{}, i
 		}
 	}
-	// Increment until we hit a character matching the start state (assuming not 0-state)
-	//	if start.isEmpty == false {
-	//		for i < len(str) && !start.contentContains(str, i) {
-	//			i++
-	//		}
-	//		startIdx = i
-	//		startingFrom = i
-	//		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
-	//	}
-
-	//	start.threadGroups = newMatch(numGroups + 1)
-	// Check if the start state begins a group - if so, add the start index to our list
-	//if start.groupBegin {
-	//		start.threadGroups[start.groupNum].StartIdx = i
-	//		tempIndices[start.groupNum].startIdx = i
-	//}
 
 	start.threadGroups = newMatch(numGroups + 1)
 	start.threadGroups[0].StartIdx = i
 	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil)
 	var match Match = nil
-	//	var isEmptyAndNoAssertion bool
-	// Main loop
 	for idx := i; idx <= len(str); idx++ {
 		if len(currentStates) == 0 {
 			break
@@ -350,76 +290,6 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil)
 				}
 			}
-
-			//			if currentState.groupBegin {
-			//				currentState.threadGroups[currentState.groupNum].StartIdx = idx
-			//			}
-			//			if currentState.groupEnd {
-			//				currentState.threadGroups[currentState.groupNum].EndIdx = idx
-			//			}
-
-			// Alternation - enqueue left then right state, and continue
-			//			if currentState.isAlternation {
-			//				if currentState.isKleene { // Reverse order of adding things
-			//					rightState := currentState.splitState
-			//					copyThread(rightState, currentState)
-			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
-			//					leftState := currentState.next
-			//					copyThread(leftState, currentState)
-			//					currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
-			//				} else {
-			//					leftState := currentState.next
-			//					copyThread(leftState, currentState)
-			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
-			//					rightState := currentState.splitState
-			//					copyThread(rightState, currentState)
-			//					currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
-			//				}
-			//				continue
-			//			}
-
-			// Empty state - enqueue next state, do _not_ increment the SP
-			//			if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
-			//				isEmptyAndNoAssertion = true
-			//			}
-			//
-			//			if currentState.contentContains(str, idx) {
-			//				foundMatch = true
-			//			}
-			//
-			//			if isEmptyAndNoAssertion || foundMatch {
-			//				nextMatch := *(currentState.next)
-			//				copyThread(&nextMatch, currentState)
-			//				if currentState.groupBegin {
-			//					//	if !stateExists(currentStates, nextMatch) {
-			//					currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
-			//					//}
-			//				} else if currentState.groupEnd {
-			//					if !stateExists(currentStates, nextMatch) {
-			//						currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
-			//					}
-			//				} else if currentState.assert != noneAssert {
-			//					if !stateExists(currentStates, nextMatch) {
-			//						currentStates = append(currentStates, nextMatch)
-			//					}
-			//				} else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
-			//					if !stateExists(currentStates, nextMatch) {
-			//						currentStates = append(currentStates, nextMatch)
-			//					}
-			//				} else {
-			//					if !stateExists(nextStates, nextMatch) {
-			//						nextStates = append(nextStates, nextMatch)
-			//					}
-			//				}
-			//			}
-			//
-			//			if currentState.isLast && len(nextStates) == 0 { // Last state reached
-			//				currentState.threadGroups[0].EndIdx = idx
-			//				if idx == currentState.threadGroups[0].StartIdx {
-			//					idx += 1
-			//				}
-			//				return true, currentState.threadGroups, idx
-			//			}
 		}
 		currentStates = append([]nfaState{}, nextStates...)
 		nextStates = nil
@@ -431,196 +301,4 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		return true, match, match[0].EndIdx
 	}
 	return false, []Group{}, i + 1
-	//		zeroStates := make([]*nfaState, 0)
-	//		// Keep taking zero-states, until there are no more left to take
-	//		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
-	//		topStateItem := currentStates.peek()
-	//		topState := topStateItem.(*priorQueueItem).state
-	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
-	//		tempStates = append(tempStates, zeroStates...)
-	//		num_appended := 0
-	//		for isZero == true {
-	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
-	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
-	//			if num_appended == 0 { // Break if we haven't appended any more unique values
-	//				break
-	//			}
-	//		}
-	//		if isZero == true {
-	//			currentStates.Pop()
-	//		}
-	//
-	//		for _, state := range tempStates {
-	//			heap.Push(currentStates, newPriorQueueItem(state))
-	//		}
-	//		tempStates = nil
-	//
-	//		// Take any transitions corresponding to current character
-	//		numStatesMatched := 0            // The number of states which had at least 1 match for this round
-	//		assertionFailed := false         // Whether or not an assertion failed for this round
-	//		lastStateInList := false         // Whether or not a last state was in our list of states
-	//		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
-	//		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
-	//		for numStatesMatched == 0 && lastStateInList == false {
-	//			if currentStates.Len() == 0 {
-	//				break
-	//			}
-	//			stateItem := heap.Pop(currentStates)
-	//			state := stateItem.(*priorQueueItem).state
-	//			matches, numMatches := state.matchesFor(str, i)
-	//			if numMatches > 0 {
-	//				numStatesMatched++
-	//				tempStates = append([]*nfaState(nil), matches...)
-	//				foundPath = true
-	//				for _, m := range matches {
-	//					if m.threadGroups == nil {
-	//						m.threadGroups = newMatch(numGroups + 1)
-	//					}
-	//					m.threadSP = state.threadSP + 1
-	//					copy(m.threadGroups, state.threadGroups)
-	//				}
-	//			}
-	//			if numMatches < 0 {
-	//				assertionFailed = true
-	//			}
-	//			if state.isLast {
-	//				if state.isLookaround() {
-	//					lastLookaroundInList = true
-	//				}
-	//				lastStateInList = true
-	//				lastStatePtr = state
-	//			}
-	//		}
-	//
-	//		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
-	//			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
-	//			// state. The explanation below is my attempt to explain this behavior.
-	//			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
-	//			//
-	//			// One of the states in our list was a last state and a lookaround. In this case, we
-	//			// don't abort upon failure of the assertion, because we have found
-	//			// another path to a final state.
-	//			// Even if the last state _was_ an assertion, we can use the previously
-	//			// saved indices to find a match.
-	//			if lastLookaroundInList {
-	//				break
-	//			} else {
-	//				if i == startingFrom {
-	//					i++
-	//				}
-	//				return false, []Group{}, i
-	//			}
-	//		}
-	//		// Check if we can find a state in our list that is:
-	//		// 	a. A last-state
-	//		// 	b. Empty
-	//		// 	c. Doesn't assert anything
-	//		for _, stateItem := range *currentStates {
-	//			s := stateItem.state
-	//			if s.isLast && s.isEmpty && s.assert == noneAssert {
-	//				lastStatePtr = s
-	//				lastStateInList = true
-	//			}
-	//		}
-	//		if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
-	//			for j := 1; j < numGroups+1; j++ {
-	//				tempIndices[j] = lastStatePtr.threadGroups[j]
-	//			}
-	//			endIdx = i
-	//			tempIndices[0] = Group{startIdx, endIdx}
-	//			if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
-	//				return true, tempIndices, tempIndices[0].EndIdx + 1
-	//			} else {
-	//				return true, tempIndices, tempIndices[0].EndIdx
-	//			}
-	//		}
-	//
-	//		// Check if we can find a zero-length match
-	//		if foundPath == false {
-	//			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
-	//				return item.state
-	//			})
-	//			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
-	//				if tempIndices[0].IsValid() == false {
-	//					tempIndices[0] = Group{startIdx, startIdx}
-	//				}
-	//			}
-	//			// If we haven't moved in the string, increment the counter by 1
-	//			// to ensure we don't keep trying the same string over and over.
-	//			//			if i == startingFrom {
-	//			startIdx++
-	//			//	i++
-	//			//			}
-	//			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
-	//				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
-	//					return true, tempIndices, tempIndices[0].EndIdx + 1
-	//				} else {
-	//					return true, tempIndices, tempIndices[0].EndIdx
-	//				}
-	//			}
-	//			return false, []Group{}, startIdx
-	//		}
-	//		currentStates = &priorityQueue{}
-	//		slices.Reverse(tempStates)
-	//		for _, state := range tempStates {
-	//			heap.Push(currentStates, newPriorQueueItem(state))
-	//		}
-	//		tempStates = nil
-	//
-	//		i++
-	//	}
-	//
-	// // End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
-	// // This is the exact same algorithm used inside the loop, so I should probably put it in a function.
-	//
-	//	if currentStates.Len() > 0 {
-	//		topStateItem := currentStates.peek()
-	//		topState := topStateItem.(*priorQueueItem).state
-	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
-	//		tempStates = append(tempStates, zeroStates...)
-	//		num_appended := 0 // Number of unique states addded to tempStates
-	//		for isZero == true {
-	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
-	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
-	//			if num_appended == 0 { // Break if we haven't appended any more unique values
-	//				break
-	//			}
-	//		}
-	//	}
-	//
-	//	for _, state := range tempStates {
-	//		heap.Push(currentStates, newPriorQueueItem(state))
-	//	}
-	//
-	// tempStates = nil
-	//
-	//	for _, stateItem := range *currentStates {
-	//		state := stateItem.state
-	//		// Only add the match if the start index is in bounds. If the state has an assertion,
-	//		// make sure the assertion checks out.
-	//		if state.isLast && i <= len(str) {
-	//			if state.assert == noneAssert || state.checkAssertion(str, i) {
-	//				for j := 1; j < numGroups+1; j++ {
-	//					tempIndices[j] = state.threadGroups[j]
-	//				}
-	//				endIdx = i
-	//				tempIndices[0] = Group{startIdx, endIdx}
-	//			}
-	//		}
-	//	}
-	//
-	//	if tempIndices.numValidGroups() > 0 {
-	//		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
-	//			return true, tempIndices, tempIndices[0].EndIdx + 1
-	//		} else {
-	//			return true, tempIndices, tempIndices[0].EndIdx
-	//		}
-	//	}
-	//
-	// if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
-	//
-	//		startIdx++
-	//	}
-	//
-	// return false, []Group{}, startIdx
 }
-- 
2.30.2


From b7467a00f1b6882121f0191b80aa56d7b6530c28 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 09:07:43 -0500
Subject: [PATCH 33/48] Removed priorityQueue (unused)

---
 regex/priorityQueue.go | 89 ------------------------------------------
 1 file changed, 89 deletions(-)
 delete mode 100644 regex/priorityQueue.go

diff --git a/regex/priorityQueue.go b/regex/priorityQueue.go
deleted file mode 100644
index ae43e86..0000000
--- a/regex/priorityQueue.go
+++ /dev/null
@@ -1,89 +0,0 @@
-package regex
-
-import "container/heap"
-
-// Implement a priority queue using container/heap
-
-const (
-	min_priority int = iota
-	zerostate_priority
-	alternation_priority
-	kleene_priority
-	char_priority
-	max_priority
-)
-
-func getPriority(state *nfaState) int {
-	if state.isKleene {
-		return zerostate_priority
-	} else if state.isAlternation {
-		return alternation_priority
-	} else {
-		if state.isEmpty {
-			return zerostate_priority
-		} else {
-			return char_priority
-		}
-	}
-}
-
-type priorQueueItem struct {
-	state    *nfaState
-	priority int
-	index    int
-}
-
-func newPriorQueueItem(state *nfaState) *priorQueueItem {
-	return &priorQueueItem{
-		state:    state,
-		index:    -1,
-		priority: getPriority(state),
-	}
-}
-
-type priorityQueue []*priorQueueItem
-
-func (pq priorityQueue) Len() int {
-	return len(pq)
-}
-
-func (pq priorityQueue) Less(i, j int) bool {
-	if pq[i].priority == pq[j].priority {
-		return pq[i].index < pq[j].index
-	}
-	return pq[i].priority > pq[j].priority // We want max-heap, so we use greater-than
-}
-
-func (pq priorityQueue) Swap(i, j int) {
-	pq[i], pq[j] = pq[j], pq[i]
-	pq[i].index = i
-	pq[j].index = j
-}
-
-func (pq *priorityQueue) Push(x any) {
-	length := len(*pq)
-	item := x.(*priorQueueItem)
-	item.index = length
-	*pq = append(*pq, item)
-}
-
-func (pq *priorityQueue) Pop() any {
-	old := *pq
-	n := len(old)
-	item := old[n-1]
-	old[n-1] = nil
-	item.index = -1
-	*pq = old[0 : n-1]
-	return item
-}
-func (pq *priorityQueue) peek() any {
-	queue := *pq
-	n := len(queue)
-	return queue[n-1]
-}
-
-func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
-	item.state = value
-	item.priority = priority
-	heap.Fix(pq, item.index)
-}
-- 
2.30.2


From e546f01c208f6db4fa88de812f101e809c5b5b41 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 09:12:55 -0500
Subject: [PATCH 34/48] Removed redundant return (staticcheck)

---
 cmd/unique_array.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cmd/unique_array.go b/cmd/unique_array.go
index e03621a..88c56cb 100644
--- a/cmd/unique_array.go
+++ b/cmd/unique_array.go
@@ -16,7 +16,6 @@ func (s *uniq_arr[T]) add(vals ...T) {
 			s.backingMap[item] = struct{}{}
 		}
 	}
-	return
 }
 
 func (s uniq_arr[T]) contains(val T) bool {
-- 
2.30.2


From 7231169270f9e5ba688b913fd179e08250a7f8a9 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 09:13:03 -0500
Subject: [PATCH 35/48] Removed unused functions

---
 regex/misc.go | 43 -------------------------------------------
 1 file changed, 43 deletions(-)

diff --git a/regex/misc.go b/regex/misc.go
index 2d21e61..38b5313 100644
--- a/regex/misc.go
+++ b/regex/misc.go
@@ -48,49 +48,6 @@ func isNormalChar(c rune) bool {
 	return !slices.Contains(specialChars, c)
 }
 
-// Ensure that the given elements are only appended to the given slice if they
-// don't already exist. Returns the new slice, and the number of unique items appended.
-func uniqueAppend[T comparable](slc []T, items ...T) ([]T, int) {
-	num_appended := 0
-	for _, item := range items {
-		if !slices.Contains(slc, item) {
-			slc = append(slc, item)
-			num_appended++
-		}
-	}
-	return slc, num_appended
-}
-
-func uniqueAppendFunc[T any](slc []T, fn func(T, T) bool, items ...T) ([]T, int) {
-	toRet := make([]T, len(slc))
-	num_appended := 0
-	copy(toRet, slc)
-	for _, item := range items {
-		itemExists := false
-		for _, val := range slc {
-			if fn(item, val) {
-				itemExists = true
-			}
-		}
-		if !itemExists {
-			toRet = append(toRet, item)
-			num_appended++
-		}
-	}
-	return toRet, num_appended
-}
-
-// Returns true only if all the given elements are equal
-func allEqual[T comparable](items ...T) bool {
-	first := items[0]
-	for _, item := range items {
-		if item != first {
-			return false
-		}
-	}
-	return true
-}
-
 // Map function - convert a slice of T to a slice of V, based on a function
 // that maps a T to a V
 func funcMap[T, V any](slc []T, fn func(T) V) []V {
-- 
2.30.2


From d172a58258cc62523a78264e754ff3ddc6e3c42f Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 09:13:29 -0500
Subject: [PATCH 36/48] Throw error if match isn't found but test.result has >0
 elements

---
 regex/re_test.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/regex/re_test.go b/regex/re_test.go
index b8b1381..de6aaba 100644
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -790,6 +790,13 @@ func TestFindSubmatch(t *testing.T) {
 				}
 			}
 			match, err := regComp.FindSubmatch(test.str)
+			if err != nil {
+				if len(test.result) != 0 {
+					t.Errorf("Wanted %v got no match\n", test.result[0])
+				}
+			} else if len(test.result) == 0 {
+				t.Errorf("Wanted no match got %v\n", match)
+			}
 			for i := range match {
 				if match[i].IsValid() {
 					if test.result[0][i] != match[i] {
-- 
2.30.2


From 76e0170cb9fd5a19583d5dc7edc6c7c42d806e1c Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 09:13:52 -0500
Subject: [PATCH 37/48] Removed unused function

---
 regex/matching.go | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index d9500ce..1b07ee8 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -28,17 +28,6 @@ func newMatch(size int) Match {
 	return toRet
 }
 
-// Returns the number of valid groups in the match
-func (m Match) numValidGroups() int {
-	numValid := 0
-	for _, g := range m {
-		if g.StartIdx >= 0 && g.EndIdx >= 0 {
-			numValid++
-		}
-	}
-	return numValid
-}
-
 // Returns a string containing the indices of all (valid) groups in the match
 func (m Match) String() string {
 	var toRet string
-- 
2.30.2


From 835d495990f03f702f90d19e556216f8964e1db2 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 09:14:45 -0500
Subject: [PATCH 38/48] Removed capitalization for error message (staticcheck)

---
 regex/range2regex.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/regex/range2regex.go b/regex/range2regex.go
index a01dfff..de8e0f4 100644
--- a/regex/range2regex.go
+++ b/regex/range2regex.go
@@ -109,7 +109,7 @@ func range2regex(start int, end int) (string, error) {
 		startSlc := intToSlc(rg.start)
 		endSlc := intToSlc(rg.end)
 		if len(startSlc) != len(endSlc) {
-			return "", fmt.Errorf("Error parsing numeric range")
+			return "", fmt.Errorf("error parsing numeric range")
 		}
 		for i := range startSlc {
 			if startSlc[i] == endSlc[i] {
-- 
2.30.2


From 47f88c817f562729093b2cd62494b457e3c55061 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:14:17 -0500
Subject: [PATCH 39/48] Fixed typo

---
 regex/compile.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/regex/compile.go b/regex/compile.go
index 8dbcf37..9a703b9 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -1157,7 +1157,7 @@ func Compile(re string, flags ...ReFlag) (Reg, error) {
 	return reg, nil
 }
 
-// MustCompile panicks if Compile returns an error. They are identical in all other respects.
+// MustCompile panics if Compile returns an error. They are identical in all other respects.
 func MustCompile(re string, flags ...ReFlag) Reg {
 	reg, err := Compile(re, flags...)
 	if err != nil {
-- 
2.30.2


From 9e12f9dcb32199b23efbd0c7e954daa3c962e1ea Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:38:26 -0500
Subject: [PATCH 40/48] Added field to Reg, denoting if we prefer longest match
 (POSIX style) or not (perl style)

---
 regex/compile.go | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/regex/compile.go b/regex/compile.go
index 9a703b9..da733de 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -14,9 +14,10 @@ var notDotChars []rune
 // the startState of the NFA representation of the regex, and the number of capturing
 // groups in the regex. It also contains the expression string.
 type Reg struct {
-	start     *nfaState
-	numGroups int
-	str       string
+	start         *nfaState
+	numGroups     int
+	str           string
+	preferLongest bool
 }
 
 // NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
@@ -30,6 +31,10 @@ func (r Reg) String() string {
 	return r.str
 }
 
+func (r Reg) Longest() {
+	r.preferLongest = true
+}
+
 const concatRune rune = 0xF0001
 
 // Flags for shuntingYard - control its behavior
@@ -1135,7 +1140,7 @@ func thompson(re []postfixNode) (Reg, error) {
 	concatenate(nfa[0], &lastState)
 
 	// The string is empty here, because we add it in Compile()
-	return Reg{nfa[0], numGroups, ""}, nil
+	return Reg{nfa[0], numGroups, "", false}, nil
 
 }
 
-- 
2.30.2


From 1f5a36353934c0b78d2af207fceec720dcb8a8f9 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:39:09 -0500
Subject: [PATCH 41/48] Use new function signatures (with preferLongest)

---
 regex/nfa.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/regex/nfa.go b/regex/nfa.go
index db53c00..c649712 100644
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -133,7 +133,7 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 
 // Checks if the given state's assertion is true. Returns true if the given
 // state doesn't have an assertion.
-func (s nfaState) checkAssertion(str []rune, idx int) bool {
+func (s nfaState) checkAssertion(str []rune, idx int, preferLongest bool) bool {
 	if s.assert == alwaysTrueAssert {
 		return true
 	}
@@ -183,7 +183,7 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 			strToMatch = string(runesToMatch)
 		}
 
-		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex}
+		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex, preferLongest}
 		matchIndices := regComp.FindAll(strToMatch)
 
 		numMatchesFound := 0
@@ -210,9 +210,9 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 }
 
 // Returns true if the contents of 's' contain the value at the given index of the given string
-func (s nfaState) contentContains(str []rune, idx int) bool {
+func (s nfaState) contentContains(str []rune, idx int, preferLongest bool) bool {
 	if s.assert != noneAssert {
-		return s.checkAssertion(str, idx)
+		return s.checkAssertion(str, idx, preferLongest)
 	}
 	if idx >= len(str) {
 		return false
-- 
2.30.2


From fb47e082eb9950165d2d8c6ca91a1cecb111b0a4 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:39:35 -0500
Subject: [PATCH 42/48] Wrote new methods Expand() and preferLongest(); Use new
 function signatures (with preferLongest); only characters should be added to
 next state list

---
 regex/matching.go | 113 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 96 insertions(+), 17 deletions(-)

diff --git a/regex/matching.go b/regex/matching.go
index 1b07ee8..4d7c600 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,6 +2,8 @@ package regex
 
 import (
 	"fmt"
+	"strconv"
+	"unicode"
 )
 
 // A Match represents a match found by the regex in a given string.
@@ -77,6 +79,18 @@ func (regex Reg) Match(str string) bool {
 	return err == nil
 }
 
+// CompileMatch compiles expr and returns true if str contains a match of the expression.
+// It is equivalent to [regexp.MatchString].
+// An optional list of flags may be provided (see [ReFlag]).
+// It returns an error (!= nil) if there was an error compiling the expression.
+func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
+	re, err := Compile(expr, flags...)
+	if err != nil {
+		return false, err
+	}
+	return re.Match(str), nil
+}
+
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
 func (regex Reg) FindAll(str string) []Group {
@@ -162,7 +176,7 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
 	var matchFound bool
 	var matchIdx Match
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest)
 		if matchFound {
 			matchNum++
 		}
@@ -182,7 +196,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 	var matchIdx Match
 	indices := make([]Match, 0)
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest)
 		if matchFound {
 			indices = append(indices, matchIdx)
 		}
@@ -191,7 +205,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }
 
-func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState) []nfaState {
+func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
 	if stateExists(list, state) || stateExists(visited, state) {
 		return list
 	}
@@ -199,32 +213,32 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
 
 	if state.isKleene || state.isQuestion {
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		return list
 	}
 	if state.isAlternation {
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		return list
 	}
 	state.threadGroups = append([]Group{}, threadGroups...)
 	if state.assert != noneAssert {
-		if state.checkAssertion(str, idx) {
+		if state.checkAssertion(str, idx, preferLongest) {
 			copyThread(state.next, state)
-			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 		}
 	}
 	if state.groupBegin {
 		state.threadGroups[state.groupNum].StartIdx = idx
-		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	if state.groupEnd {
 		state.threadGroups[state.groupNum].EndIdx = idx
-		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	return append(list, state)
 
@@ -233,7 +247,7 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
-func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
+func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) {
 	// Base case - exit if offset exceeds string's length
 	if offset > len(str) {
 		// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
@@ -248,7 +262,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
 	if start.assert != noneAssert {
-		if start.checkAssertion(str, offset) == false {
+		if start.checkAssertion(str, offset, preferLongest) == false {
 			i++
 			return false, []Group{}, i
 		}
@@ -256,7 +270,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 
 	start.threadGroups = newMatch(numGroups + 1)
 	start.threadGroups[0].StartIdx = i
-	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil)
+	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest)
 	var match Match = nil
 	for idx := i; idx <= len(str); idx++ {
 		if len(currentStates) == 0 {
@@ -274,9 +288,9 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 				currentState.threadGroups[0].EndIdx = idx
 				match = append([]Group{}, currentState.threadGroups...)
 				break
-			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
-				if currentState.contentContains(str, idx) {
-					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil)
+			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
+				if currentState.contentContains(str, idx, preferLongest) {
+					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
 				}
 			}
 		}
@@ -291,3 +305,68 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	}
 	return false, []Group{}, i + 1
 }
+
+// Expand appends template to dst, expanding any variables in template to the relevant capturing group.
+//
+// A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group.
+// To insert a literal $, do not put a number after it. Alternatively, you can use $$.
+// src is the input string, and match must be the result of [Reg.FindSubmatch].
+func (regex Reg) Expand(dst string, template string, src string, match Match) string {
+	templateRuneSlc := []rune(template)
+	srcRuneSlc := []rune(src)
+	i := 0
+	for i < len(templateRuneSlc) {
+		c := templateRuneSlc[i]
+		i++
+		if c != '$' {
+			dst += string(c)
+			continue
+		}
+		// A '$' that ends the template, or that is followed by another '$', is a literal '$'.
+		if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' {
+			dst += "$"
+			i++
+			continue
+		}
+		// Collect the group number; the bounds check keeps a template ending in a variable (e.g. "$1") from indexing past the end.
+		numStr := ""
+		for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
+			numStr += string(templateRuneSlc[i])
+			i++
+		}
+		num, _ := strconv.Atoi(numStr) // On overflow Atoi returns the maximum int, which lands in the "no such group" case
+		// No digits: literal '$'. No such group: keep the variable text. Unmatched group (negative indices): expands to nothing.
+		switch {
+		case numStr == "":
+			dst += "$"
+		case num >= len(match):
+			dst += "$" + numStr
+		case match[num].StartIdx >= 0 && match[num].EndIdx >= 0:
+			dst += string(srcRuneSlc[match[num].StartIdx:match[num].EndIdx])
+		}
+	}
+	return dst
+}
+
+// LiteralPrefix returns a string that must begin any match of the given regular expression.
+// The second return value is true if the string comprises the entire expression.
+func (regex Reg) LiteralPrefix() (prefix string, complete bool) {
+	state := regex.start
+	if state.assert != noneAssert {
+		state = state.next
+	}
+	for !(state.isLast) && (!state.isAlternation) && len(state.content) == 1 && state.assert == noneAssert {
+		if state.groupBegin || state.groupEnd {
+			state = state.next // Group-boundary states are stepped over without adding to the prefix
+			continue
+		}
+		prefix += string(rune(state.content[0])) // A state with exactly one content value contributes that value as a rune
+		state = state.next
+	}
+	if state.isLast { // The walk ended on the last state rather than stopping early
+		complete = true
+	} else {
+		complete = false
+	}
+	return prefix, complete
+}
-- 
2.30.2


From d522f50b502f77c536a23bea4a259b80a3fe3153 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:40:59 -0500
Subject: [PATCH 43/48] Wrote new example functions

---
 regex/example_test.go | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/regex/example_test.go b/regex/example_test.go
index f2443a2..60ed033 100644
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -52,3 +52,29 @@ func ExampleReg_FindSubmatch() {
 	// 0	1
 	// 2	3
 }
+
+func ExampleReg_Expand() {
+	inputStr := `option1: value1
+	option2: value2`
+	regexStr := `(\w+): (\w+)`
+	templateStr := "$1 = $2\n"
+	regexComp := regex.MustCompile(regexStr, regex.RE_MULTILINE)
+	result := ""
+	for _, submatches := range regexComp.FindAllSubmatch(inputStr) {
+		result = regexComp.Expand(result, templateStr, inputStr, submatches)
+	}
+	fmt.Println(result)
+	// Output: option1 = value1
+	// option2 = value2
+
+}
+
+func ExampleReg_LiteralPrefix() {
+	regexStr := `a(b|c)d*`
+	regexComp := regex.MustCompile(regexStr)
+	prefix, complete := regexComp.LiteralPrefix()
+	fmt.Println(prefix)
+	fmt.Println(complete)
+	// Output: a
+	// false
+}
-- 
2.30.2


From af15904f3bf54da1d8ce9071eeddfc87888957bb Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:41:13 -0500
Subject: [PATCH 44/48] Updated documentation

---
 regex/doc.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/regex/doc.go b/regex/doc.go
index 9ca1f04..c5124e0 100644
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -173,6 +173,6 @@ The following features are not available in [regexp], but are supported in my en
  1. Lookarounds
  2. Numeric ranges
 
-The goal is to shorten the first list, and expand the second.
+I hope to shorten the first list, and expand the second.
 */
 package regex
-- 
2.30.2


From 9fbb99f86c74d6d83f6925fd57b9e4f55472d7cc Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:47:57 -0500
Subject: [PATCH 45/48] Wrote example for Longest()

---
 regex/example_test.go | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/regex/example_test.go b/regex/example_test.go
index 60ed033..8499dfc 100644
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -78,3 +78,14 @@ func ExampleReg_LiteralPrefix() {
 	// Output: a
 	// false
 }
+
+func ExampleReg_Longest() {
+	regexStr := `x|xx`
+	inputStr := "xx"
+	regexComp := regex.MustCompile(regexStr)
+	fmt.Println(regexComp.FindString(inputStr))
+	regexComp.Longest()
+	fmt.Println(regexComp.FindString(inputStr))
+	// Output: x
+	// xx
+}
-- 
2.30.2


From b60ded41366a9e57b3a911a2770c50f11d856f4d Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:48:33 -0500
Subject: [PATCH 46/48] Don't break when a match is found, if we are looking
 for the longest match

---
 regex/matching.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/regex/matching.go b/regex/matching.go
index 4d7c600..0787572 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -287,7 +287,9 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			if currentState.isLast {
 				currentState.threadGroups[0].EndIdx = idx
 				match = append([]Group{}, currentState.threadGroups...)
-				break
+				if !preferLongest {
+					break
+				}
 			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
 				if currentState.contentContains(str, idx, preferLongest) {
 					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
-- 
2.30.2


From 15ee49f42eb43bd833b59536c68a99fa0de36166 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 15:51:46 -0500
Subject: [PATCH 47/48] Rename method receivers from 'regex' to 're' (it's
 shorter)

---
 regex/compile.go  | 12 ++++++------
 regex/matching.go | 44 ++++++++++++++++++++++----------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/regex/compile.go b/regex/compile.go
index da733de..d9bef70 100644
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -22,17 +22,17 @@ type Reg struct {
 
 // NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
-func (r Reg) NumSubexp() int {
-	return r.numGroups
+func (re Reg) NumSubexp() int {
+	return re.numGroups
 }
 
 // String returns the string used to compile the expression.
-func (r Reg) String() string {
-	return r.str
+func (re Reg) String() string {
+	return re.str
 }
 
-func (r Reg) Longest() {
-	r.preferLongest = true
+func (re *Reg) Longest() {
+	re.preferLongest = true
 }
 
 const concatRune rune = 0xF0001
diff --git a/regex/matching.go b/regex/matching.go
index 0787572..4d6b4e3 100644
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -65,8 +65,8 @@ func copyThread(to *nfaState, from nfaState) {
 
 // Find returns the 0-group of the leftmost match of the regex in the given string.
 // An error value != nil indicates that no match was found.
-func (regex Reg) Find(str string) (Group, error) {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) Find(str string) (Group, error) {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Group{}, fmt.Errorf("no matches found")
 	}
@@ -74,8 +74,8 @@ func (regex Reg) Find(str string) (Group, error) {
 }
 
 // Match returns a boolean value, indicating whether the regex found a match in the given string.
-func (regex Reg) Match(str string) bool {
-	_, err := regex.Find(str)
+func (re Reg) Match(str string) bool {
+	_, err := re.Find(str)
 	return err == nil
 }
 
@@ -93,8 +93,8 @@ func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
 
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
-func (regex Reg) FindAll(str string) []Group {
-	indices := regex.FindAllSubmatch(str)
+func (re Reg) FindAll(str string) []Group {
+	indices := re.FindAllSubmatch(str)
 	zeroGroups := funcMap(indices, getZeroGroup)
 	return zeroGroups
 }
@@ -103,8 +103,8 @@ func (regex Reg) FindAll(str string) []Group {
 // The return value will be an empty string in two situations:
 //  1. No match was found
 //  2. The match was an empty string
-func (regex Reg) FindString(str string) string {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) FindString(str string) string {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return ""
 	}
@@ -117,8 +117,8 @@ func (regex Reg) FindString(str string) string {
 // number of groups. The validity of a group (whether or not it matched anything) can be determined with
 // [Group.IsValid], or by checking that both indices of the group are >= 0.
 // The second-return value is nil if no match was found.
-func (regex Reg) FindSubmatch(str string) (Match, error) {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) FindSubmatch(str string) (Match, error) {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Match{}, fmt.Errorf("no match found")
 	} else {
@@ -135,9 +135,9 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 //  2. Group n found a zero-length match
 //
 // A return value of nil indicates no match.
-func (regex Reg) FindStringSubmatch(str string) []string {
-	matchStr := make([]string, regex.numGroups+1)
-	match, err := regex.FindSubmatch(str)
+func (re Reg) FindStringSubmatch(str string) []string {
+	matchStr := make([]string, re.numGroups+1)
+	match, err := re.FindSubmatch(str)
 	if err != nil {
 		return nil
 	}
@@ -159,8 +159,8 @@ func (regex Reg) FindStringSubmatch(str string) []string {
 // FindAllString is the 'all' version of [FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
-func (regex Reg) FindAllString(str string) []string {
-	zerogroups := regex.FindAll(str)
+func (re Reg) FindAllString(str string) []string {
+	zerogroups := re.FindAll(str)
 	matchStrs := funcMap(zerogroups, func(g Group) string {
 		return str[g.StartIdx:g.EndIdx]
 	})
@@ -169,14 +169,14 @@ func (regex Reg) FindAllString(str string) []string {
 
 // FindNthMatch return the 'n'th match of the regex in the given string.
 // It returns an error (!= nil) if there are fewer than 'n' matches in the string.
-func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
+func (re Reg) FindNthMatch(str string, n int) (Match, error) {
 	idx := 0
 	matchNum := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			matchNum++
 		}
@@ -189,14 +189,14 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
 }
 
 // FindAllSubmatch returns a slice of matches in the given string.
-func (regex Reg) FindAllSubmatch(str string) []Match {
+func (re Reg) FindAllSubmatch(str string) []Match {
 	idx := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	indices := make([]Match, 0)
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			indices = append(indices, matchIdx)
 		}
@@ -313,7 +313,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 // A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group.
 // To insert a literal $, do not put a number after it. Alternatively, you can use $$.
 // src is the input string, and match must be the result of [Reg.FindSubmatch].
-func (regex Reg) Expand(dst string, template string, src string, match Match) string {
+func (re Reg) Expand(dst string, template string, src string, match Match) string {
 	templateRuneSlc := []rune(template)
 	srcRuneSlc := []rune(src)
 	i := 0
@@ -352,8 +352,8 @@ func (regex Reg) Expand(dst string, template string, src string, match Match) st
 
 // LiteralPrefix returns a string that must begin any match of the given regular expression.
 // The second return value is true if the string comprises the entire expression.
-func (regex Reg) LiteralPrefix() (prefix string, complete bool) {
-	state := regex.start
+func (re Reg) LiteralPrefix() (prefix string, complete bool) {
+	state := re.start
 	if state.assert != noneAssert {
 		state = state.next
 	}
-- 
2.30.2


From d1958f289c72441a148f0005c5ac76d58a0f8357 Mon Sep 17 00:00:00 2001
From: Aadhavan Srinivasan <aadhavan@twomorecents.org>
Date: Sun, 9 Feb 2025 16:08:16 -0500
Subject: [PATCH 48/48] Commented out tests that would only pass with Longest()

---
 regex/re_test.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/regex/re_test.go b/regex/re_test.go
index de6aaba..8b9fc8d 100644
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -25,7 +25,9 @@ var reTests = []struct {
 	{"a*b", nil, "qwqw", []Group{}},
 	{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
 	{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
-	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	// This match will only happen with Longest()
+	// {"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}},
 	{"b*a*a", nil, "bba", []Group{{0, 3}}},
 	{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
 	{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
@@ -537,7 +539,9 @@ var groupTests = []struct {
 	{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
 	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	// This match will only happen with Longest()
+	//	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 3}, {0, 3}, {-1, -1}}}},
 	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
 	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
-- 
2.30.2