Commented out unused functions; use new nfaState parameters

Got rid of transitions parameter, changed how kleene state is processed
I replaced the 'transitions' field of nfaState with a single nfaState pointer. This is because any non-alternation state will only have one next state, so the map was just added complexity. I changed alternation processing - instead of having their own dedicated fields, alternation states just use the new 'next' field, and another one called 'splitState'. I also changed the kleene state processing to remove the unnecessary empty state in the right-side alternation (it actually messed up my matching).
2025-02-05 22:23:31 -05:00 · 2025-02-05 22:20:28 -05:00 · 2025-02-05 18:01:36 -05:00 · 2025-02-05 12:21:12 -05:00 · 2025-02-05 11:32:20 -05:00 · 2025-02-04 14:09:24 -05:00
6 changed files with 634 additions and 349 deletions
--- a/4
+++ b/4
@@ -6,8 +6,8 @@ fmt:
 vet: fmt
 	go vet ./...
 buildLib: vet
-	go build -gcflags="-N -l" ./...
+	go build -gcflags="all=-N -l" ./...
 buildCmd: buildLib
-	go build -C cmd/ -gcflags="-N -l" -o re ./...
+	go build -C cmd/ -gcflags="all=-N -l" -o re ./...
 test: buildCmd
 	go test -v ./...
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -822,7 +822,6 @@ func thompson(re []postfixNode) (Reg, error) {
 	for _, c := range re {
 		if c.nodetype == characterNode || c.nodetype == assertionNode {
 			stateToAdd := nfaState{}
 			stateToAdd.transitions = make(map[int][]*nfaState)
 			if c.allChars {
 				stateToAdd.allChars = true
 				if len(c.except) != 0 {
@@ -934,7 +933,6 @@ func thompson(re []postfixNode) (Reg, error) {
 			s.isEmpty = true
 			s.output = make([]*nfaState, 0)
 			s.output = append(s.output, s)
 			s.transitions = make(map[int][]*nfaState)
 			// LPAREN nodes are just added normally
 			if c.nodetype == lparenNode {
 				numGroups++
@@ -966,7 +964,7 @@ func thompson(re []postfixNode) (Reg, error) {
 					s.groupNum = lparenNode.groupNum
 					to_add := concatenate(lparenNode, s)
 					nfa = append(nfa, to_add)
-				} else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
+				} else if middleNode.groupBegin && middleNode.numTransitions() == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
 					nfa = append(nfa, lparenNode)    // I shouldn't have popped this out, because it is not involved in the current capturing group
 					s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually an lparen
 					to_add := concatenate(middleNode, s)
@@ -1030,14 +1028,14 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying kleene star")
 			}
-			stateToAdd, err := kleene(*s1)
+			stateToAdd, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
 			nfa = append(nfa, stateToAdd)
 		case plusNode: // a+ is equivalent to aa*
 			s1 := mustPop(&nfa)
-			s2, err := kleene(*s1)
+			s2, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
@@ -1059,16 +1057,16 @@ func thompson(re []postfixNode) (Reg, error) {
 			// 	'|a'
 			// 	'^a|'
 			// 	'^|a'
-			s1, err1 := pop(&nfa)
+			s2, err1 := pop(&nfa)
-			s2, err2 := pop(&nfa)
+			s1, err2 := pop(&nfa)
-			if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err2 != nil || (s2.groupBegin && s2.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
 					nfa = append(nfa, s2)
 				}
 				tmp := zeroLengthMatchState()
 				s2 = &tmp
 			}
-			if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err1 == nil { // See above for explanation
 					nfa = append(nfa, s1)
 				}
@@ -1100,7 +1098,7 @@ func thompson(re []postfixNode) (Reg, error) {
 				stateToAdd = concatenate(stateToAdd, cloneState(poppedState))
 			}
 			if c.endReps == infinite_reps { // Case 3
-				s2, err := kleene(*poppedState)
+				s2, err := kleene(poppedState)
 				if err != nil {
 					return Reg{}, err
 				}
@@ -1117,7 +1115,10 @@ func thompson(re []postfixNode) (Reg, error) {
 		return Reg{}, fmt.Errorf("invalid regex")
 	}
-	verifyLastStates(nfa)
+	lastState := newState()
 	lastState.isLast = true
 	concatenate(nfa[0], &lastState)
 	return Reg{nfa[0], numGroups}, nil
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,6 +2,7 @@ package regex
 import (
 	"fmt"
 	"slices"
 	"sort"
 )
@@ -73,58 +74,58 @@ func getZeroGroup(m Match) Group {
 // given slice. It returns the resulting states. If any of the resulting states is a 0-state,
 // the second ret val is true.
 // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
-func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
+//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
-	for _, state := range states {
+//	for _, state := range states {
-		if len(state.transitions[epsilon]) > 0 {
+//		if len(state.transitions[epsilon]) > 0 {
-			for _, s := range state.transitions[epsilon] {
+//			for _, s := range state.transitions[epsilon] {
-				if s.threadGroups == nil {
+//				if s.threadGroups == nil {
-					s.threadGroups = newMatch(numGroups + 1)
+//					s.threadGroups = newMatch(numGroups + 1)
-				}
+//				}
-				copy(s.threadGroups, state.threadGroups)
+//				copy(s.threadGroups, state.threadGroups)
-				if s.groupBegin {
+//				if s.groupBegin {
-					s.threadGroups[s.groupNum].StartIdx = idx
+//					s.threadGroups[s.groupNum].StartIdx = idx
-					//					openParenGroups = append(openParenGroups, s.groupNum)
+//					//					openParenGroups = append(openParenGroups, s.groupNum)
-				}
+//				}
-				if s.groupEnd {
+//				if s.groupEnd {
-					s.threadGroups[s.groupNum].EndIdx = idx
+//					s.threadGroups[s.groupNum].EndIdx = idx
-					//					closeParenGroups = append(closeParenGroups, s.groupNum)
+//					//					closeParenGroups = append(closeParenGroups, s.groupNum)
-				}
+//				}
-			}
+//			}
-			rtv = append(rtv, state.transitions[epsilon]...)
+//			rtv = append(rtv, state.transitions[epsilon]...)
-		}
+//		}
-	}
+//	}
-	for _, state := range rtv {
+//	for _, state := range rtv {
-		if len(state.transitions[epsilon]) > 0 {
+//		if len(state.transitions[epsilon]) > 0 {
-			return rtv, true
+//			return rtv, true
-		}
+//		}
-	}
+//	}
-	return rtv, false
+//	return rtv, false
-}
+//}
 // zeroMatchPossible returns true if a zero-length match is possible
 // from any of the given states, given the string and our position in it.
 // It uses the same algorithm to find zero-states as the one inside the loop,
 // so I should probably put it in a function.
-func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
+//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
-	zeroStates, isZero := takeZeroState(states, numGroups, idx)
+//	zeroStates, isZero := takeZeroState(states, numGroups, idx)
-	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
+//	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
-	tempstates = append(tempstates, states...)
+//	tempstates = append(tempstates, states...)
-	tempstates = append(tempstates, zeroStates...)
+//	tempstates = append(tempstates, zeroStates...)
-	num_appended := 0 // number of unique states addded to tempstates
+//	num_appended := 0 // number of unique states addded to tempstates
-	for isZero == true {
+//	for isZero == true {
-		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
+//		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
-		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
+//		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
-		if num_appended == 0 { // break if we haven't appended any more unique values
+//		if num_appended == 0 { // break if we haven't appended any more unique values
-			break
+//			break
-		}
+//		}
-	}
+//	}
-	for _, state := range tempstates {
+//	for _, state := range tempstates {
-		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
+//		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
-			return true
+//			return true
-		}
+//		}
-	}
+//	}
-	return false
+//	return false
-}
+//}
 // Prunes the slice by removing overlapping indices.
 func pruneIndices(indices []Match) []Match {
@@ -150,6 +151,11 @@ func pruneIndices(indices []Match) []Match {
 	return toRet
 }
 func copyThread(to *nfaState, from nfaState) {
 	to.threadSP = from.threadSP
 	to.threadGroups = append([]Group{}, from.threadGroups...)
 }
 // Find returns the 0-group of the leftmost match of the regex in the given string.
 // An error value != nil indicates that no match was found.
 func (regex Reg) Find(str string) (Group, error) {
@@ -265,15 +271,16 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	// chosen as the match for the entire string.
 	// This allows us to pick the longest possible match (which is how greedy matching works).
 	// COMMENT ABOVE IS CURRENTLY NOT UP-TO-DATE
-	tempIndices := newMatch(numGroups + 1)
+	//	tempIndices := newMatch(numGroups + 1)
-	foundPath := false
+	//	foundPath := false
-	startIdx := offset
+	//startIdx := offset
-	endIdx := offset
+	//endIdx := offset
-	currentStates := make([]*nfaState, 0)
+	currentStates := make([]nfaState, 0)
-	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
+	nextStates := make([]nfaState, 0)
-	i := offset                        // Index in string
+	//	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
-	startingFrom := i                  // Store starting index
+	i := offset // Index in string
 	//startingFrom := i                  // Store starting index
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
@@ -284,181 +291,339 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 		}
 	}
 	// Increment until we hit a character matching the start state (assuming not 0-state)
-	if start.isEmpty == false {
+	//	if start.isEmpty == false {
-		for i < len(str) && !start.contentContains(str, i) {
+	//		for i < len(str) && !start.contentContains(str, i) {
-			i++
+	//			i++
-		}
+	//		}
-		startIdx = i
+	//		startIdx = i
-		startingFrom = i
+	//		startingFrom = i
-		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
+	//		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
-	}
+	//	}
-	start.threadGroups = newMatch(numGroups + 1)
+	//	start.threadGroups = newMatch(numGroups + 1)
 	// Check if the start state begins a group - if so, add the start index to our list
-	if start.groupBegin {
+	//if start.groupBegin {
-		start.threadGroups[start.groupNum].StartIdx = i
+	//		start.threadGroups[start.groupNum].StartIdx = i
-		//		tempIndices[start.groupNum].startIdx = i
+	//		tempIndices[start.groupNum].startIdx = i
-	}
+	//}
 	currentStates = append(currentStates, start)
 	start.threadSP = i
 	currentStates = append(currentStates, *start)
 	var foundMatch bool
 	var isEmptyAndNoAssertion bool
 	// Main loop
-	for i < len(str) {
+	for idx := i; idx <= len(str); idx++ {
-		foundPath = false
+		for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
 			currentState := currentStates[currentStateIdx]
 			foundMatch = false
 			isEmptyAndNoAssertion = false
-		zeroStates := make([]*nfaState, 0)
+			if currentState.threadGroups == nil {
-		// Keep taking zero-states, until there are no more left to take
+				currentState.threadGroups = newMatch(numGroups + 1)
-		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
+				currentState.threadGroups[0].StartIdx = idx
 		zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
 		tempStates = append(tempStates, zeroStates...)
 		num_appended := 0
 		for isZero == true {
 			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 			if num_appended == 0 { // Break if we haven't appended any more unique values
 				break
 			}
 		}
-		currentStates, _ = uniqueAppend(currentStates, tempStates...)
+			if currentState.groupBegin {
-		tempStates = nil
+				currentState.threadGroups[currentState.groupNum].StartIdx = idx
 				//		allMatches := make([]nfaState, 0)
 				//		for _, v := range currentState.transitions {
 				//			dereferenced := funcMap(v, func(s *nfaState) nfaState {
 				//				return *s
 				//			})
 				//			allMatches = append(allMatches, dereferenced...)
 				//		}
 				//		slices.Reverse(allMatches)
 				//		for i := range allMatches {
 				//			copyThread(&allMatches[i], currentState)
 				//		}
 				//		currentStates = append(currentStates, allMatches...)
 			}
 			if currentState.groupEnd {
 				currentState.threadGroups[currentState.groupNum].EndIdx = idx
 				//			allMatches := make([]nfaState, 0)
 				//			for _, v := range currentState.transitions {
 				//				dereferenced := funcMap(v, func(s *nfaState) nfaState {
 				//					return *s
 				//				})
 				//				allMatches = append(allMatches, dereferenced...)
 				//			}
 				//			slices.Reverse(allMatches)
 				//			for i := range allMatches {
 				//				copyThread(&allMatches[i], currentState)
 				//			}
 				//			currentStates = append(currentStates, allMatches...)
 			}
-		// Take any transitions corresponding to current character
+			//		if currentState.isKleene {
-		numStatesMatched := 0            // The number of states which had at least 1 match for this round
+			//			// Append the next-state (after the kleene), then append the kleene state
-		assertionFailed := false         // Whether or not an assertion failed for this round
+			//			allMatches := make([]*nfaState, 0)
-		lastStateInList := false         // Whether or not a last state was in our list of states
+			//			for _, v := range currentState.transitions {
-		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
+			//				allMatches = append(allMatches, v...)
 		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
 		for _, state := range currentStates {
 			matches, numMatches := state.matchesFor(str, i)
 			if numMatches > 0 {
 				numStatesMatched++
 				tempStates = append(tempStates, matches...)
 				foundPath = true
 				for _, m := range matches {
 					if m.threadGroups == nil {
 						m.threadGroups = newMatch(numGroups + 1)
 					}
 					copy(m.threadGroups, state.threadGroups)
 				}
 			}
 			if numMatches < 0 {
 				assertionFailed = true
 			}
 			if state.isLast {
 				if state.isLookaround() {
 					lastLookaroundInList = true
 				}
 				lastStateInList = true
 				lastStatePtr = state
 			}
 		}
 		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
 			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
 			// state. The explanation below is my attempt to explain this behavior.
 			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
 			//
 			// One of the states in our list was a last state and a lookaround. In this case, we
 			// don't abort upon failure of the assertion, because we have found
 			// another path to a final state.
 			// Even if the last state _was_ an assertion, we can use the previously
 			// saved indices to find a match.
 			if lastLookaroundInList {
 				break
 			} else {
 				if i == startingFrom {
 					i++
 				}
 				return false, []Group{}, i
 			}
 		}
 		// Check if we can find a state in our list that is:
 		// 	a. A last-state
 		// 	b. Empty
 		// 	c. Doesn't assert anything
 		for _, s := range currentStates {
 			if s.isLast && s.isEmpty && s.assert == noneAssert {
 				lastStatePtr = s
 				lastStateInList = true
 			}
 		}
 		if lastStateInList { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
 			for j := 1; j < numGroups+1; j++ {
 				tempIndices[j] = lastStatePtr.threadGroups[j]
 			}
 			endIdx = i
 			tempIndices[0] = Group{startIdx, endIdx}
 		}
 		// Check if we can find a zero-length match
 		if foundPath == false {
 			if ok := zeroMatchPossible(str, i, numGroups, currentStates...); ok {
 				if tempIndices[0].IsValid() == false {
 					tempIndices[0] = Group{startIdx, startIdx}
 				}
 			}
 			// If we haven't moved in the string, increment the counter by 1
 			// to ensure we don't keep trying the same string over and over.
 			//			if i == startingFrom {
 			startIdx++
 			//	i++
 			//			}
-			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
+			//			slices.Reverse(allMatches)
-				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
+			//			for _, m := range allMatches {
-					return true, tempIndices, tempIndices[0].EndIdx + 1
+			//				m.threadGroups = currentState.threadGroups
 			//				m.threadSP = idx
 			//			}
 			//			currentStates = append(currentStates, allMatches...)
 			//
 			//			//	kleeneState := currentState.kleeneState
 			//			//	kleeneState.threadGroups = currentState.threadGroups
 			//			//	kleeneState.threadSP = currentState.threadSP
 			//			//	currentStates = append(currentStates, kleeneState)
 			//			continue
 			//		}
 			// Alternation - enqueue left then right state, and continue
 			if currentState.isAlternation {
 				if currentState.isKleene { // Reverse order of adding things
 					rightState := currentState.splitState
 					copyThread(rightState, currentState)
 					currentStates = append(currentStates, *currentState.splitState)
 					leftState := currentState.next
 					copyThread(leftState, currentState)
 					currentStates = append(currentStates, *currentState.next)
 				} else {
-					return true, tempIndices, tempIndices[0].EndIdx
+					leftState := currentState.next
 					copyThread(leftState, currentState)
 					currentStates = append(currentStates, *currentState.next)
 					rightState := currentState.splitState
 					copyThread(rightState, currentState)
 					currentStates = append(currentStates, *currentState.splitState)
 				}
 				continue
 			}
 			// Empty state - enqueue next state, do _not_ increment the SP
 			if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
 				isEmptyAndNoAssertion = true
 			}
 			if currentState.contentContains(str, idx) {
 				foundMatch = true
 			}
 			if isEmptyAndNoAssertion || foundMatch {
 				allMatches := make([]nfaState, 0)
 				allMatches = append(allMatches, *(currentState.next))
 				slices.Reverse(allMatches)
 				for i := range allMatches {
 					copyThread(&allMatches[i], currentState)
 					if foundMatch && currentState.assert == noneAssert {
 						allMatches[i].threadSP += 1
 					}
 				}
 				if currentState.groupBegin {
 					currentStates = slices.Insert(currentStates, currentStateIdx+1, allMatches...)
 				} else if currentState.groupEnd {
 					currentStates = append(currentStates, allMatches...)
 				} else {
 					nextStates = append(nextStates, allMatches...)
 				}
 			}
 			return false, []Group{}, startIdx
 		}
 		currentStates = make([]*nfaState, len(tempStates))
 		copy(currentStates, tempStates)
 		tempStates = nil
-		i++
+			if currentState.isLast && len(nextStates) == 0 { // Last state reached
-	}
+				currentState.threadGroups[0].EndIdx = idx
-
+				if idx == currentState.threadGroups[0].StartIdx {
-	// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
+					idx += 1
 	// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
 	zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
 	tempStates = append(tempStates, zeroStates...)
 	num_appended := 0 // Number of unique states addded to tempStates
 	for isZero == true {
 		zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 		tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 		if num_appended == 0 { // Break if we haven't appended any more unique values
 			break
 		}
 	}
 	currentStates = append(currentStates, tempStates...)
 	tempStates = nil
 	for _, state := range currentStates {
 		// Only add the match if the start index is in bounds. If the state has an assertion,
 		// make sure the assertion checks out.
 		if state.isLast && i <= len(str) {
 			if state.assert == noneAssert || state.checkAssertion(str, i) {
 				for j := 1; j < numGroups+1; j++ {
 					tempIndices[j] = state.threadGroups[j]
 				}
-				endIdx = i
+				return true, currentState.threadGroups, idx
 				tempIndices[0] = Group{startIdx, endIdx}
 			}
 		}
 		currentStates = append([]nfaState{}, nextStates...)
 		nextStates = nil
 	}
-
+	return false, []Group{}, i + 1
-	if tempIndices.numValidGroups() > 0 {
+	//		zeroStates := make([]*nfaState, 0)
-		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
+	//		// Keep taking zero-states, until there are no more left to take
-			return true, tempIndices, tempIndices[0].EndIdx + 1
+	//		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
-		} else {
+	//		topStateItem := currentStates.peek()
-			return true, tempIndices, tempIndices[0].EndIdx
+	//		topState := topStateItem.(*priorQueueItem).state
-		}
+	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
-	}
+	//		tempStates = append(tempStates, zeroStates...)
-	if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
+	//		num_appended := 0
-		startIdx++
+	//		for isZero == true {
-	}
+	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
-	return false, []Group{}, startIdx
+	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 	//			if num_appended == 0 { // Break if we haven't appended any more unique values
 	//				break
 	//			}
 	//		}
 	//		if isZero == true {
 	//			currentStates.Pop()
 	//		}
 	//
 	//		for _, state := range tempStates {
 	//			heap.Push(currentStates, newPriorQueueItem(state))
 	//		}
 	//		tempStates = nil
 	//
 	//		// Take any transitions corresponding to current character
 	//		numStatesMatched := 0            // The number of states which had at least 1 match for this round
 	//		assertionFailed := false         // Whether or not an assertion failed for this round
 	//		lastStateInList := false         // Whether or not a last state was in our list of states
 	//		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
 	//		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
 	//		for numStatesMatched == 0 && lastStateInList == false {
 	//			if currentStates.Len() == 0 {
 	//				break
 	//			}
 	//			stateItem := heap.Pop(currentStates)
 	//			state := stateItem.(*priorQueueItem).state
 	//			matches, numMatches := state.matchesFor(str, i)
 	//			if numMatches > 0 {
 	//				numStatesMatched++
 	//				tempStates = append([]*nfaState(nil), matches...)
 	//				foundPath = true
 	//				for _, m := range matches {
 	//					if m.threadGroups == nil {
 	//						m.threadGroups = newMatch(numGroups + 1)
 	//					}
 	//					m.threadSP = state.threadSP + 1
 	//					copy(m.threadGroups, state.threadGroups)
 	//				}
 	//			}
 	//			if numMatches < 0 {
 	//				assertionFailed = true
 	//			}
 	//			if state.isLast {
 	//				if state.isLookaround() {
 	//					lastLookaroundInList = true
 	//				}
 	//				lastStateInList = true
 	//				lastStatePtr = state
 	//			}
 	//		}
 	//
 	//		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
 	//			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
 	//			// state. The explanation below is my attempt to explain this behavior.
 	//			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
 	//			//
 	//			// One of the states in our list was a last state and a lookaround. In this case, we
 	//			// don't abort upon failure of the assertion, because we have found
 	//			// another path to a final state.
 	//			// Even if the last state _was_ an assertion, we can use the previously
 	//			// saved indices to find a match.
 	//			if lastLookaroundInList {
 	//				break
 	//			} else {
 	//				if i == startingFrom {
 	//					i++
 	//				}
 	//				return false, []Group{}, i
 	//			}
 	//		}
 	//		// Check if we can find a state in our list that is:
 	//		// 	a. A last-state
 	//		// 	b. Empty
 	//		// 	c. Doesn't assert anything
 	//		for _, stateItem := range *currentStates {
 	//			s := stateItem.state
 	//			if s.isLast && s.isEmpty && s.assert == noneAssert {
 	//				lastStatePtr = s
 	//				lastStateInList = true
 	//			}
 	//		}
 	//		if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
 	//			for j := 1; j < numGroups+1; j++ {
 	//				tempIndices[j] = lastStatePtr.threadGroups[j]
 	//			}
 	//			endIdx = i
 	//			tempIndices[0] = Group{startIdx, endIdx}
 	//			if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
 	//				return true, tempIndices, tempIndices[0].EndIdx + 1
 	//			} else {
 	//				return true, tempIndices, tempIndices[0].EndIdx
 	//			}
 	//		}
 	//
 	//		// Check if we can find a zero-length match
 	//		if foundPath == false {
 	//			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
 	//				return item.state
 	//			})
 	//			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
 	//				if tempIndices[0].IsValid() == false {
 	//					tempIndices[0] = Group{startIdx, startIdx}
 	//				}
 	//			}
 	//			// If we haven't moved in the string, increment the counter by 1
 	//			// to ensure we don't keep trying the same string over and over.
 	//			//			if i == startingFrom {
 	//			startIdx++
 	//			//	i++
 	//			//			}
 	//			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
 	//				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 	//					return true, tempIndices, tempIndices[0].EndIdx + 1
 	//				} else {
 	//					return true, tempIndices, tempIndices[0].EndIdx
 	//				}
 	//			}
 	//			return false, []Group{}, startIdx
 	//		}
 	//		currentStates = &priorityQueue{}
 	//		slices.Reverse(tempStates)
 	//		for _, state := range tempStates {
 	//			heap.Push(currentStates, newPriorQueueItem(state))
 	//		}
 	//		tempStates = nil
 	//
 	//		i++
 	//	}
 	//
 	// // End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
 	// // This is the exact same algorithm used inside the loop, so I should probably put it in a function.
 	//
 	//	if currentStates.Len() > 0 {
 	//		topStateItem := currentStates.peek()
 	//		topState := topStateItem.(*priorQueueItem).state
 	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
 	//		tempStates = append(tempStates, zeroStates...)
 	//		num_appended := 0 // Number of unique states addded to tempStates
 	//		for isZero == true {
 	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 	//			if num_appended == 0 { // Break if we haven't appended any more unique values
 	//				break
 	//			}
 	//		}
 	//	}
 	//
 	//	for _, state := range tempStates {
 	//		heap.Push(currentStates, newPriorQueueItem(state))
 	//	}
 	//
 	// tempStates = nil
 	//
 	//	for _, stateItem := range *currentStates {
 	//		state := stateItem.state
 	//		// Only add the match if the start index is in bounds. If the state has an assertion,
 	//		// make sure the assertion checks out.
 	//		if state.isLast && i <= len(str) {
 	//			if state.assert == noneAssert || state.checkAssertion(str, i) {
 	//				for j := 1; j < numGroups+1; j++ {
 	//					tempIndices[j] = state.threadGroups[j]
 	//				}
 	//				endIdx = i
 	//				tempIndices[0] = Group{startIdx, endIdx}
 	//			}
 	//		}
 	//	}
 	//
 	//	if tempIndices.numValidGroups() > 0 {
 	//		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 	//			return true, tempIndices, tempIndices[0].EndIdx + 1
 	//		} else {
 	//			return true, tempIndices, tempIndices[0].EndIdx
 	//		}
 	//	}
 	//
 	// if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
 	//
 	//		startIdx++
 	//	}
 	//
 	// return false, []Group{}, startIdx
 }
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -25,24 +25,29 @@ const (
 )
 type nfaState struct {
-	content                    stateContents       // Contents of current state
+	content stateContents // Contents of current state
-	isEmpty                    bool                // If it is empty - Union operator and Kleene star states will be empty
+	isEmpty bool          // If it is empty - Union operator and Kleene star states will be empty
-	isLast                     bool                // If it is the last state (acept state)
+	isLast  bool          // If it is the last state (acept state)
-	output                     []*nfaState         // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
+	output  []*nfaState   // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
-	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
+	//	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
-	isKleene                   bool                // Identifies whether current node is a 0-state representing Kleene star
+	next                       *nfaState  // The next state (not for alternation or kleene states)
-	assert                     assertType          // Type of assertion of current node - NONE means that the node doesn't assert anything
+	isKleene                   bool       // Identifies whether current node is a 0-state representing Kleene star
-	allChars                   bool                // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
+	isQuestion                 bool       // Identifies whether current node is a 0-state representing the question operator
-	except                     []rune              // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
+	isAlternation              bool       // Identifies whether current node is a 0-state representing an alternation
-	lookaroundRegex            string              // Only for lookaround states - Contents of the regex that the lookaround state holds
+	splitState                 *nfaState  // Only for alternation states - the 'other' branch of the alternation ('next' is the first)
-	lookaroundNFA              *nfaState           // Holds the NFA of the lookaroundRegex - if it exists
+	assert                     assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
-	lookaroundNumCaptureGroups int                 // Number of capturing groups in lookaround regex if current node is a lookaround
+	allChars                   bool       // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
-	groupBegin                 bool                // Whether or not the node starts a capturing group
+	except                     []rune     // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
-	groupEnd                   bool                // Whether or not the node ends a capturing group
+	lookaroundRegex            string     // Only for lookaround states - Contents of the regex that the lookaround state holds
-	groupNum                   int                 // Which capturing group the node starts / ends
+	lookaroundNFA              *nfaState  // Holds the NFA of the lookaroundRegex - if it exists
 	lookaroundNumCaptureGroups int        // Number of capturing groups in lookaround regex if current node is a lookaround
 	groupBegin                 bool       // Whether or not the node starts a capturing group
 	groupEnd                   bool       // Whether or not the node ends a capturing group
 	groupNum                   int        // Which capturing group the node starts / ends
 	// The following properties depend on the current match - I should think about resetting them for every match.
 	zeroMatchFound bool    // Whether or not the state has been used for a zero-length match - only relevant for zero states
 	threadGroups   []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
 	threadSP       int     // The string pointer of the thread - where it is in the input string
 }
 // Clones the NFA starting from the given state.
@@ -68,8 +73,9 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 		isEmpty:         stateToClone.isEmpty,
 		isLast:          stateToClone.isLast,
 		output:          make([]*nfaState, len(stateToClone.output)),
 		transitions:     make(map[int][]*nfaState),
 		isKleene:        stateToClone.isKleene,
 		isQuestion:      stateToClone.isQuestion,
 		isAlternation:   stateToClone.isAlternation,
 		assert:          stateToClone.assert,
 		zeroMatchFound:  stateToClone.zeroMatchFound,
 		allChars:        stateToClone.allChars,
@@ -87,20 +93,18 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 			clone.output[i] = cloneStateHelper(s, cloneMap)
 		}
 	}
 	for k, v := range stateToClone.transitions {
 		clone.transitions[k] = make([]*nfaState, len(v))
 		for i, s := range v {
 			if s == stateToClone {
 				clone.transitions[k][i] = clone
 			} else {
 				clone.transitions[k][i] = cloneStateHelper(s, cloneMap)
 			}
 		}
 	}
 	if stateToClone.lookaroundNFA == stateToClone {
 		clone.lookaroundNFA = clone
 	}
 	clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
 	if stateToClone.splitState == stateToClone {
 		clone.splitState = clone
 	}
 	clone.splitState = cloneStateHelper(stateToClone.splitState, cloneMap)
 	if stateToClone.next == stateToClone {
 		clone.next = clone
 	}
 	clone.next = cloneStateHelper(stateToClone.next, cloneMap)
 	return clone
 }
@@ -111,16 +115,21 @@ func resetThreads(start *nfaState) {
 }
 func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	if state == nil {
 		return
 	}
 	if _, ok := visitedMap[state]; ok {
 		return
 	}
 	// Assuming it hasn't been visited
 	state.threadGroups = nil
 	state.threadSP = 0
 	visitedMap[state] = true
-	for _, v := range state.transitions {
+	if state.isAlternation {
-		for _, nextState := range v {
+		resetThreadsHelper(state.next, visitedMap)
-			resetThreadsHelper(nextState, visitedMap)
+		resetThreadsHelper(state.splitState, visitedMap)
-		}
+	} else {
 		resetThreadsHelper(state.next, visitedMap)
 	}
 }
@@ -207,6 +216,9 @@ func (s nfaState) contentContains(str []rune, idx int) bool {
 	if s.assert != noneAssert {
 		return s.checkAssertion(str, idx)
 	}
 	if idx >= len(str) {
 		return false
 	}
 	if s.allChars {
 		return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the index isn't a 'notDotChar', or isn't one of the exception characters for the current node.
 	}
@@ -218,74 +230,84 @@ func (s nfaState) isLookaround() bool {
 	return s.assert == plaAssert || s.assert == plbAssert || s.assert == nlaAssert || s.assert == nlbAssert
 }
 // numTransitions reports how many outgoing edges the state has: 'next' and
 // 'splitState' each count once when non-nil, so the result is 0, 1 or 2.
 func (s nfaState) numTransitions() int {
 	count := 0
 	if s.next != nil {
 		count++
 	}
 	if s.splitState != nil {
 		count++
 	}
 	return count
 }
 // Returns the matches for the character at the given index of the given string.
 // Also returns the number of matches. Returns -1 if an assertion failed.
-func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
+//func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
-	// Assertions can be viewed as 'checks'. If the check fails, we return
+//	// Assertions can be viewed as 'checks'. If the check fails, we return
-	// an empty array and 0.
+//	// an empty array and 0.
-	// If it passes, we treat it like any other state, and return all the transitions.
+//	// If it passes, we treat it like any other state, and return all the transitions.
-	if s.assert != noneAssert {
+//	if s.assert != noneAssert {
-		if s.checkAssertion(str, idx) == false {
+//		if s.checkAssertion(str, idx) == false {
-			return make([]*nfaState, 0), -1
+//			return make([]*nfaState, 0), -1
-		}
+//		}
-	}
+//	}
-	listTransitions := s.transitions[int(str[idx])]
+//	listTransitions := s.transitions[int(str[idx])]
-	for _, dest := range s.transitions[int(anyCharRune)] {
+//	for _, dest := range s.transitions[int(anyCharRune)] {
-		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
+//		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
-			// Add an allChar state to the list of matches if:
+//			// Add an allChar state to the list of matches if:
-			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
+//			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
-			// 		b. The current character isn't the state's exception list.
+//			// 		b. The current character isn't the state's exception list.
-			listTransitions = append(listTransitions, dest)
+//			listTransitions = append(listTransitions, dest)
-		}
+//		}
-	}
+//	}
-	numTransitions := len(listTransitions)
+//	numTransitions := len(listTransitions)
-	return listTransitions, numTransitions
+//	return listTransitions, numTransitions
-}
+//}
 // verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
-func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
+//func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
-	if len(st.transitions) == 0 {
+//	if st.numTransitions() == 0 {
-		st.isLast = true
+//		st.isLast = true
-		return
+//		return
-	}
+//	}
-	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
+//	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
-	if len(st.transitions) == 1 { // Eg. a*
+//	if st.numTransitions() == 1 { // Eg. a*
-		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
+//		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
-		for _, c := range st.content {
+//		for _, c := range st.content {
-			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
+//			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
-				moreThanOneTrans = true
+//				moreThanOneTrans = true
-			}
+//			}
-		}
+//		}
-		st.isLast = !moreThanOneTrans
+//		st.isLast = !moreThanOneTrans
-	}
+//	}
-
+//
-	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
+//	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
-		transitionDests := make([]*nfaState, 0)
+//		transitionDests := make([]*nfaState, 0)
-		for _, v := range st.transitions {
+//		for _, v := range st.transitions {
-			transitionDests = append(transitionDests, v...)
+//			transitionDests = append(transitionDests, v...)
-		}
+//		}
-		if allEqual(transitionDests...) {
+//		if allEqual(transitionDests...) {
-			st.isLast = true
+//			st.isLast = true
-			return
+//			return
-		}
+//		}
-	}
+//	}
-	if visited[st] == true {
+//	if visited[st] == true {
-		return
+//		return
-	}
+//	}
-	visited[st] = true
+//	visited[st] = true
-	for _, states := range st.transitions {
+//	for _, states := range st.transitions {
-		for i := range states {
+//		for i := range states {
-			if states[i] != st {
+//			if states[i] != st {
-				verifyLastStatesHelper(states[i], visited)
+//				verifyLastStatesHelper(states[i], visited)
-			}
+//			}
-		}
+//		}
-	}
+//	}
-}
+//}
 // verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
-func verifyLastStates(start []*nfaState) {
+//func verifyLastStates(start []*nfaState) {
-	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
+//	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
-}
+//}
 // Concatenates s1 and s2, returns the start of the concatenation.
 func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -293,61 +315,69 @@ func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
 		return s2
 	}
 	for i := range s1.output {
-		for _, c := range s2.content { // Create transitions for every element in s1's content to s2'
+		s1.output[i].next = s2
 			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], s2)
 		}
 	}
 	s1.output = s2.output
 	return s1
 }
-func kleene(s1 nfaState) (*nfaState, error) {
+func kleene(s1 *nfaState) (*nfaState, error) {
 	if s1.isEmpty && s1.assert != noneAssert {
 		return nil, fmt.Errorf("previous token is not quantifiable")
 	}
 	toReturn := &nfaState{}
 	toReturn.transitions = make(map[int][]*nfaState)
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
-	toReturn.isKleene = true
+	toReturn.isAlternation = true
-	toReturn.output = append(toReturn.output, toReturn)
+	toReturn.content = newContents(epsilon)
 	toReturn.splitState = s1
 	for i := range s1.output {
-		for _, c := range toReturn.content {
+		s1.output[i].next = toReturn
 			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn)
 		}
 	}
-	for _, c := range s1.content {
+
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
+	//	toReturn := &nfaState{}
 	//	toReturn.transitions = make(map[int][]*nfaState)
 	//	toReturn.content = newContents(epsilon)
 	toReturn.isKleene = true
 	toReturn.output = append([]*nfaState{}, toReturn)
 	for i := range s1.output {
 		s1.output[i].next = toReturn
 	}
 	//	for _, c := range s1.content {
 	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
 	//	}
 	//toReturn.kleeneState = &s1
 	return toReturn, nil
 }
 func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
 	toReturn := &nfaState{}
-	toReturn.transitions = make(map[int][]*nfaState)
+	//	toReturn.transitions = make(map[int][]*nfaState)
 	toReturn.output = append(toReturn.output, s1.output...)
 	toReturn.output = append(toReturn.output, s2.output...)
-	// Unique append is used here (and elsewhere) to ensure that,
+	//	// Unique append is used here (and elsewhere) to ensure that,
-	// for any given transition, a state can only be mentioned once.
+	//	// for any given transition, a state can only be mentioned once.
-	// For example, given the transition 'a', the state 's1' can only be mentioned once.
+	//	// For example, given the transition 'a', the state 's1' can only be mentioned once.
-	// This would lead to multiple instances of the same set of match indices, since both
+	//	// This would lead to multiple instances of the same set of match indices, since both
-	// 's1' states would be considered to match.
+	//	// 's1' states would be considered to match.
-	for _, c := range s1.content {
+	//	for _, c := range s1.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
-	}
+	//	}
-	for _, c := range s2.content {
+	//	for _, c := range s2.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
-	}
+	//	}
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isAlternation = true
 	toReturn.next = s1
 	toReturn.splitState = s2
 	return toReturn
 }
 func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
 	s2 := &nfaState{}
-	s2.transitions = make(map[int][]*nfaState)
+	//	s2.transitions = make(map[int][]*nfaState)
 	s2.content = newContents(epsilon)
 	s2.output = append(s2.output, s2)
 	s2.isEmpty = true
@@ -358,8 +388,8 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
 // Creates and returns a new state with the 'default' values.
 func newState() nfaState {
 	ret := nfaState{
-		output:          make([]*nfaState, 0),
+		output: make([]*nfaState, 0),
-		transitions:     make(map[int][]*nfaState),
+		//		transitions:     make(map[int][]*nfaState),
 		assert:          noneAssert,
 		except:          append([]rune{}, 0),
 		lookaroundRegex: "",
--- a/regex/priorityQueue.go
+++ b/regex/priorityQueue.go
@@ -0,0 +1,89 @@
 package regex
 import "container/heap"
 // Implement a priority queue using container/heap
 // Priority levels for queue items. iota numbers them upward starting at
 // min_priority (0); priorityQueue.Less sorts larger values first.
 const (
 	min_priority int = iota
 	// zerostate_priority is what getPriority returns for Kleene states and for other empty states.
 	zerostate_priority
 	// alternation_priority is what getPriority returns for alternation states.
 	alternation_priority
 	kleene_priority
 	// char_priority is what getPriority returns for states that are not Kleene, alternation or empty.
 	char_priority
 	max_priority
 )
 // getPriority maps a state to its queue priority. The checks run in order:
 // Kleene states get zerostate_priority, alternation states get
 // alternation_priority, any other empty state gets zerostate_priority, and
 // everything else gets char_priority.
 func getPriority(state *nfaState) int {
 	switch {
 	case state.isKleene:
 		return zerostate_priority
 	case state.isAlternation:
 		return alternation_priority
 	case state.isEmpty:
 		return zerostate_priority
 	default:
 		return char_priority
 	}
 }
 // priorQueueItem is a single entry stored in a priorityQueue.
 type priorQueueItem struct {
 	// state is the NFA state carried by this entry.
 	state    *nfaState
 	// priority is the ordering key; newPriorQueueItem fills it in from getPriority.
 	priority int
 	// index is the entry's position in the queue slice. Push and Swap keep it
 	// current; newPriorQueueItem and Pop set it to -1.
 	index    int
 }
 // newPriorQueueItem wraps state in a queue item whose priority comes from
 // getPriority. The index starts at -1 until Push assigns a position.
 func newPriorQueueItem(state *nfaState) *priorQueueItem {
 	item := new(priorQueueItem)
 	item.state = state
 	item.priority = getPriority(state)
 	item.index = -1
 	return item
 }
 // priorityQueue is a slice of queue items; its Len, Less, Swap, Push and Pop
 // methods let it be used with container/heap (see the heap.Fix call in update).
 type priorityQueue []*priorQueueItem
 // Len returns the number of items currently in the queue.
 func (pq priorityQueue) Len() int {
 	return len(pq)
 }
 // Less reports whether the item at i sorts before the item at j. A larger
 // priority sorts first (max-heap behaviour); equal priorities fall back to
 // the smaller index field.
 func (pq priorityQueue) Less(i, j int) bool {
 	left, right := pq[i], pq[j]
 	if left.priority != right.priority {
 		return left.priority > right.priority
 	}
 	return left.index < right.index
 }
 // Swap exchanges the items at positions i and j and rewrites their index
 // fields so each one records its new position.
 func (pq priorityQueue) Swap(i, j int) {
 	first, second := pq[i], pq[j]
 	pq[i], pq[j] = second, first
 	second.index = i
 	first.index = j
 }
 // Push appends x, which must be a *priorQueueItem, to the end of the queue
 // and records that end position in the item's index field.
 func (pq *priorityQueue) Push(x any) {
 	entry := x.(*priorQueueItem)
 	entry.index = len(*pq)
 	*pq = append(*pq, entry)
 }
 // Pop removes the last element of the underlying slice and returns it. The
 // vacated slot is set to nil and the returned item's index is reset to -1.
 func (pq *priorityQueue) Pop() any {
 	items := *pq
 	last := len(items) - 1
 	removed := items[last]
 	items[last] = nil // clear the slot so the slice no longer references the item
 	removed.index = -1
 	*pq = items[:last]
 	return removed
 }
 // peek returns the item that sorts first under Less without removing it, or
 // nil when the queue is empty. container/heap keeps that item at index 0 of
 // the slice (it is the one heap.Pop would return); the last slice element is
 // only an arbitrary leaf of the heap, so it must not be used here.
 func (pq *priorityQueue) peek() any {
 	queue := *pq
 	if len(queue) == 0 {
 		return nil
 	}
 	return queue[0]
 }
 // update replaces the state and priority stored in item, then calls heap.Fix
 // at item.index so the queue is reordered for the new priority.
 // NOTE(review): item.index is -1 after Pop and for items never pushed; presumably
 // callers only pass items that are currently in the queue — confirm.
 func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
 	item.state = value
 	item.priority = priority
 	heap.Fix(pq, item.index)
 }
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -701,7 +701,7 @@ func TestFind(t *testing.T) {
 					if len(test.result) == 0 {
 						return // Manually pass the test, because this is the expected behavior
 					} else {
-						t.Errorf("Wanted no match Got %v\n", groupIndex)
+						t.Errorf("Wanted %v Got no matches\n", test.result)
 					}
 				} else {
 					if groupIndex != test.result[0] {
Author	SHA1	Message	Date
Aadhavan Srinivasan	fbc9bea9fb	Commented out unused functions; use new nfaState parameters	2025-02-05 22:23:31 -05:00
Aadhavan Srinivasan	cca8c7cda2	Got rid of transitions parameter, changed how kleene state is processed I replaced the transition parameter for nfaState, replacing it with a single nfaState pointer. This is because any non-alternation state will only have one next state, so the map was just added complexity. I changed alternation processing - instead of having their own dedicated fields, they just use the new 'next' parameter, and another one called 'splitState'. I also changed the kleene state processing to remove the unecessary empty state in the right-side alternation (it actually messed up my matching).	2025-02-05 22:20:28 -05:00
Aadhavan Srinivasan	858e535fba	Continued implementing Thompson's algorithm	2025-02-05 18:01:36 -05:00
Aadhavan Srinivasan	7c62ba6bfd	Started implementing Thompson's algorithm for matching, because the old one was completely backtracking (so it would enter infinite loops on something like '(a)' ) The git diff claims that a ton of code was changed, but most of it was just indentation changes.	2025-02-05 12:21:12 -05:00
Aadhavan Srinivasan	d4e8cb74fd	Replaced pointer to nfaState with nfaState	2025-02-05 11:32:20 -05:00
Aadhavan Srinivasan	3ce611d121	More work towards implementing PCRE matching	2025-02-04 14:09:24 -05:00
Aadhavan Srinivasan	e0253dfaf3	Change kleene() to an alternation-style construct	2025-02-04 14:09:04 -05:00
Aadhavan Srinivasan	753e973d82	Started rewrite of matching algorithm, got concatenation and alternation done, kleene and zero-state stuff is next	2025-02-03 22:01:52 -05:00
Aadhavan Srinivasan	5563a70568	Reverse the order in which I pop states for alternation, because this messes with the left branch-right branch thing	2025-02-03 21:59:41 -05:00
Aadhavan Srinivasan	de0d7345a8	Store left and right branches of alternation separately	2025-02-03 21:59:05 -05:00
Aadhavan Srinivasan	ad273b0c68	Trying to emulate backtracking by using string pointers within threads (something similar to rsc's 2nd regexp article)	2025-02-03 16:50:11 -05:00
Aadhavan Srinivasan	e167cdb2cb	Fixed mistake in test output	2025-02-03 16:49:30 -05:00
Aadhavan Srinivasan	1fd48ae614	Store the current string pointer as a 'thread variable' (allows us to simulate backtracking)	2025-02-03 16:49:10 -05:00
Aadhavan Srinivasan	09812956ac	Disable all optimizations	2025-02-03 16:48:09 -05:00
Aadhavan Srinivasan	fbc9dfcc95	Trying something out; we'll see if it works	2025-02-03 16:47:53 -05:00
Aadhavan Srinivasan	bc32e0cb76	Started working on converting to PCRE matching rules (prefer left branch of alternation)	2025-02-03 14:06:14 -05:00
Aadhavan Srinivasan	ad0f7d0178	Added new state fields to tell if a state is a question or alternation	2025-02-03 14:05:53 -05:00
Aadhavan Srinivasan	4e597f8eb1	Implemented a priority-queue to use while matching	2025-02-03 14:05:30 -05:00