Commented out unused functions; use new nfaState parameters

Got rid of transitions parameter, changed how kleene state is processed
I replaced the 'transitions' field of nfaState with a single nfaState pointer, 'next'. Any non-alternation state only ever has one next state, so the map was just added complexity. I also changed alternation processing: instead of having their own dedicated fields ('leftState' and 'rightState'), alternation states now use the new 'next' field plus another one called 'splitState'. Finally, I changed the kleene state processing to remove the unnecessary empty state in the right-side alternation (it actually messed up my matching).
2025-02-05 22:23:31 -05:00 · 2025-02-05 22:20:28 -05:00
3 changed files with 215 additions and 220 deletions
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -822,7 +822,6 @@ func thompson(re []postfixNode) (Reg, error) {
 	for _, c := range re {
 		if c.nodetype == characterNode || c.nodetype == assertionNode {
 			stateToAdd := nfaState{}
-			stateToAdd.transitions = make(map[int][]*nfaState)
 			if c.allChars {
 				stateToAdd.allChars = true
 				if len(c.except) != 0 {
@@ -934,7 +933,6 @@ func thompson(re []postfixNode) (Reg, error) {
 			s.isEmpty = true
 			s.output = make([]*nfaState, 0)
 			s.output = append(s.output, s)
-			s.transitions = make(map[int][]*nfaState)
 			// LPAREN nodes are just added normally
 			if c.nodetype == lparenNode {
 				numGroups++
@@ -966,7 +964,7 @@ func thompson(re []postfixNode) (Reg, error) {
 					s.groupNum = lparenNode.groupNum
 					to_add := concatenate(lparenNode, s)
 					nfa = append(nfa, to_add)
-				} else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
+				} else if middleNode.groupBegin && middleNode.numTransitions() == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
 					nfa = append(nfa, lparenNode)    // I shouldn't have popped this out, because it is not involved in the current capturing group
 					s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually an lparen
 					to_add := concatenate(middleNode, s)
@@ -1030,14 +1028,14 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying kleene star")
 			}
-			stateToAdd, err := kleene(*s1)
+			stateToAdd, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
 			nfa = append(nfa, stateToAdd)
 		case plusNode: // a+ is equivalent to aa*
 			s1 := mustPop(&nfa)
-			s2, err := kleene(*s1)
+			s2, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
@@ -1061,14 +1059,14 @@ func thompson(re []postfixNode) (Reg, error) {
 			// 	'^|a'
 			s2, err1 := pop(&nfa)
 			s1, err2 := pop(&nfa)
-			if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err2 != nil || (s2.groupBegin && s2.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
 					nfa = append(nfa, s2)
 				}
 				tmp := zeroLengthMatchState()
 				s2 = &tmp
 			}
-			if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err1 == nil { // See above for explanation
 					nfa = append(nfa, s1)
 				}
@@ -1100,7 +1098,7 @@ func thompson(re []postfixNode) (Reg, error) {
 				stateToAdd = concatenate(stateToAdd, cloneState(poppedState))
 			}
 			if c.endReps == infinite_reps { // Case 3
-				s2, err := kleene(*poppedState)
+				s2, err := kleene(poppedState)
 				if err != nil {
 					return Reg{}, err
 				}
@@ -1117,7 +1115,10 @@ func thompson(re []postfixNode) (Reg, error) {
 		return Reg{}, fmt.Errorf("invalid regex")
 	}

-	verifyLastStates(nfa)
+	lastState := newState()
+	lastState.isLast = true
+
+	concatenate(nfa[0], &lastState)

 	return Reg{nfa[0], numGroups}, nil

--- a/regex/matching.go
+++ b/regex/matching.go
@@ -74,58 +74,58 @@ func getZeroGroup(m Match) Group {
 // given slice. It returns the resulting states. If any of the resulting states is a 0-state,
 // the second ret val is true.
 // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
-func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
-	for _, state := range states {
-		if len(state.transitions[epsilon]) > 0 {
-			for _, s := range state.transitions[epsilon] {
-				if s.threadGroups == nil {
-					s.threadGroups = newMatch(numGroups + 1)
-				}
-				copy(s.threadGroups, state.threadGroups)
-				if s.groupBegin {
-					s.threadGroups[s.groupNum].StartIdx = idx
-					//					openParenGroups = append(openParenGroups, s.groupNum)
-				}
-				if s.groupEnd {
-					s.threadGroups[s.groupNum].EndIdx = idx
-					//					closeParenGroups = append(closeParenGroups, s.groupNum)
-				}
-			}
-			rtv = append(rtv, state.transitions[epsilon]...)
-		}
-	}
-	for _, state := range rtv {
-		if len(state.transitions[epsilon]) > 0 {
-			return rtv, true
-		}
-	}
-	return rtv, false
-}
+//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
+//	for _, state := range states {
+//		if len(state.transitions[epsilon]) > 0 {
+//			for _, s := range state.transitions[epsilon] {
+//				if s.threadGroups == nil {
+//					s.threadGroups = newMatch(numGroups + 1)
+//				}
+//				copy(s.threadGroups, state.threadGroups)
+//				if s.groupBegin {
+//					s.threadGroups[s.groupNum].StartIdx = idx
+//					//					openParenGroups = append(openParenGroups, s.groupNum)
+//				}
+//				if s.groupEnd {
+//					s.threadGroups[s.groupNum].EndIdx = idx
+//					//					closeParenGroups = append(closeParenGroups, s.groupNum)
+//				}
+//			}
+//			rtv = append(rtv, state.transitions[epsilon]...)
+//		}
+//	}
+//	for _, state := range rtv {
+//		if len(state.transitions[epsilon]) > 0 {
+//			return rtv, true
+//		}
+//	}
+//	return rtv, false
+//}

 // zeroMatchPossible returns true if a zero-length match is possible
 // from any of the given states, given the string and our position in it.
 // It uses the same algorithm to find zero-states as the one inside the loop,
 // so I should probably put it in a function.
-func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
-	zeroStates, isZero := takeZeroState(states, numGroups, idx)
-	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
-	tempstates = append(tempstates, states...)
-	tempstates = append(tempstates, zeroStates...)
-	num_appended := 0 // number of unique states addded to tempstates
-	for isZero == true {
-		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
-		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
-		if num_appended == 0 { // break if we haven't appended any more unique values
-			break
-		}
-	}
-	for _, state := range tempstates {
-		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
-			return true
-		}
-	}
-	return false
-}
+//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
+//	zeroStates, isZero := takeZeroState(states, numGroups, idx)
+//	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
+//	tempstates = append(tempstates, states...)
+//	tempstates = append(tempstates, zeroStates...)
+//	num_appended := 0 // number of unique states addded to tempstates
+//	for isZero == true {
+//		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
+//		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
+//		if num_appended == 0 { // break if we haven't appended any more unique values
+//			break
+//		}
+//	}
+//	for _, state := range tempstates {
+//		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
+//			return true
+//		}
+//	}
+//	return false
+//}

 // Prunes the slice by removing overlapping indices.
 func pruneIndices(indices []Match) []Match {
@@ -376,17 +376,26 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in

 			// Alternation - enqueue left then right state, and continue
 			if currentState.isAlternation {
-				leftState := currentState.leftState
-				copyThread(leftState, currentState)
-				currentStates = append(currentStates, *currentState.leftState)
-				rightState := currentState.rightState
-				copyThread(rightState, currentState)
-				currentStates = append(currentStates, *currentState.rightState)
+				if currentState.isKleene { // Reverse order of adding things
+					rightState := currentState.splitState
+					copyThread(rightState, currentState)
+					currentStates = append(currentStates, *currentState.splitState)
+					leftState := currentState.next
+					copyThread(leftState, currentState)
+					currentStates = append(currentStates, *currentState.next)
+				} else {
+					leftState := currentState.next
+					copyThread(leftState, currentState)
+					currentStates = append(currentStates, *currentState.next)
+					rightState := currentState.splitState
+					copyThread(rightState, currentState)
+					currentStates = append(currentStates, *currentState.splitState)
+				}
 				continue
 			}

 			// Empty state - enqueue next state, do _not_ increment the SP
-			if currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
+			if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
 				isEmptyAndNoAssertion = true
 			}

@@ -396,12 +405,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in

 			if isEmptyAndNoAssertion || foundMatch {
 				allMatches := make([]nfaState, 0)
-				for _, v := range currentState.transitions {
-					dereferenced := funcMap(v, func(s *nfaState) nfaState {
-						return *s
-					})
-					allMatches = append(allMatches, dereferenced...)
-				}
+				allMatches = append(allMatches, *(currentState.next))
 				slices.Reverse(allMatches)
 				for i := range allMatches {
 					copyThread(&allMatches[i], currentState)
@@ -419,24 +423,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 			}

 			if currentState.isLast && len(nextStates) == 0 { // Last state reached
-				if foundMatch {
-					if currentState.assert != noneAssert {
-						currentState.threadGroups[0].EndIdx = idx
-					} else {
-						currentState.threadGroups[0].EndIdx = idx + 1
-					}
-					if idx == currentState.threadGroups[0].StartIdx {
-						idx += 1
-					}
-					return true, currentState.threadGroups, idx
-				} else if isEmptyAndNoAssertion {
-					currentState.threadGroups[0].EndIdx = idx
-					if idx == currentState.threadGroups[0].StartIdx {
-						idx++
-					}
-					return true, currentState.threadGroups, idx
+				currentState.threadGroups[0].EndIdx = idx
+				if idx == currentState.threadGroups[0].StartIdx {
+					idx += 1
 				}
-
+				return true, currentState.threadGroups, idx
 			}
 		}
 		currentStates = append([]nfaState{}, nextStates...)
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -25,25 +25,25 @@ const (
 )

 type nfaState struct {
-	content                    stateContents       // Contents of current state
-	isEmpty                    bool                // If it is empty - Union operator and Kleene star states will be empty
-	isLast                     bool                // If it is the last state (acept state)
-	output                     []*nfaState         // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
-	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
-	isKleene                   bool                // Identifies whether current node is a 0-state representing Kleene star
-	isQuestion                 bool                // Identifies whether current node is a 0-state representing the question operator
-	isAlternation              bool                // Identifies whether current node is a 0-state representing an alternation
-	leftState                  *nfaState           // Only for alternation states - the 'left' branch of the alternation
-	rightState                 *nfaState           // Only for alternation states - the 'right' branch of the alternation
-	assert                     assertType          // Type of assertion of current node - NONE means that the node doesn't assert anything
-	allChars                   bool                // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
-	except                     []rune              // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
-	lookaroundRegex            string              // Only for lookaround states - Contents of the regex that the lookaround state holds
-	lookaroundNFA              *nfaState           // Holds the NFA of the lookaroundRegex - if it exists
-	lookaroundNumCaptureGroups int                 // Number of capturing groups in lookaround regex if current node is a lookaround
-	groupBegin                 bool                // Whether or not the node starts a capturing group
-	groupEnd                   bool                // Whether or not the node ends a capturing group
-	groupNum                   int                 // Which capturing group the node starts / ends
+	content stateContents // Contents of current state
+	isEmpty bool          // If it is empty - Union operator and Kleene star states will be empty
+	isLast  bool          // If it is the last state (acept state)
+	output  []*nfaState   // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
+	//	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
+	next                       *nfaState  // The next state (not for alternation or kleene states)
+	isKleene                   bool       // Identifies whether current node is a 0-state representing Kleene star
+	isQuestion                 bool       // Identifies whether current node is a 0-state representing the question operator
+	isAlternation              bool       // Identifies whether current node is a 0-state representing an alternation
+	splitState                 *nfaState  // Only for alternation states - the 'other' branch of the alternation ('next' is the first)
+	assert                     assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
+	allChars                   bool       // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
+	except                     []rune     // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
+	lookaroundRegex            string     // Only for lookaround states - Contents of the regex that the lookaround state holds
+	lookaroundNFA              *nfaState  // Holds the NFA of the lookaroundRegex - if it exists
+	lookaroundNumCaptureGroups int        // Number of capturing groups in lookaround regex if current node is a lookaround
+	groupBegin                 bool       // Whether or not the node starts a capturing group
+	groupEnd                   bool       // Whether or not the node ends a capturing group
+	groupNum                   int        // Which capturing group the node starts / ends
 	// The following properties depend on the current match - I should think about resetting them for every match.
 	zeroMatchFound bool    // Whether or not the state has been used for a zero-length match - only relevant for zero states
 	threadGroups   []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
@@ -73,7 +73,6 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 		isEmpty:         stateToClone.isEmpty,
 		isLast:          stateToClone.isLast,
 		output:          make([]*nfaState, len(stateToClone.output)),
-		transitions:     make(map[int][]*nfaState),
 		isKleene:        stateToClone.isKleene,
 		isQuestion:      stateToClone.isQuestion,
 		isAlternation:   stateToClone.isAlternation,
@@ -94,28 +93,18 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 			clone.output[i] = cloneStateHelper(s, cloneMap)
 		}
 	}
-	for k, v := range stateToClone.transitions {
-		clone.transitions[k] = make([]*nfaState, len(v))
-		for i, s := range v {
-			if s == stateToClone {
-				clone.transitions[k][i] = clone
-			} else {
-				clone.transitions[k][i] = cloneStateHelper(s, cloneMap)
-			}
-		}
-	}
 	if stateToClone.lookaroundNFA == stateToClone {
 		clone.lookaroundNFA = clone
 	}
 	clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
-	if stateToClone.leftState == stateToClone {
-		clone.leftState = clone
+	if stateToClone.splitState == stateToClone {
+		clone.splitState = clone
 	}
-	clone.leftState = cloneStateHelper(stateToClone.leftState, cloneMap)
-	if stateToClone.rightState == stateToClone {
-		clone.rightState = clone
+	clone.splitState = cloneStateHelper(stateToClone.splitState, cloneMap)
+	if stateToClone.next == stateToClone {
+		clone.next = clone
 	}
-	clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap)
+	clone.next = cloneStateHelper(stateToClone.next, cloneMap)
 	return clone
 }

@@ -126,6 +115,9 @@ func resetThreads(start *nfaState) {
 }

 func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
+	if state == nil {
+		return
+	}
 	if _, ok := visitedMap[state]; ok {
 		return
 	}
@@ -133,10 +125,11 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	state.threadGroups = nil
 	state.threadSP = 0
 	visitedMap[state] = true
-	for _, v := range state.transitions {
-		for _, nextState := range v {
-			resetThreadsHelper(nextState, visitedMap)
-		}
+	if state.isAlternation {
+		resetThreadsHelper(state.next, visitedMap)
+		resetThreadsHelper(state.splitState, visitedMap)
+	} else {
+		resetThreadsHelper(state.next, visitedMap)
 	}
 }

@@ -237,74 +230,84 @@ func (s nfaState) isLookaround() bool {
 	return s.assert == plaAssert || s.assert == plbAssert || s.assert == nlaAssert || s.assert == nlbAssert
 }

+func (s nfaState) numTransitions() int {
+	if s.next == nil && s.splitState == nil {
+		return 0
+	}
+	if s.next == nil || s.splitState == nil {
+		return 1
+	}
+	return 2
+}
+
 // Returns the matches for the character at the given index of the given string.
 // Also returns the number of matches. Returns -1 if an assertion failed.
-func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
-	// Assertions can be viewed as 'checks'. If the check fails, we return
-	// an empty array and 0.
-	// If it passes, we treat it like any other state, and return all the transitions.
-	if s.assert != noneAssert {
-		if s.checkAssertion(str, idx) == false {
-			return make([]*nfaState, 0), -1
-		}
-	}
-	listTransitions := s.transitions[int(str[idx])]
-	for _, dest := range s.transitions[int(anyCharRune)] {
-		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
-			// Add an allChar state to the list of matches if:
-			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
-			// 		b. The current character isn't the state's exception list.
-			listTransitions = append(listTransitions, dest)
-		}
-	}
-	numTransitions := len(listTransitions)
-	return listTransitions, numTransitions
-}
+//func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
+//	// Assertions can be viewed as 'checks'. If the check fails, we return
+//	// an empty array and 0.
+//	// If it passes, we treat it like any other state, and return all the transitions.
+//	if s.assert != noneAssert {
+//		if s.checkAssertion(str, idx) == false {
+//			return make([]*nfaState, 0), -1
+//		}
+//	}
+//	listTransitions := s.transitions[int(str[idx])]
+//	for _, dest := range s.transitions[int(anyCharRune)] {
+//		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
+//			// Add an allChar state to the list of matches if:
+//			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
+//			// 		b. The current character isn't the state's exception list.
+//			listTransitions = append(listTransitions, dest)
+//		}
+//	}
+//	numTransitions := len(listTransitions)
+//	return listTransitions, numTransitions
+//}

 // verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
-func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
-	if len(st.transitions) == 0 {
-		st.isLast = true
-		return
-	}
-	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
-	if len(st.transitions) == 1 { // Eg. a*
-		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
-		for _, c := range st.content {
-			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
-				moreThanOneTrans = true
-			}
-		}
-		st.isLast = !moreThanOneTrans
-	}
-
-	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
-		transitionDests := make([]*nfaState, 0)
-		for _, v := range st.transitions {
-			transitionDests = append(transitionDests, v...)
-		}
-		if allEqual(transitionDests...) {
-			st.isLast = true
-			return
-		}
-	}
-	if visited[st] == true {
-		return
-	}
-	visited[st] = true
-	for _, states := range st.transitions {
-		for i := range states {
-			if states[i] != st {
-				verifyLastStatesHelper(states[i], visited)
-			}
-		}
-	}
-}
+//func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
+//	if st.numTransitions() == 0 {
+//		st.isLast = true
+//		return
+//	}
+//	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
+//	if st.numTransitions() == 1 { // Eg. a*
+//		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
+//		for _, c := range st.content {
+//			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
+//				moreThanOneTrans = true
+//			}
+//		}
+//		st.isLast = !moreThanOneTrans
+//	}
+//
+//	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
+//		transitionDests := make([]*nfaState, 0)
+//		for _, v := range st.transitions {
+//			transitionDests = append(transitionDests, v...)
+//		}
+//		if allEqual(transitionDests...) {
+//			st.isLast = true
+//			return
+//		}
+//	}
+//	if visited[st] == true {
+//		return
+//	}
+//	visited[st] = true
+//	for _, states := range st.transitions {
+//		for i := range states {
+//			if states[i] != st {
+//				verifyLastStatesHelper(states[i], visited)
+//			}
+//		}
+//	}
+//}

 // verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
-func verifyLastStates(start []*nfaState) {
-	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
-}
+//func verifyLastStates(start []*nfaState) {
+//	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
+//}

 // Concatenates s1 and s2, returns the start of the concatenation.
 func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -312,69 +315,69 @@ func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
 		return s2
 	}
 	for i := range s1.output {
-		for _, c := range s2.content { // Create transitions for every element in s1's content to s2'
-			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], s2)
-		}
+		s1.output[i].next = s2
 	}
 	s1.output = s2.output
 	return s1
 }

-func kleene(s1 nfaState) (*nfaState, error) {
+func kleene(s1 *nfaState) (*nfaState, error) {
 	if s1.isEmpty && s1.assert != noneAssert {
 		return nil, fmt.Errorf("previous token is not quantifiable")
 	}

-	emptyState := zeroLengthMatchState()
-	emptyState.assert = noneAssert
-	toReturn := alternate(&s1, &emptyState)
+	toReturn := &nfaState{}
+	toReturn.isEmpty = true
+	toReturn.isAlternation = true
+	toReturn.content = newContents(epsilon)
+	toReturn.splitState = s1
+	for i := range s1.output {
+		s1.output[i].next = toReturn
+	}

 	//	toReturn := &nfaState{}
 	//	toReturn.transitions = make(map[int][]*nfaState)
 	//	toReturn.content = newContents(epsilon)
-	toReturn.isEmpty = true
 	toReturn.isKleene = true
-	toReturn.output = []*nfaState{&emptyState}
+	toReturn.output = append([]*nfaState{}, toReturn)
 	for i := range s1.output {
-		for _, c := range toReturn.content {
-			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn)
-		}
-	}
-	for _, c := range s1.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
+		s1.output[i].next = toReturn
 	}
+	//	for _, c := range s1.content {
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
+	//	}
 	//toReturn.kleeneState = &s1
 	return toReturn, nil
 }

 func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
 	toReturn := &nfaState{}
-	toReturn.transitions = make(map[int][]*nfaState)
+	//	toReturn.transitions = make(map[int][]*nfaState)
 	toReturn.output = append(toReturn.output, s1.output...)
 	toReturn.output = append(toReturn.output, s2.output...)
-	// Unique append is used here (and elsewhere) to ensure that,
-	// for any given transition, a state can only be mentioned once.
-	// For example, given the transition 'a', the state 's1' can only be mentioned once.
-	// This would lead to multiple instances of the same set of match indices, since both
-	// 's1' states would be considered to match.
-	for _, c := range s1.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
-	}
-	for _, c := range s2.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
-	}
+	//	// Unique append is used here (and elsewhere) to ensure that,
+	//	// for any given transition, a state can only be mentioned once.
+	//	// For example, given the transition 'a', the state 's1' can only be mentioned once.
+	//	// This would lead to multiple instances of the same set of match indices, since both
+	//	// 's1' states would be considered to match.
+	//	for _, c := range s1.content {
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
+	//	}
+	//	for _, c := range s2.content {
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
+	//	}
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isAlternation = true
-	toReturn.leftState = s1
-	toReturn.rightState = s2
+	toReturn.next = s1
+	toReturn.splitState = s2

 	return toReturn
 }

 func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
 	s2 := &nfaState{}
-	s2.transitions = make(map[int][]*nfaState)
+	//	s2.transitions = make(map[int][]*nfaState)
 	s2.content = newContents(epsilon)
 	s2.output = append(s2.output, s2)
 	s2.isEmpty = true
@@ -385,8 +388,8 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
 // Creates and returns a new state with the 'default' values.
 func newState() nfaState {
 	ret := nfaState{
-		output:          make([]*nfaState, 0),
-		transitions:     make(map[int][]*nfaState),
+		output: make([]*nfaState, 0),
+		//		transitions:     make(map[int][]*nfaState),
 		assert:          noneAssert,
 		except:          append([]rune{}, 0),
 		lookaroundRegex: "",