Trying to emulate backtracking by using string pointers within threads (something similar to rsc's 2nd regexp article)
This commit is contained in:
		| @@ -1,6 +1,7 @@ | ||||
| package regex | ||||
|  | ||||
| import ( | ||||
| 	"container/heap" | ||||
| 	"fmt" | ||||
| 	"slices" | ||||
| 	"sort" | ||||
| @@ -271,7 +272,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 	foundPath := false | ||||
| 	startIdx := offset | ||||
| 	endIdx := offset | ||||
| 	currentStates := make([]*nfaState, 0) | ||||
| 	currentStates := &priorityQueue{} | ||||
| 	heap.Init(currentStates) | ||||
| 	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration | ||||
| 	i := offset                        // Index in string | ||||
| 	startingFrom := i                  // Store starting index | ||||
| @@ -301,16 +303,19 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 		//		tempIndices[start.groupNum].startIdx = i | ||||
| 	} | ||||
|  | ||||
| 	currentStates = append(currentStates, start) | ||||
|  | ||||
| 	start.threadSP = i | ||||
| 	heap.Push(currentStates, newPriorQueueItem(start)) | ||||
| 	// Main loop | ||||
| 	for i < len(str) { | ||||
| 	for currentStates.Len() > 0 { | ||||
| 		currentState := heap.Pop(currentStates) | ||||
| 		foundPath = false | ||||
|  | ||||
| 		zeroStates := make([]*nfaState, 0) | ||||
| 		// Keep taking zero-states, until there are no more left to take | ||||
| 		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take. | ||||
| 		zeroStates, isZero := takeZeroState(currentStates, numGroups, i) | ||||
| 		topStateItem := currentStates.peek() | ||||
| 		topState := topStateItem.(*priorQueueItem).state | ||||
| 		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i) | ||||
| 		tempStates = append(tempStates, zeroStates...) | ||||
| 		num_appended := 0 | ||||
| 		for isZero == true { | ||||
| @@ -320,8 +325,13 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		if isZero == true { | ||||
| 			currentStates.Pop() | ||||
| 		} | ||||
|  | ||||
| 		currentStates = slices.Concat(currentStates, tempStates) | ||||
| 		for _, state := range tempStates { | ||||
| 			heap.Push(currentStates, newPriorQueueItem(state)) | ||||
| 		} | ||||
| 		tempStates = nil | ||||
|  | ||||
| 		// Take any transitions corresponding to current character | ||||
| @@ -331,10 +341,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found | ||||
| 		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states | ||||
| 		for numStatesMatched == 0 && lastStateInList == false { | ||||
| 			if len(currentStates) == 0 { | ||||
| 			if currentStates.Len() == 0 { | ||||
| 				break | ||||
| 			} | ||||
| 			state, _ := pop(&currentStates) | ||||
| 			stateItem := heap.Pop(currentStates) | ||||
| 			state := stateItem.(*priorQueueItem).state | ||||
| 			matches, numMatches := state.matchesFor(str, i) | ||||
| 			if numMatches > 0 { | ||||
| 				numStatesMatched++ | ||||
| @@ -344,6 +355,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 					if m.threadGroups == nil { | ||||
| 						m.threadGroups = newMatch(numGroups + 1) | ||||
| 					} | ||||
| 					m.threadSP = state.threadSP + 1 | ||||
| 					copy(m.threadGroups, state.threadGroups) | ||||
| 				} | ||||
| 			} | ||||
| @@ -382,7 +394,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 		// 	a. A last-state | ||||
| 		// 	b. Empty | ||||
| 		// 	c. Doesn't assert anything | ||||
| 		for _, s := range currentStates { | ||||
| 		for _, stateItem := range *currentStates { | ||||
| 			s := stateItem.state | ||||
| 			if s.isLast && s.isEmpty && s.assert == noneAssert { | ||||
| 				lastStatePtr = s | ||||
| 				lastStateInList = true | ||||
| @@ -403,7 +416,10 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
|  | ||||
| 		// Check if we can find a zero-length match | ||||
| 		if foundPath == false { | ||||
| 			if ok := zeroMatchPossible(str, i, numGroups, currentStates...); ok { | ||||
| 			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState { | ||||
| 				return item.state | ||||
| 			}) | ||||
| 			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok { | ||||
| 				if tempIndices[0].IsValid() == false { | ||||
| 					tempIndices[0] = Group{startIdx, startIdx} | ||||
| 				} | ||||
| @@ -423,8 +439,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 			} | ||||
| 			return false, []Group{}, startIdx | ||||
| 		} | ||||
| 		currentStates = make([]*nfaState, len(tempStates)) | ||||
| 		copy(currentStates, tempStates) | ||||
| 		currentStates = &priorityQueue{} | ||||
| 		slices.Reverse(tempStates) | ||||
| 		for _, state := range tempStates { | ||||
| 			heap.Push(currentStates, newPriorQueueItem(state)) | ||||
| 		} | ||||
| 		tempStates = nil | ||||
|  | ||||
| 		i++ | ||||
| @@ -432,21 +451,28 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
|  | ||||
| 	// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position. | ||||
| 	// This is the exact same algorithm used inside the loop, so I should probably put it in a function. | ||||
| 	zeroStates, isZero := takeZeroState(currentStates, numGroups, i) | ||||
| 	tempStates = append(tempStates, zeroStates...) | ||||
| 	num_appended := 0 // Number of unique states added to tempStates | ||||
| 	for isZero == true { | ||||
| 		zeroStates, isZero = takeZeroState(tempStates, numGroups, i) | ||||
| 		tempStates, num_appended = uniqueAppend(tempStates, zeroStates...) | ||||
| 		if num_appended == 0 { // Break if we haven't appended any more unique values | ||||
| 			break | ||||
| 	if currentStates.Len() > 0 { | ||||
| 		topStateItem := currentStates.peek() | ||||
| 		topState := topStateItem.(*priorQueueItem).state | ||||
| 		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i) | ||||
| 		tempStates = append(tempStates, zeroStates...) | ||||
| 		num_appended := 0 // Number of unique states added to tempStates | ||||
| 		for isZero == true { | ||||
| 			zeroStates, isZero = takeZeroState(tempStates, numGroups, i) | ||||
| 			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...) | ||||
| 			if num_appended == 0 { // Break if we haven't appended any more unique values | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	currentStates = append(currentStates, tempStates...) | ||||
| 	for _, state := range tempStates { | ||||
| 		heap.Push(currentStates, newPriorQueueItem(state)) | ||||
| 	} | ||||
| 	tempStates = nil | ||||
|  | ||||
| 	for _, state := range currentStates { | ||||
| 	for _, stateItem := range *currentStates { | ||||
| 		state := stateItem.state | ||||
| 		// Only add the match if the start index is in bounds. If the state has an assertion, | ||||
| 		// make sure the assertion checks out. | ||||
| 		if state.isLast && i <= len(str) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user