Started implementing Thompson's algorithm for matching, because the old matcher was purely backtracking (so it would enter an infinite loop on a pattern like '(a*)*').
The git diff claims that a ton of code was changed, but most of it was just indentation changes.
This commit is contained in:
		| @@ -277,6 +277,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 	//startIdx := offset | ||||
| 	//endIdx := offset | ||||
| 	currentStates := make([]nfaState, 0) | ||||
| 	nextStates := make([]nfaState, 0) | ||||
| 	//	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration | ||||
| 	i := offset // Index in string | ||||
| 	//startingFrom := i                  // Store starting index | ||||
| @@ -311,103 +312,106 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 	var foundMatch bool | ||||
| 	var isEmptyAndNoAssertion bool | ||||
| 	// Main loop | ||||
| 	for len(currentStates) > 0 { | ||||
| 		currentState, _ := pop(&currentStates) | ||||
| 		idx := currentState.threadSP | ||||
| 		foundMatch = false | ||||
| 		isEmptyAndNoAssertion = false | ||||
| 	for idx := i; idx <= len(str); idx++ { | ||||
| 		for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ { | ||||
| 			currentState := currentStates[currentStateIdx] | ||||
| 			foundMatch = false | ||||
| 			isEmptyAndNoAssertion = false | ||||
|  | ||||
| 		if currentState.threadGroups == nil { | ||||
| 			currentState.threadGroups = newMatch(numGroups + 1) | ||||
| 			currentState.threadGroups[0].StartIdx = idx | ||||
| 		} | ||||
|  | ||||
| 		if currentState.groupBegin { | ||||
| 			currentState.threadGroups[currentState.groupNum].StartIdx = idx | ||||
| 		} | ||||
|  | ||||
| 		if currentState.groupEnd { | ||||
| 			currentState.threadGroups[currentState.groupNum].EndIdx = idx | ||||
| 		} | ||||
|  | ||||
| 		//		if currentState.isKleene { | ||||
| 		//			// Append the next-state (after the kleene), then append the kleene state | ||||
| 		//			allMatches := make([]*nfaState, 0) | ||||
| 		//			for _, v := range currentState.transitions { | ||||
| 		//				allMatches = append(allMatches, v...) | ||||
| 		//			} | ||||
| 		//			slices.Reverse(allMatches) | ||||
| 		//			for _, m := range allMatches { | ||||
| 		//				m.threadGroups = currentState.threadGroups | ||||
| 		//				m.threadSP = idx | ||||
| 		//			} | ||||
| 		//			currentStates = append(currentStates, allMatches...) | ||||
| 		// | ||||
| 		//			//	kleeneState := currentState.kleeneState | ||||
| 		//			//	kleeneState.threadGroups = currentState.threadGroups | ||||
| 		//			//	kleeneState.threadSP = currentState.threadSP | ||||
| 		//			//	currentStates = append(currentStates, kleeneState) | ||||
| 		//			continue | ||||
| 		//		} | ||||
|  | ||||
| 		// Alternation - enqueue left then right state, and continue | ||||
| 		if currentState.isAlternation { | ||||
| 			rightState := currentState.rightState | ||||
| 			copyThread(rightState, currentState) | ||||
| 			currentStates = append(currentStates, *currentState.rightState) | ||||
| 			leftState := currentState.leftState | ||||
| 			copyThread(leftState, currentState) | ||||
| 			currentStates = append(currentStates, *currentState.leftState) | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// Empty state - enqueue next state, do _not_ increment the SP | ||||
| 		if currentState.isEmpty && currentState.assert == noneAssert { | ||||
| 			isEmptyAndNoAssertion = true | ||||
| 		} | ||||
|  | ||||
| 		if currentState.contentContains(str, idx) { | ||||
| 			foundMatch = true | ||||
| 		} | ||||
|  | ||||
| 		if isEmptyAndNoAssertion || foundMatch { | ||||
| 			allMatches := make([]nfaState, 0) | ||||
| 			for _, v := range currentState.transitions { | ||||
| 				dereferenced := funcMap(v, func(s *nfaState) nfaState { | ||||
| 					return *s | ||||
| 				}) | ||||
| 				allMatches = append(allMatches, dereferenced...) | ||||
| 			if currentState.threadGroups == nil { | ||||
| 				currentState.threadGroups = newMatch(numGroups + 1) | ||||
| 				currentState.threadGroups[0].StartIdx = idx | ||||
| 			} | ||||
| 			slices.Reverse(allMatches) | ||||
| 			for i := range allMatches { | ||||
| 				copyThread(&allMatches[i], currentState) | ||||
| 				if foundMatch && currentState.assert == noneAssert { | ||||
| 					allMatches[i].threadSP += 1 | ||||
|  | ||||
| 			if currentState.groupBegin { | ||||
| 				currentState.threadGroups[currentState.groupNum].StartIdx = idx | ||||
| 			} | ||||
|  | ||||
| 			if currentState.groupEnd { | ||||
| 				currentState.threadGroups[currentState.groupNum].EndIdx = idx | ||||
| 			} | ||||
|  | ||||
| 			//		if currentState.isKleene { | ||||
| 			//			// Append the next-state (after the kleene), then append the kleene state | ||||
| 			//			allMatches := make([]*nfaState, 0) | ||||
| 			//			for _, v := range currentState.transitions { | ||||
| 			//				allMatches = append(allMatches, v...) | ||||
| 			//			} | ||||
| 			//			slices.Reverse(allMatches) | ||||
| 			//			for _, m := range allMatches { | ||||
| 			//				m.threadGroups = currentState.threadGroups | ||||
| 			//				m.threadSP = idx | ||||
| 			//			} | ||||
| 			//			currentStates = append(currentStates, allMatches...) | ||||
| 			// | ||||
| 			//			//	kleeneState := currentState.kleeneState | ||||
| 			//			//	kleeneState.threadGroups = currentState.threadGroups | ||||
| 			//			//	kleeneState.threadSP = currentState.threadSP | ||||
| 			//			//	currentStates = append(currentStates, kleeneState) | ||||
| 			//			continue | ||||
| 			//		} | ||||
|  | ||||
| 			// Alternation - enqueue left then right state, and continue | ||||
| 			if currentState.isAlternation { | ||||
| 				leftState := currentState.leftState | ||||
| 				copyThread(leftState, currentState) | ||||
| 				currentStates = append(currentStates, *currentState.leftState) | ||||
| 				rightState := currentState.rightState | ||||
| 				copyThread(rightState, currentState) | ||||
| 				currentStates = append(currentStates, *currentState.rightState) | ||||
| 				continue | ||||
| 			} | ||||
|  | ||||
| 			// Empty state - enqueue next state, do _not_ increment the SP | ||||
| 			if currentState.isEmpty && currentState.assert == noneAssert { | ||||
| 				isEmptyAndNoAssertion = true | ||||
| 			} | ||||
|  | ||||
| 			if currentState.contentContains(str, idx) { | ||||
| 				foundMatch = true | ||||
| 			} | ||||
|  | ||||
| 			if isEmptyAndNoAssertion || foundMatch { | ||||
| 				allMatches := make([]nfaState, 0) | ||||
| 				for _, v := range currentState.transitions { | ||||
| 					dereferenced := funcMap(v, func(s *nfaState) nfaState { | ||||
| 						return *s | ||||
| 					}) | ||||
| 					allMatches = append(allMatches, dereferenced...) | ||||
| 				} | ||||
| 				slices.Reverse(allMatches) | ||||
| 				for i := range allMatches { | ||||
| 					copyThread(&allMatches[i], currentState) | ||||
| 					if foundMatch && currentState.assert == noneAssert { | ||||
| 						allMatches[i].threadSP += 1 | ||||
| 					} | ||||
| 				} | ||||
| 				nextStates = append(nextStates, allMatches...) | ||||
| 			} | ||||
| 			currentStates = append(currentStates, allMatches...) | ||||
| 		} | ||||
|  | ||||
| 		if currentState.isLast { // Last state reached | ||||
| 			if foundMatch { | ||||
| 				if currentState.assert != noneAssert { | ||||
| 			if currentState.isLast { // Last state reached | ||||
| 				if foundMatch { | ||||
| 					if currentState.assert != noneAssert { | ||||
| 						currentState.threadGroups[0].EndIdx = idx | ||||
| 					} else { | ||||
| 						currentState.threadGroups[0].EndIdx = idx + 1 | ||||
| 					} | ||||
| 					if idx == currentState.threadGroups[0].StartIdx { | ||||
| 						idx += 1 | ||||
| 					} | ||||
| 					return true, currentState.threadGroups, idx | ||||
| 				} else if currentState.isEmpty && currentState.assert == noneAssert { | ||||
| 					currentState.threadGroups[0].EndIdx = idx | ||||
| 				} else { | ||||
| 					currentState.threadGroups[0].EndIdx = idx + 1 | ||||
| 					if idx == currentState.threadGroups[0].StartIdx { | ||||
| 						idx++ | ||||
| 					} | ||||
| 					return true, currentState.threadGroups, idx | ||||
| 				} | ||||
| 				if idx == currentState.threadGroups[0].StartIdx { | ||||
| 					idx += 1 | ||||
| 				} | ||||
| 				return true, currentState.threadGroups, idx | ||||
| 			} else if currentState.isEmpty && currentState.assert == noneAssert { | ||||
| 				currentState.threadGroups[0].EndIdx = idx | ||||
| 				if idx == currentState.threadGroups[0].StartIdx { | ||||
| 					idx++ | ||||
| 				} | ||||
| 				return true, currentState.threadGroups, idx | ||||
| 			} | ||||
|  | ||||
| 			} | ||||
| 		} | ||||
| 		copy(currentStates, nextStates) | ||||
| 		nextStates = nil | ||||
| 	} | ||||
| 	return false, []Group{}, i + 1 | ||||
| 	//		zeroStates := make([]*nfaState, 0) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user