diff --git a/regex/matching.go b/regex/matching.go index 1263e37..23f8317 100644 --- a/regex/matching.go +++ b/regex/matching.go @@ -151,6 +151,11 @@ func pruneIndices(indices []Match) []Match { return toRet } +func copyThread(to *nfaState, from nfaState) { + to.threadSP = from.threadSP + to.threadGroups = from.threadGroups +} + // Find returns the 0-group of the leftmost match of the regex in the given string. // An error value != nil indicates that no match was found. func (regex Reg) Find(str string) (Group, error) { @@ -271,7 +276,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in // foundPath := false //startIdx := offset //endIdx := offset - currentStates := make([]*nfaState, 0) + currentStates := make([]nfaState, 0) // tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration i := offset // Index in string //startingFrom := i // Store starting index @@ -302,13 +307,15 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in //} start.threadSP = i - currentStates = append(currentStates, start) + currentStates = append(currentStates, *start) var foundMatch bool + var isEmptyAndNoAssertion bool // Main loop for len(currentStates) > 0 { currentState, _ := pop(&currentStates) idx := currentState.threadSP foundMatch = false + isEmptyAndNoAssertion = false if currentState.threadGroups == nil { currentState.threadGroups = newMatch(numGroups + 1) @@ -343,44 +350,39 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in // continue // } + // Alternation - enqueue left then right state, and continue if currentState.isAlternation { rightState := currentState.rightState - rightState.threadGroups = currentState.threadGroups - rightState.threadSP = currentState.threadSP - currentStates = append(currentStates, currentState.rightState) + copyThread(rightState, currentState) + currentStates = append(currentStates, *currentState.rightState) leftState 
:= currentState.leftState - leftState.threadGroups = currentState.threadGroups - leftState.threadSP = currentState.threadSP - currentStates = append(currentStates, currentState.leftState) + copyThread(leftState, currentState) + currentStates = append(currentStates, *currentState.leftState) continue } + // Empty state - enqueue next state, do _not_ increment the SP if currentState.isEmpty && currentState.assert == noneAssert { - allMatches := make([]*nfaState, 0) - for _, v := range currentState.transitions { - allMatches = append(allMatches, v...) - } - slices.Reverse(allMatches) - for _, m := range allMatches { - m.threadGroups = currentState.threadGroups - m.threadSP = idx - } - currentStates = append(currentStates, allMatches...) + isEmptyAndNoAssertion = true } if currentState.contentContains(str, idx) { foundMatch = true - allMatches := make([]*nfaState, 0) + } + + if isEmptyAndNoAssertion || foundMatch { + allMatches := make([]nfaState, 0) for _, v := range currentState.transitions { - allMatches = append(allMatches, v...) + dereferenced := funcMap(v, func(s *nfaState) nfaState { + return *s + }) + allMatches = append(allMatches, dereferenced...) } slices.Reverse(allMatches) - for _, m := range allMatches { - m.threadGroups = currentState.threadGroups - if currentState.assert == noneAssert { - m.threadSP = idx + 1 - } else { - m.threadSP = idx + for i := range allMatches { + copyThread(&allMatches[i], currentState) + if foundMatch && currentState.assert == noneAssert { + allMatches[i].threadSP += 1 } } currentStates = append(currentStates, allMatches...) 
@@ -388,8 +390,15 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in if currentState.isLast { // Last state reached if foundMatch { - currentState.threadGroups[0].EndIdx = idx + 1 - return true, currentState.threadGroups, idx + 1 + if currentState.assert != noneAssert { + currentState.threadGroups[0].EndIdx = idx + } else { + currentState.threadGroups[0].EndIdx = idx + 1 + } + if idx == currentState.threadGroups[0].StartIdx { + idx += 1 + } + return true, currentState.threadGroups, idx } else if currentState.isEmpty && currentState.assert == noneAssert { currentState.threadGroups[0].EndIdx = idx if idx == currentState.threadGroups[0].StartIdx {