From 8e8e9e133ff1c55c638a6b581e1efb1c2112ddc6 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Tue, 29 Oct 2024 20:07:30 -0400 Subject: [PATCH] Fixed matching greediness, e.g. a(a|b)*a would not match 'aaa' in 'aaab' --- matching.go | 49 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/matching.go b/matching.go index 0fc0cbe..fcc972f 100644 --- a/matching.go +++ b/matching.go @@ -56,6 +56,11 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset } currentStates = append(currentStates, start) + // Hold a list of match indices for the current run. When we + // can no longer find a match, the match with the largest range is + // chosen as the match for the entire string. + // This allows us to pick the longest possible match (which is how greedy matching works). + tempIndices := make([]matchIndex, 0) // Main loop for i < len(str) { foundPath = false @@ -74,7 +79,7 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset } } - currentStates = append(currentStates, tempStates...) + currentStates, _ = unique_append(currentStates, tempStates...) tempStates = nil // Take any transitions corresponding to current character @@ -83,20 +88,28 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset tempStates = append(tempStates, state.transitions[int(str[i])]...) foundPath = true } + if state.isLast { + endIdx = i + tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, endIdx + offset}) + } } - + // Recursion - match with rest of string if we have nowhere to go. 
If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over if foundPath == false { - // This enables the 'greedy' behavior - last-state status is only checked if we didn't find a path forward - for _, state := range currentStates { - if state.isLast { - endIdx = i - indices, _ = unique_append(indices, matchIndex{startIdx + offset, endIdx + offset}) - } - } - // Recursion - match with rest of string if we have nowhere to go. If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over if i == startingFrom { i++ } + // Get the maximum index-range from the list + if len(tempIndices) > 0 { + indexToAdd := Reduce(tempIndices, func(i1 matchIndex, i2 matchIndex) matchIndex { + r1 := i1.endIdx - i1.startIdx + r2 := i2.endIdx - i2.startIdx + if r1 >= r2 { + return i1 + } + return i2 + }) + indices, _ = unique_append(indices, indexToAdd) + } return findAllMatchesHelper(start, str[i:], indices, offset+i) } currentStates = make([]*State, len(tempStates)) @@ -123,12 +136,24 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset tempStates = nil for _, state := range currentStates { - // Only add the match if we the start index is in bounds + // Only add the match if the start index is in bounds if state.isLast && startIdx+offset < len(str)+offset { endIdx = i - indices, _ = unique_append(indices, matchIndex{startIdx + offset, endIdx + offset}) + tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, endIdx + offset}) } } + // Get the maximum index-range from the list + if len(tempIndices) > 0 { + indexToAdd := Reduce(tempIndices, func(i1 matchIndex, i2 matchIndex) matchIndex { + r1 := i1.endIdx - i1.startIdx + r2 := i2.endIdx - i2.startIdx + if r1 >= r2 { + return i1 + } + return i2 + }) + indices, _ = unique_append(indices, indexToAdd) + } // Default return indices