Fixed matching greediness, e.g. a(a|b)*a would not match 'aaa' in 'aaab'

master
Aadhavan Srinivasan 2 months ago
parent a619fd24f6
commit 8e8e9e133f

@ -56,6 +56,11 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
} }
currentStates = append(currentStates, start) currentStates = append(currentStates, start)
// Hold a list of match indices for the current run. When we
// can no longer find a match, the match with the largest range is
// chosen as the match for the entire string.
// This allows us to pick the longest possible match (which is how greedy matching works).
tempIndices := make([]matchIndex, 0)
// Main loop // Main loop
for i < len(str) { for i < len(str) {
foundPath = false foundPath = false
@ -74,7 +79,7 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
} }
} }
currentStates = append(currentStates, tempStates...) currentStates, _ = unique_append(currentStates, tempStates...)
tempStates = nil tempStates = nil
// Take any transitions corresponding to current character // Take any transitions corresponding to current character
@ -83,20 +88,28 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
tempStates = append(tempStates, state.transitions[int(str[i])]...) tempStates = append(tempStates, state.transitions[int(str[i])]...)
foundPath = true foundPath = true
} }
if state.isLast {
endIdx = i
tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, endIdx + offset})
}
} }
// Recursion - match with rest of string if we have nowhere to go. If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over
if foundPath == false { if foundPath == false {
// This enables the 'greedy' behavior - last-state status is only checked if we didn't find a path forward
for _, state := range currentStates {
if state.isLast {
endIdx = i
indices, _ = unique_append(indices, matchIndex{startIdx + offset, endIdx + offset})
}
}
// Recursion - match with rest of string if we have nowhere to go. If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over
if i == startingFrom { if i == startingFrom {
i++ i++
} }
// Get the maximum index-range from the list
if len(tempIndices) > 0 {
indexToAdd := Reduce(tempIndices, func(i1 matchIndex, i2 matchIndex) matchIndex {
r1 := i1.endIdx - i1.startIdx
r2 := i2.endIdx - i2.startIdx
if r1 >= r2 {
return i1
}
return i2
})
indices, _ = unique_append(indices, indexToAdd)
}
return findAllMatchesHelper(start, str[i:], indices, offset+i) return findAllMatchesHelper(start, str[i:], indices, offset+i)
} }
currentStates = make([]*State, len(tempStates)) currentStates = make([]*State, len(tempStates))
@ -123,12 +136,24 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
tempStates = nil tempStates = nil
for _, state := range currentStates { for _, state := range currentStates {
// Only add the match if we the start index is in bounds // Only add the match if the start index is in bounds
if state.isLast && startIdx+offset < len(str)+offset { if state.isLast && startIdx+offset < len(str)+offset {
endIdx = i endIdx = i
indices, _ = unique_append(indices, matchIndex{startIdx + offset, endIdx + offset}) tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, endIdx + offset})
} }
} }
// Get the maximum index-range from the list
if len(tempIndices) > 0 {
indexToAdd := Reduce(tempIndices, func(i1 matchIndex, i2 matchIndex) matchIndex {
r1 := i1.endIdx - i1.startIdx
r2 := i2.endIdx - i2.startIdx
if r1 >= r2 {
return i1
}
return i2
})
indices, _ = unique_append(indices, indexToAdd)
}
// Default // Default
return indices return indices

Loading…
Cancel
Save