Fixed matching greediness, e.g. a(a|b)*a would not match 'aaa' in 'aaab'
This commit is contained in:
51
matching.go
51
matching.go
@@ -56,6 +56,11 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
|
||||
}
|
||||
currentStates = append(currentStates, start)
|
||||
|
||||
// Hold a list of match indices for the current run. When we
|
||||
// can no longer find a match, the match with the largest range is
|
||||
// chosen as the match for the entire string.
|
||||
// This allows us to pick the longest possible match (which is how greedy matching works).
|
||||
tempIndices := make([]matchIndex, 0)
|
||||
// Main loop
|
||||
for i < len(str) {
|
||||
foundPath = false
|
||||
@@ -74,7 +79,7 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
|
||||
}
|
||||
}
|
||||
|
||||
currentStates = append(currentStates, tempStates...)
|
||||
currentStates, _ = unique_append(currentStates, tempStates...)
|
||||
tempStates = nil
|
||||
|
||||
// Take any transitions corresponding to current character
|
||||
@@ -83,20 +88,28 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
|
||||
tempStates = append(tempStates, state.transitions[int(str[i])]...)
|
||||
foundPath = true
|
||||
}
|
||||
}
|
||||
|
||||
if foundPath == false {
|
||||
// This enables the 'greedy' behavior - last-state status is only checked if we didn't find a path forward
|
||||
for _, state := range currentStates {
|
||||
if state.isLast {
|
||||
endIdx = i
|
||||
indices, _ = unique_append(indices, matchIndex{startIdx + offset, endIdx + offset})
|
||||
}
|
||||
if state.isLast {
|
||||
endIdx = i
|
||||
tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, endIdx + offset})
|
||||
}
|
||||
// Recursion - match with rest of string if we have nowhere to go. If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over
|
||||
}
|
||||
// Recursion - match with rest of string if we have nowhere to go. If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over
|
||||
if foundPath == false {
|
||||
if i == startingFrom {
|
||||
i++
|
||||
}
|
||||
// Get the maximum index-range from the list
|
||||
if len(tempIndices) > 0 {
|
||||
indexToAdd := Reduce(tempIndices, func(i1 matchIndex, i2 matchIndex) matchIndex {
|
||||
r1 := i1.endIdx - i1.startIdx
|
||||
r2 := i2.endIdx - i2.startIdx
|
||||
if r1 >= r2 {
|
||||
return i1
|
||||
}
|
||||
return i2
|
||||
})
|
||||
indices, _ = unique_append(indices, indexToAdd)
|
||||
}
|
||||
return findAllMatchesHelper(start, str[i:], indices, offset+i)
|
||||
}
|
||||
currentStates = make([]*State, len(tempStates))
|
||||
@@ -123,12 +136,24 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
|
||||
tempStates = nil
|
||||
|
||||
for _, state := range currentStates {
|
||||
// Only add the match if we the start index is in bounds
|
||||
// Only add the match if the start index is in bounds
|
||||
if state.isLast && startIdx+offset < len(str)+offset {
|
||||
endIdx = i
|
||||
indices, _ = unique_append(indices, matchIndex{startIdx + offset, endIdx + offset})
|
||||
tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, endIdx + offset})
|
||||
}
|
||||
}
|
||||
// Get the maximum index-range from the list
|
||||
if len(tempIndices) > 0 {
|
||||
indexToAdd := Reduce(tempIndices, func(i1 matchIndex, i2 matchIndex) matchIndex {
|
||||
r1 := i1.endIdx - i1.startIdx
|
||||
r2 := i2.endIdx - i2.startIdx
|
||||
if r1 >= r2 {
|
||||
return i1
|
||||
}
|
||||
return i2
|
||||
})
|
||||
indices, _ = unique_append(indices, indexToAdd)
|
||||
}
|
||||
|
||||
// Default
|
||||
return indices
|
||||
|
Reference in New Issue
Block a user