Fixed matching greediness, e.g. the pattern a(a|b)*a would not match 'aaa' in 'aaab'
This commit is contained in:
51
matching.go
51
matching.go
@@ -56,6 +56,11 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
|
|||||||
}
|
}
|
||||||
currentStates = append(currentStates, start)
|
currentStates = append(currentStates, start)
|
||||||
|
|
||||||
|
// Hold a list of match indices for the current run. When we
|
||||||
|
// can no longer find a match, the match with the largest range is
|
||||||
|
// chosen as the match for the entire string.
|
||||||
|
// This allows us to pick the longest possible match (which is how greedy matching works).
|
||||||
|
tempIndices := make([]matchIndex, 0)
|
||||||
// Main loop
|
// Main loop
|
||||||
for i < len(str) {
|
for i < len(str) {
|
||||||
foundPath = false
|
foundPath = false
|
||||||
@@ -74,7 +79,7 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
currentStates = append(currentStates, tempStates...)
|
currentStates, _ = unique_append(currentStates, tempStates...)
|
||||||
tempStates = nil
|
tempStates = nil
|
||||||
|
|
||||||
// Take any transitions corresponding to current character
|
// Take any transitions corresponding to current character
|
||||||
@@ -83,20 +88,28 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
|
|||||||
tempStates = append(tempStates, state.transitions[int(str[i])]...)
|
tempStates = append(tempStates, state.transitions[int(str[i])]...)
|
||||||
foundPath = true
|
foundPath = true
|
||||||
}
|
}
|
||||||
}
|
if state.isLast {
|
||||||
|
endIdx = i
|
||||||
if foundPath == false {
|
tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, endIdx + offset})
|
||||||
// This enables the 'greedy' behavior - last-state status is only checked if we didn't find a path forward
|
|
||||||
for _, state := range currentStates {
|
|
||||||
if state.isLast {
|
|
||||||
endIdx = i
|
|
||||||
indices, _ = unique_append(indices, matchIndex{startIdx + offset, endIdx + offset})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Recursion - match with rest of string if we have nowhere to go. If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over
|
}
|
||||||
|
// Recursion - match with rest of string if we have nowhere to go. If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over
|
||||||
|
if foundPath == false {
|
||||||
if i == startingFrom {
|
if i == startingFrom {
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
// Get the maximum index-range from the list
|
||||||
|
if len(tempIndices) > 0 {
|
||||||
|
indexToAdd := Reduce(tempIndices, func(i1 matchIndex, i2 matchIndex) matchIndex {
|
||||||
|
r1 := i1.endIdx - i1.startIdx
|
||||||
|
r2 := i2.endIdx - i2.startIdx
|
||||||
|
if r1 >= r2 {
|
||||||
|
return i1
|
||||||
|
}
|
||||||
|
return i2
|
||||||
|
})
|
||||||
|
indices, _ = unique_append(indices, indexToAdd)
|
||||||
|
}
|
||||||
return findAllMatchesHelper(start, str[i:], indices, offset+i)
|
return findAllMatchesHelper(start, str[i:], indices, offset+i)
|
||||||
}
|
}
|
||||||
currentStates = make([]*State, len(tempStates))
|
currentStates = make([]*State, len(tempStates))
|
||||||
@@ -123,12 +136,24 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
|
|||||||
tempStates = nil
|
tempStates = nil
|
||||||
|
|
||||||
for _, state := range currentStates {
|
for _, state := range currentStates {
|
||||||
// Only add the match if we the start index is in bounds
|
// Only add the match if the start index is in bounds
|
||||||
if state.isLast && startIdx+offset < len(str)+offset {
|
if state.isLast && startIdx+offset < len(str)+offset {
|
||||||
endIdx = i
|
endIdx = i
|
||||||
indices, _ = unique_append(indices, matchIndex{startIdx + offset, endIdx + offset})
|
tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, endIdx + offset})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Get the maximum index-range from the list
|
||||||
|
if len(tempIndices) > 0 {
|
||||||
|
indexToAdd := Reduce(tempIndices, func(i1 matchIndex, i2 matchIndex) matchIndex {
|
||||||
|
r1 := i1.endIdx - i1.startIdx
|
||||||
|
r2 := i2.endIdx - i2.startIdx
|
||||||
|
if r1 >= r2 {
|
||||||
|
return i1
|
||||||
|
}
|
||||||
|
return i2
|
||||||
|
})
|
||||||
|
indices, _ = unique_append(indices, indexToAdd)
|
||||||
|
}
|
||||||
|
|
||||||
// Default
|
// Default
|
||||||
return indices
|
return indices
|
||||||
|
Reference in New Issue
Block a user