Got rid of unnecessary special case to match at end-of-string

Instead, I tweaked the rest of the matching function so that a special
check isn't necessary. If we are trying to match at the end of a string,
we skip the actual matching and proceed straight to finding
zero-length matches.

This change was made because, with the special case, capturing groups
weren't getting updated if we had an end-of-string match.
master
Aadhavan Srinivasan 1 week ago
parent 8c8e209587
commit 71cab59a89

@ -60,8 +60,7 @@ func (g Group) isValid() bool {
// takeZeroState takes the 0-state (if such a transition exists) for all states in the // takeZeroState takes the 0-state (if such a transition exists) for all states in the
// given slice. It returns the resulting states. If any of the resulting states is a 0-state, // given slice. It returns the resulting states. If any of the resulting states is a 0-state,
// the second ret val is true. // the second ret val is true.
// The third ret val is a list of all the group numbers of all the opening parentheses we crossed, // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
// and the fourth is a list of all the closing parentheses we passed
func takeZeroState(states []*State, numGroups int, idx int) (rtv []*State, isZero bool) { func takeZeroState(states []*State, numGroups int, idx int) (rtv []*State, isZero bool) {
for _, state := range states { for _, state := range states {
if len(state.transitions[EPSILON]) > 0 { if len(state.transitions[EPSILON]) > 0 {
@ -169,23 +168,6 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
// The second value here shouldn't be used, because we should exit when the third return value is greater than len(str) // The second value here shouldn't be used, because we should exit when the third return value is greater than len(str)
return false, []Group{}, offset return false, []Group{}, offset
} }
// 'Base case' - if we are at the end of the string, check if we can add a zero-length match
if offset == len(str) {
// Get all zero-state matches. If we can get to a zero-state without matching anything, we
// can add a zero-length match. This is all true only if the start state itself matches nothing.
// TODO - fill in capturing groups for these matches
if start.isEmpty {
to_return := newMatch(numGroups + 1)
if start.groupBegin {
to_return[start.groupNum].startIdx = offset
}
if ok := zeroMatchPossible(str, offset, numGroups, start); ok {
to_return[0] = Group{offset, offset}
return true, to_return, offset + 1
}
}
return false, []Group{}, offset + 1
}
// Hold a list of match indices for the current run. When we // Hold a list of match indices for the current run. When we
// can no longer find a match, the match with the largest range is // can no longer find a match, the match with the largest range is
@ -366,7 +348,7 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
for _, state := range currentStates { for _, state := range currentStates {
// Only add the match if the start index is in bounds. If the state has an assertion, // Only add the match if the start index is in bounds. If the state has an assertion,
// make sure the assertion checks out. // make sure the assertion checks out.
if state.isLast && startIdx < len(str) { if state.isLast && i <= len(str) {
if state.assert == NONE || state.checkAssertion(str, i) { if state.assert == NONE || state.checkAssertion(str, i) {
for j := 1; j < numGroups+1; j++ { for j := 1; j < numGroups+1; j++ {
tempIndices[j] = state.threadGroups[j] tempIndices[j] = state.threadGroups[j]

Loading…
Cancel
Save