diff --git a/regex/matching.go b/regex/matching.go index 3252742..d504801 100644 --- a/regex/matching.go +++ b/regex/matching.go @@ -74,58 +74,58 @@ func getZeroGroup(m Match) Group { // given slice. It returns the resulting states. If any of the resulting states is a 0-state, // the second ret val is true. // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index. -func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) { - for _, state := range states { - if len(state.transitions[epsilon]) > 0 { - for _, s := range state.transitions[epsilon] { - if s.threadGroups == nil { - s.threadGroups = newMatch(numGroups + 1) - } - copy(s.threadGroups, state.threadGroups) - if s.groupBegin { - s.threadGroups[s.groupNum].StartIdx = idx - // openParenGroups = append(openParenGroups, s.groupNum) - } - if s.groupEnd { - s.threadGroups[s.groupNum].EndIdx = idx - // closeParenGroups = append(closeParenGroups, s.groupNum) - } - } - rtv = append(rtv, state.transitions[epsilon]...) - } - } - for _, state := range rtv { - if len(state.transitions[epsilon]) > 0 { - return rtv, true - } - } - return rtv, false -} +//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) { +// for _, state := range states { +// if len(state.transitions[epsilon]) > 0 { +// for _, s := range state.transitions[epsilon] { +// if s.threadGroups == nil { +// s.threadGroups = newMatch(numGroups + 1) +// } +// copy(s.threadGroups, state.threadGroups) +// if s.groupBegin { +// s.threadGroups[s.groupNum].StartIdx = idx +// // openParenGroups = append(openParenGroups, s.groupNum) +// } +// if s.groupEnd { +// s.threadGroups[s.groupNum].EndIdx = idx +// // closeParenGroups = append(closeParenGroups, s.groupNum) +// } +// } +// rtv = append(rtv, state.transitions[epsilon]...) +// } +// } +// for _, state := range rtv { +// if len(state.transitions[epsilon]) > 0 { +// return rtv, true +// } +// } +// return rtv, false +//} // zeroMatchPossible returns true if a zero-length match is possible // from any of the given states, given the string and our position in it. // It uses the same algorithm to find zero-states as the one inside the loop, // so I should probably put it in a function. -func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool { - zeroStates, isZero := takeZeroState(states, numGroups, idx) - tempstates := make([]*nfaState, 0, len(zeroStates)+len(states)) - tempstates = append(tempstates, states...) - tempstates = append(tempstates, zeroStates...) - num_appended := 0 // number of unique states addded to tempstates - for isZero == true { - zeroStates, isZero = takeZeroState(tempstates, numGroups, idx) - tempstates, num_appended = uniqueAppend(tempstates, zeroStates...) - if num_appended == 0 { // break if we haven't appended any more unique values - break - } - } - for _, state := range tempstates { - if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast { - return true - } - } - return false -} +//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool { +// zeroStates, isZero := takeZeroState(states, numGroups, idx) +// tempstates := make([]*nfaState, 0, len(zeroStates)+len(states)) +// tempstates = append(tempstates, states...) +// tempstates = append(tempstates, zeroStates...) +// num_appended := 0 // number of unique states addded to tempstates +// for isZero == true { +// zeroStates, isZero = takeZeroState(tempstates, numGroups, idx) +// tempstates, num_appended = uniqueAppend(tempstates, zeroStates...) +// if num_appended == 0 { // break if we haven't appended any more unique values +// break +// } +// } +// for _, state := range tempstates { +// if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast { +// return true +// } +// } +// return false +//} // Prunes the slice by removing overlapping indices. func pruneIndices(indices []Match) []Match { @@ -376,17 +376,26 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in // Alternation - enqueue left then right state, and continue if currentState.isAlternation { - leftState := currentState.leftState - copyThread(leftState, currentState) - currentStates = append(currentStates, *currentState.leftState) - rightState := currentState.rightState - copyThread(rightState, currentState) - currentStates = append(currentStates, *currentState.rightState) + if currentState.isKleene { // Reverse order of adding things + rightState := currentState.splitState + copyThread(rightState, currentState) + currentStates = append(currentStates, *currentState.splitState) + leftState := currentState.next + copyThread(leftState, currentState) + currentStates = append(currentStates, *currentState.next) + } else { + leftState := currentState.next + copyThread(leftState, currentState) + currentStates = append(currentStates, *currentState.next) + rightState := currentState.splitState + copyThread(rightState, currentState) + currentStates = append(currentStates, *currentState.splitState) + } continue } // Empty state - enqueue next state, do _not_ increment the SP - if currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false { + if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false { isEmptyAndNoAssertion = true } @@ -396,12 +405,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in if isEmptyAndNoAssertion || foundMatch { allMatches := make([]nfaState, 0) - for _, v := range currentState.transitions { - dereferenced := funcMap(v, func(s *nfaState) nfaState { - return *s - }) - allMatches = append(allMatches, dereferenced...) - } + allMatches = append(allMatches, *(currentState.next)) slices.Reverse(allMatches) for i := range allMatches { copyThread(&allMatches[i], currentState) @@ -419,24 +423,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in } if currentState.isLast && len(nextStates) == 0 { // Last state reached - if foundMatch { - if currentState.assert != noneAssert { - currentState.threadGroups[0].EndIdx = idx - } else { - currentState.threadGroups[0].EndIdx = idx + 1 - } - if idx == currentState.threadGroups[0].StartIdx { - idx += 1 - } - return true, currentState.threadGroups, idx - } else if isEmptyAndNoAssertion { - currentState.threadGroups[0].EndIdx = idx - if idx == currentState.threadGroups[0].StartIdx { - idx++ - } - return true, currentState.threadGroups, idx + currentState.threadGroups[0].EndIdx = idx + if idx == currentState.threadGroups[0].StartIdx { + idx += 1 } - + return true, currentState.threadGroups, idx } } currentStates = append([]nfaState{}, nextStates...)