If the NFA starts with an assertion, make sure it is true before doing anything else. Also, when an assertion fails, check for a last-state _lookaround_ (rather than just any last state) before breaking out of the loop instead of aborting.

master
Aadhavan Srinivasan 2 months ago
parent eb6a044ecf
commit cbd6ea136b

@ -133,6 +133,14 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde
i := offset // Index in string
startingFrom := i // Store starting index
// If the first state is an assertion, make sure the assertion
// is true before we do _anything_ else.
if start.assert != NONE {
if start.checkAssertion(str, offset) == false {
i++
return false, MatchIndex{}, i
}
}
// Increment until we hit a character matching the start state (assuming not 0-state)
if start.isEmpty == false {
for i < len(str) && !start.contentContains(str, i) {
@ -171,10 +179,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde
tempStates = nil
// Take any transitions corresponding to current character
numStatesMatched := 0 // The number of states which had at least 1 match for this round
assertionFailed := false // Whether or not an assertion failed for this round
lastStateInList := false // Whether or not a last state was in our list of states
// lastStateLookaround := false // Whether or not a last state (that is also a lookaround) matched
numStatesMatched := 0 // The number of states which had at least 1 match for this round
assertionFailed := false // Whether or not an assertion failed for this round
lastStateInList := false // Whether or not a last state was in our list of states
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
for _, state := range currentStates {
matches, numMatches := state.matchesFor(str, i)
if numMatches > 0 {
@ -186,17 +194,24 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde
assertionFailed = true
}
if state.isLast {
if state.isLookaround() {
lastLookaroundInList = true
}
lastStateInList = true
}
}
if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
// One of the states in our list was a last state. In this case, we
// don't abort upon the failure of an assertion, because we have found
// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
// state. The explanation below is my attempt to explain this behavior.
// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
//
// One of the states in our list was a last state and a lookaround. In this case, we
// don't abort upon failure of the assertion, because we have found
// another path to a final state.
// Even if the last state _was_ an assertion, we can use the previously
// saved indices to find a match.
if lastStateInList {
if lastLookaroundInList {
break
} else {
if i == startingFrom {

Loading…
Cancel
Save