If the NFA starts with an assertion, make sure it is true before doing anything else. Also, when an assertion fails, check for a last-state _lookaround_ (rather than just any last state) before breaking instead of aborting.

master
Aadhavan Srinivasan 4 weeks ago
parent eb6a044ecf
commit cbd6ea136b

@ -133,6 +133,14 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde
i := offset // Index in string i := offset // Index in string
startingFrom := i // Store starting index startingFrom := i // Store starting index
// If the first state is an assertion, make sure the assertion
// is true before we do _anything_ else.
if start.assert != NONE {
if start.checkAssertion(str, offset) == false {
i++
return false, MatchIndex{}, i
}
}
// Increment until we hit a character matching the start state (assuming not 0-state) // Increment until we hit a character matching the start state (assuming not 0-state)
if start.isEmpty == false { if start.isEmpty == false {
for i < len(str) && !start.contentContains(str, i) { for i < len(str) && !start.contentContains(str, i) {
@ -171,10 +179,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde
tempStates = nil tempStates = nil
// Take any transitions corresponding to current character // Take any transitions corresponding to current character
numStatesMatched := 0 // The number of states which had at least 1 match for this round numStatesMatched := 0 // The number of states which had at least 1 match for this round
assertionFailed := false // Whether or not an assertion failed for this round assertionFailed := false // Whether or not an assertion failed for this round
lastStateInList := false // Whether or not a last state was in our list of states lastStateInList := false // Whether or not a last state was in our list of states
// lastStateLookaround := false // Whether or not a last state (that is also a lookaround) matched lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
for _, state := range currentStates { for _, state := range currentStates {
matches, numMatches := state.matchesFor(str, i) matches, numMatches := state.matchesFor(str, i)
if numMatches > 0 { if numMatches > 0 {
@ -186,17 +194,24 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde
assertionFailed = true assertionFailed = true
} }
if state.isLast { if state.isLast {
if state.isLookaround() {
lastLookaroundInList = true
}
lastStateInList = true lastStateInList = true
} }
} }
if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
// One of the states in our list was a last state. In this case, we // If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
// don't abort upon the failure of an assertion, because we have found // state. The explanation below is my attempt to explain this behavior.
// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
//
// One of the states in our list was a last state and a lookaround. In this case, we
// don't abort upon failure of the assertion, because we have found
// another path to a final state. // another path to a final state.
// Even if the last state _was_ an assertion, we can use the previously // Even if the last state _was_ an assertion, we can use the previously
// saved indices to find a match. // saved indices to find a match.
if lastStateInList { if lastLookaroundInList {
break break
} else { } else {
if i == startingFrom { if i == startingFrom {

Loading…
Cancel
Save