From cbd6ea136b02088b0c10eac723aa9a74707028ce Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Wed, 27 Nov 2024 11:46:38 -0500 Subject: [PATCH] If the NFA starts with an assertion, make sure it's true before doing anything else. Also, check for last-state _lookaround_ rather than just last state, before breaking (instead of aborting) when the assertion fails --- matching.go | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/matching.go b/matching.go index cc47d50..62e7131 100644 --- a/matching.go +++ b/matching.go @@ -133,6 +133,14 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde i := offset // Index in string startingFrom := i // Store starting index + // If the first state is an assertion, make sure the assertion + // is true before we do _anything_ else. + if start.assert != NONE { + if start.checkAssertion(str, offset) == false { + i++ + return false, MatchIndex{}, i + } + } // Increment until we hit a character matching the start state (assuming not 0-state) if start.isEmpty == false { for i < len(str) && !start.contentContains(str, i) { @@ -171,10 +179,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde tempStates = nil // Take any transitions corresponding to current character - numStatesMatched := 0 // The number of states which had at least 1 match for this round - assertionFailed := false // Whether or not an assertion failed for this round - lastStateInList := false // Whether or not a last state was in our list of states - // lastStateLookaround := false // Whether or not a last state (that is also a lookaround) matched + numStatesMatched := 0 // The number of states which had at least 1 match for this round + assertionFailed := false // Whether or not an assertion failed for this round + lastStateInList := false // Whether or not a last state was in our list of states + lastLookaroundInList := false // Whether or not a last state 
(that is a lookaround) was in our list of states for _, state := range currentStates { matches, numMatches := state.matchesFor(str, i) if numMatches > 0 { @@ -186,17 +194,24 @@ func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchInde assertionFailed = true } if state.isLast { + if state.isLookaround() { + lastLookaroundInList = true + } lastStateInList = true } } if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed - // One of the states in our list was a last state. In this case, we - // don't abort upon the failure of an assertion, because we have found + // If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_ + // state. The explanation below is my attempt to explain this behavior. + // If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails. + // + // One of the states in our list was a last state and a lookaround. In this case, we + // don't abort upon failure of the assertion, because we have found // another path to a final state. // Even if the last state _was_ an assertion, we can use the previously // saved indices to find a match. - if lastStateInList { + if lastLookaroundInList { break } else { if i == startingFrom {