@ -1,6 +1,7 @@
package regex
import (
"container/heap"
"fmt"
"slices"
"sort"
@ -271,7 +272,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
foundPath := false
startIdx := offset
endIdx := offset
currentStates := make ( [ ] * nfaState , 0 )
currentStates := & priorityQueue { }
heap . Init ( currentStates )
tempStates := make ( [ ] * nfaState , 0 ) // Used to store states that should be used in next loop iteration
i := offset // Index in string
startingFrom := i // Store starting index
@ -301,16 +303,19 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
// tempIndices[start.groupNum].startIdx = i
}
currentStates = append ( currentStates , start )
start. threadSP = i
heap . Push ( currentStates , newPriorQueueItem ( start ) )
// Main loop
for i < len ( str ) {
for currentStates . Len ( ) > 0 {
currentState := heap . Pop ( currentStates )
foundPath = false
zeroStates := make ( [ ] * nfaState , 0 )
// Keep taking zero-states, until there are no more left to take
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
zeroStates , isZero := takeZeroState ( currentStates , numGroups , i )
topStateItem := currentStates . peek ( )
topState := topStateItem . ( * priorQueueItem ) . state
zeroStates , isZero := takeZeroState ( [ ] * nfaState { topState } , numGroups , i )
tempStates = append ( tempStates , zeroStates ... )
num_appended := 0
for isZero == true {
@ -320,8 +325,13 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
break
}
}
if isZero == true {
currentStates . Pop ( )
}
currentStates = slices . Concat ( currentStates , tempStates )
for _ , state := range tempStates {
heap . Push ( currentStates , newPriorQueueItem ( state ) )
}
tempStates = nil
// Take any transitions corresponding to current character
@ -331,10 +341,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
var lastStatePtr * nfaState = nil // Pointer to the last-state, if it was found
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
for numStatesMatched == 0 && lastStateInList == false {
if len ( currentStates ) == 0 {
if currentStates . Len ( ) == 0 {
break
}
state , _ := pop ( & currentStates )
stateItem := heap . Pop ( currentStates )
state := stateItem . ( * priorQueueItem ) . state
matches , numMatches := state . matchesFor ( str , i )
if numMatches > 0 {
numStatesMatched ++
@ -344,6 +355,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
if m . threadGroups == nil {
m . threadGroups = newMatch ( numGroups + 1 )
}
m . threadSP = state . threadSP + 1
copy ( m . threadGroups , state . threadGroups )
}
}
@ -382,7 +394,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
// a. A last-state
// b. Empty
// c. Doesn't assert anything
for _ , s := range currentStates {
for _ , stateItem := range * currentStates {
s := stateItem . state
if s . isLast && s . isEmpty && s . assert == noneAssert {
lastStatePtr = s
lastStateInList = true
@ -403,7 +416,10 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
// Check if we can find a zero-length match
if foundPath == false {
if ok := zeroMatchPossible ( str , i , numGroups , currentStates ... ) ; ok {
currentStatesList := funcMap ( * currentStates , func ( item * priorQueueItem ) * nfaState {
return item . state
} )
if ok := zeroMatchPossible ( str , i , numGroups , currentStatesList ... ) ; ok {
if tempIndices [ 0 ] . IsValid ( ) == false {
tempIndices [ 0 ] = Group { startIdx , startIdx }
}
@ -423,8 +439,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
}
return false , [ ] Group { } , startIdx
}
currentStates = make ( [ ] * nfaState , len ( tempStates ) )
copy ( currentStates , tempStates )
currentStates = & priorityQueue { }
slices . Reverse ( tempStates )
for _ , state := range tempStates {
heap . Push ( currentStates , newPriorQueueItem ( state ) )
}
tempStates = nil
i ++
@ -432,7 +451,10 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
zeroStates , isZero := takeZeroState ( currentStates , numGroups , i )
if currentStates . Len ( ) > 0 {
topStateItem := currentStates . peek ( )
topState := topStateItem . ( * priorQueueItem ) . state
zeroStates , isZero := takeZeroState ( [ ] * nfaState { topState } , numGroups , i )
tempStates = append ( tempStates , zeroStates ... )
num_appended := 0 // Number of unique states added to tempStates
for isZero == true {
@ -442,11 +464,15 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
break
}
}
}
currentStates = append ( currentStates , tempStates ... )
for _ , state := range tempStates {
heap . Push ( currentStates , newPriorQueueItem ( state ) )
}
tempStates = nil
for _ , state := range currentStates {
for _ , stateItem := range * currentStates {
state := stateItem . state
// Only add the match if the start index is in bounds. If the state has an assertion,
// make sure the assertion checks out.
if state . isLast && i <= len ( str ) {