Trying to emulate backtracking by using string pointers within threads (similar to the approach in Russ Cox's second regexp article, "Regular Expression Matching: the Virtual Machine Approach")
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package regex
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"fmt"
|
||||
"slices"
|
||||
"sort"
|
||||
@@ -271,7 +272,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
foundPath := false
|
||||
startIdx := offset
|
||||
endIdx := offset
|
||||
currentStates := make([]*nfaState, 0)
|
||||
currentStates := &priorityQueue{}
|
||||
heap.Init(currentStates)
|
||||
tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
|
||||
i := offset // Index in string
|
||||
startingFrom := i // Store starting index
|
||||
@@ -301,16 +303,19 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
// tempIndices[start.groupNum].startIdx = i
|
||||
}
|
||||
|
||||
currentStates = append(currentStates, start)
|
||||
|
||||
start.threadSP = i
|
||||
heap.Push(currentStates, newPriorQueueItem(start))
|
||||
// Main loop
|
||||
for i < len(str) {
|
||||
for currentStates.Len() > 0 {
|
||||
currentState := heap.Pop(currentStates)
|
||||
foundPath = false
|
||||
|
||||
zeroStates := make([]*nfaState, 0)
|
||||
// Keep taking zero-states, until there are no more left to take
|
||||
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
|
||||
zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
|
||||
topStateItem := currentStates.peek()
|
||||
topState := topStateItem.(*priorQueueItem).state
|
||||
zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
|
||||
tempStates = append(tempStates, zeroStates...)
|
||||
num_appended := 0
|
||||
for isZero == true {
|
||||
@@ -320,8 +325,13 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
break
|
||||
}
|
||||
}
|
||||
if isZero == true {
|
||||
currentStates.Pop()
|
||||
}
|
||||
|
||||
currentStates = slices.Concat(currentStates, tempStates)
|
||||
for _, state := range tempStates {
|
||||
heap.Push(currentStates, newPriorQueueItem(state))
|
||||
}
|
||||
tempStates = nil
|
||||
|
||||
// Take any transitions corresponding to current character
|
||||
@@ -331,10 +341,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
|
||||
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
|
||||
for numStatesMatched == 0 && lastStateInList == false {
|
||||
if len(currentStates) == 0 {
|
||||
if currentStates.Len() == 0 {
|
||||
break
|
||||
}
|
||||
state, _ := pop(&currentStates)
|
||||
stateItem := heap.Pop(currentStates)
|
||||
state := stateItem.(*priorQueueItem).state
|
||||
matches, numMatches := state.matchesFor(str, i)
|
||||
if numMatches > 0 {
|
||||
numStatesMatched++
|
||||
@@ -344,6 +355,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
if m.threadGroups == nil {
|
||||
m.threadGroups = newMatch(numGroups + 1)
|
||||
}
|
||||
m.threadSP = state.threadSP + 1
|
||||
copy(m.threadGroups, state.threadGroups)
|
||||
}
|
||||
}
|
||||
@@ -382,7 +394,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
// a. A last-state
|
||||
// b. Empty
|
||||
// c. Doesn't assert anything
|
||||
for _, s := range currentStates {
|
||||
for _, stateItem := range *currentStates {
|
||||
s := stateItem.state
|
||||
if s.isLast && s.isEmpty && s.assert == noneAssert {
|
||||
lastStatePtr = s
|
||||
lastStateInList = true
|
||||
@@ -403,7 +416,10 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
|
||||
// Check if we can find a zero-length match
|
||||
if foundPath == false {
|
||||
if ok := zeroMatchPossible(str, i, numGroups, currentStates...); ok {
|
||||
currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
|
||||
return item.state
|
||||
})
|
||||
if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
|
||||
if tempIndices[0].IsValid() == false {
|
||||
tempIndices[0] = Group{startIdx, startIdx}
|
||||
}
|
||||
@@ -423,8 +439,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
}
|
||||
return false, []Group{}, startIdx
|
||||
}
|
||||
currentStates = make([]*nfaState, len(tempStates))
|
||||
copy(currentStates, tempStates)
|
||||
currentStates = &priorityQueue{}
|
||||
slices.Reverse(tempStates)
|
||||
for _, state := range tempStates {
|
||||
heap.Push(currentStates, newPriorQueueItem(state))
|
||||
}
|
||||
tempStates = nil
|
||||
|
||||
i++
|
||||
@@ -432,21 +451,28 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
|
||||
// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
|
||||
// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
|
||||
zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
|
||||
tempStates = append(tempStates, zeroStates...)
|
||||
num_appended := 0 // Number of unique states added to tempStates
|
||||
for isZero == true {
|
||||
zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
|
||||
tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
|
||||
if num_appended == 0 { // Break if we haven't appended any more unique values
|
||||
break
|
||||
if currentStates.Len() > 0 {
|
||||
topStateItem := currentStates.peek()
|
||||
topState := topStateItem.(*priorQueueItem).state
|
||||
zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
|
||||
tempStates = append(tempStates, zeroStates...)
|
||||
num_appended := 0 // Number of unique states added to tempStates
|
||||
for isZero == true {
|
||||
zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
|
||||
tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
|
||||
if num_appended == 0 { // Break if we haven't appended any more unique values
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
currentStates = append(currentStates, tempStates...)
|
||||
for _, state := range tempStates {
|
||||
heap.Push(currentStates, newPriorQueueItem(state))
|
||||
}
|
||||
tempStates = nil
|
||||
|
||||
for _, state := range currentStates {
|
||||
for _, stateItem := range *currentStates {
|
||||
state := stateItem.state
|
||||
// Only add the match if the start index is in bounds. If the state has an assertion,
|
||||
// make sure the assertion checks out.
|
||||
if state.isLast && i <= len(str) {
|
||||
|
Reference in New Issue
Block a user