Implement PCRE Matching (prefer left-branch) #2
@@ -1,6 +1,7 @@
|
|||||||
package regex
|
package regex
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"container/heap"
|
||||||
"fmt"
|
"fmt"
|
||||||
"slices"
|
"slices"
|
||||||
"sort"
|
"sort"
|
||||||
@@ -271,7 +272,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
foundPath := false
|
foundPath := false
|
||||||
startIdx := offset
|
startIdx := offset
|
||||||
endIdx := offset
|
endIdx := offset
|
||||||
currentStates := make([]*nfaState, 0)
|
currentStates := &priorityQueue{}
|
||||||
|
heap.Init(currentStates)
|
||||||
tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
|
tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
|
||||||
i := offset // Index in string
|
i := offset // Index in string
|
||||||
startingFrom := i // Store starting index
|
startingFrom := i // Store starting index
|
||||||
@@ -301,16 +303,19 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
// tempIndices[start.groupNum].startIdx = i
|
// tempIndices[start.groupNum].startIdx = i
|
||||||
}
|
}
|
||||||
|
|
||||||
currentStates = append(currentStates, start)
|
start.threadSP = i
|
||||||
|
heap.Push(currentStates, newPriorQueueItem(start))
|
||||||
// Main loop
|
// Main loop
|
||||||
for i < len(str) {
|
for currentStates.Len() > 0 {
|
||||||
|
currentState := heap.Pop(currentStates)
|
||||||
foundPath = false
|
foundPath = false
|
||||||
|
|
||||||
zeroStates := make([]*nfaState, 0)
|
zeroStates := make([]*nfaState, 0)
|
||||||
// Keep taking zero-states, until there are no more left to take
|
// Keep taking zero-states, until there are no more left to take
|
||||||
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
|
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
|
||||||
zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
|
topStateItem := currentStates.peek()
|
||||||
|
topState := topStateItem.(*priorQueueItem).state
|
||||||
|
zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
|
||||||
tempStates = append(tempStates, zeroStates...)
|
tempStates = append(tempStates, zeroStates...)
|
||||||
num_appended := 0
|
num_appended := 0
|
||||||
for isZero == true {
|
for isZero == true {
|
||||||
@@ -320,8 +325,13 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if isZero == true {
|
||||||
|
currentStates.Pop()
|
||||||
|
}
|
||||||
|
|
||||||
currentStates = slices.Concat(currentStates, tempStates)
|
for _, state := range tempStates {
|
||||||
|
heap.Push(currentStates, newPriorQueueItem(state))
|
||||||
|
}
|
||||||
tempStates = nil
|
tempStates = nil
|
||||||
|
|
||||||
// Take any transitions corresponding to current character
|
// Take any transitions corresponding to current character
|
||||||
@@ -331,10 +341,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
|
var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
|
||||||
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
|
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
|
||||||
for numStatesMatched == 0 && lastStateInList == false {
|
for numStatesMatched == 0 && lastStateInList == false {
|
||||||
if len(currentStates) == 0 {
|
if currentStates.Len() == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
state, _ := pop(&currentStates)
|
stateItem := heap.Pop(currentStates)
|
||||||
|
state := stateItem.(*priorQueueItem).state
|
||||||
matches, numMatches := state.matchesFor(str, i)
|
matches, numMatches := state.matchesFor(str, i)
|
||||||
if numMatches > 0 {
|
if numMatches > 0 {
|
||||||
numStatesMatched++
|
numStatesMatched++
|
||||||
@@ -344,6 +355,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
if m.threadGroups == nil {
|
if m.threadGroups == nil {
|
||||||
m.threadGroups = newMatch(numGroups + 1)
|
m.threadGroups = newMatch(numGroups + 1)
|
||||||
}
|
}
|
||||||
|
m.threadSP = state.threadSP + 1
|
||||||
copy(m.threadGroups, state.threadGroups)
|
copy(m.threadGroups, state.threadGroups)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -382,7 +394,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
// a. A last-state
|
// a. A last-state
|
||||||
// b. Empty
|
// b. Empty
|
||||||
// c. Doesn't assert anything
|
// c. Doesn't assert anything
|
||||||
for _, s := range currentStates {
|
for _, stateItem := range *currentStates {
|
||||||
|
s := stateItem.state
|
||||||
if s.isLast && s.isEmpty && s.assert == noneAssert {
|
if s.isLast && s.isEmpty && s.assert == noneAssert {
|
||||||
lastStatePtr = s
|
lastStatePtr = s
|
||||||
lastStateInList = true
|
lastStateInList = true
|
||||||
@@ -403,7 +416,10 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
|
|
||||||
// Check if we can find a zero-length match
|
// Check if we can find a zero-length match
|
||||||
if foundPath == false {
|
if foundPath == false {
|
||||||
if ok := zeroMatchPossible(str, i, numGroups, currentStates...); ok {
|
currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
|
||||||
|
return item.state
|
||||||
|
})
|
||||||
|
if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
|
||||||
if tempIndices[0].IsValid() == false {
|
if tempIndices[0].IsValid() == false {
|
||||||
tempIndices[0] = Group{startIdx, startIdx}
|
tempIndices[0] = Group{startIdx, startIdx}
|
||||||
}
|
}
|
||||||
@@ -423,8 +439,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
}
|
}
|
||||||
return false, []Group{}, startIdx
|
return false, []Group{}, startIdx
|
||||||
}
|
}
|
||||||
currentStates = make([]*nfaState, len(tempStates))
|
currentStates = &priorityQueue{}
|
||||||
copy(currentStates, tempStates)
|
slices.Reverse(tempStates)
|
||||||
|
for _, state := range tempStates {
|
||||||
|
heap.Push(currentStates, newPriorQueueItem(state))
|
||||||
|
}
|
||||||
tempStates = nil
|
tempStates = nil
|
||||||
|
|
||||||
i++
|
i++
|
||||||
@@ -432,21 +451,28 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
|
|
||||||
// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
|
// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
|
||||||
// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
|
// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
|
||||||
zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
|
if currentStates.Len() > 0 {
|
||||||
tempStates = append(tempStates, zeroStates...)
|
topStateItem := currentStates.peek()
|
||||||
num_appended := 0 // Number of unique states added to tempStates
|
topState := topStateItem.(*priorQueueItem).state
|
||||||
for isZero == true {
|
zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
|
||||||
zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
|
tempStates = append(tempStates, zeroStates...)
|
||||||
tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
|
num_appended := 0 // Number of unique states added to tempStates
|
||||||
if num_appended == 0 { // Break if we haven't appended any more unique values
|
for isZero == true {
|
||||||
break
|
zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
|
||||||
|
tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
|
||||||
|
if num_appended == 0 { // Break if we haven't appended any more unique values
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
currentStates = append(currentStates, tempStates...)
|
for _, state := range tempStates {
|
||||||
|
heap.Push(currentStates, newPriorQueueItem(state))
|
||||||
|
}
|
||||||
tempStates = nil
|
tempStates = nil
|
||||||
|
|
||||||
for _, state := range currentStates {
|
for _, stateItem := range *currentStates {
|
||||||
|
state := stateItem.state
|
||||||
// Only add the match if the start index is in bounds. If the state has an assertion,
|
// Only add the match if the start index is in bounds. If the state has an assertion,
|
||||||
// make sure the assertion checks out.
|
// make sure the assertion checks out.
|
||||||
if state.isLast && i <= len(str) {
|
if state.isLast && i <= len(str) {
|
||||||
|
Reference in New Issue
Block a user