Compare commits
3 Commits
ef476e8875
...
bc32e0cb76
Author | SHA1 | Date | |
---|---|---|---|
bc32e0cb76 | |||
ad0f7d0178 | |||
4e597f8eb1 |
@@ -2,6 +2,7 @@ package regex
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"sort"
|
||||
)
|
||||
|
||||
@@ -320,7 +321,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
}
|
||||
}
|
||||
|
||||
currentStates, _ = uniqueAppend(currentStates, tempStates...)
|
||||
currentStates = slices.Concat(currentStates, tempStates)
|
||||
tempStates = nil
|
||||
|
||||
// Take any transitions corresponding to current character
|
||||
@@ -329,11 +330,15 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
lastStateInList := false // Whether or not a last state was in our list of states
|
||||
var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
|
||||
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
|
||||
for _, state := range currentStates {
|
||||
for numStatesMatched == 0 && lastStateInList == false {
|
||||
if len(currentStates) == 0 {
|
||||
break
|
||||
}
|
||||
state, _ := pop(&currentStates)
|
||||
matches, numMatches := state.matchesFor(str, i)
|
||||
if numMatches > 0 {
|
||||
numStatesMatched++
|
||||
tempStates = append(tempStates, matches...)
|
||||
tempStates = append([]*nfaState(nil), matches...)
|
||||
foundPath = true
|
||||
for _, m := range matches {
|
||||
if m.threadGroups == nil {
|
||||
@@ -383,12 +388,17 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
lastStateInList = true
|
||||
}
|
||||
}
|
||||
if lastStateInList { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
|
||||
if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
|
||||
for j := 1; j < numGroups+1; j++ {
|
||||
tempIndices[j] = lastStatePtr.threadGroups[j]
|
||||
}
|
||||
endIdx = i
|
||||
tempIndices[0] = Group{startIdx, endIdx}
|
||||
if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
|
||||
return true, tempIndices, tempIndices[0].EndIdx + 1
|
||||
} else {
|
||||
return true, tempIndices, tempIndices[0].EndIdx
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we can find a zero-length match
|
||||
|
@@ -31,6 +31,8 @@ type nfaState struct {
|
||||
output []*nfaState // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
|
||||
transitions map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list_ of states). This is useful if one character can lead to multiple states, e.g. ab|aa.
|
||||
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
||||
isQuestion bool // Identifies whether current node is a 0-state representing the question operator
|
||||
isAlternation bool // Identifies whether current node is a 0-state representing an alternation
|
||||
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
|
||||
allChars bool // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
|
||||
except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
|
||||
@@ -70,6 +72,8 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
|
||||
output: make([]*nfaState, len(stateToClone.output)),
|
||||
transitions: make(map[int][]*nfaState),
|
||||
isKleene: stateToClone.isKleene,
|
||||
isQuestion: stateToClone.isQuestion,
|
||||
isAlternation: stateToClone.isAlternation,
|
||||
assert: stateToClone.assert,
|
||||
zeroMatchFound: stateToClone.zeroMatchFound,
|
||||
allChars: stateToClone.allChars,
|
||||
@@ -341,6 +345,7 @@ func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
|
||||
}
|
||||
toReturn.content = newContents(epsilon)
|
||||
toReturn.isEmpty = true
|
||||
toReturn.isAlternation = true
|
||||
|
||||
return toReturn
|
||||
}
|
||||
@@ -351,6 +356,7 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
|
||||
s2.content = newContents(epsilon)
|
||||
s2.output = append(s2.output, s2)
|
||||
s2.isEmpty = true
|
||||
s2.isQuestion = true
|
||||
s3 := alternate(s1, s2)
|
||||
return s3
|
||||
}
|
||||
|
76
regex/priorityQueue.go
Normal file
76
regex/priorityQueue.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package regex
|
||||
|
||||
import "container/heap"
|
||||
|
||||
// Implement a priority queue using container/heap
|
||||
|
||||
const (
|
||||
min_priority int = iota
|
||||
zerostate_priority
|
||||
alternation_priority
|
||||
kleene_priority
|
||||
char_priority
|
||||
max_priority
|
||||
)
|
||||
|
||||
func getPriority(state *nfaState) int {
|
||||
if state.isKleene {
|
||||
return kleene_priority
|
||||
} else if state.isQuestion || state.isAlternation {
|
||||
return alternation_priority
|
||||
} else {
|
||||
if state.isEmpty {
|
||||
return zerostate_priority
|
||||
} else {
|
||||
return char_priority
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type priorQueueItem struct {
|
||||
state *nfaState
|
||||
priority int
|
||||
index int
|
||||
}
|
||||
|
||||
type priorityQueue []*priorQueueItem
|
||||
|
||||
func (pq priorityQueue) Len() int {
|
||||
return len(pq)
|
||||
}
|
||||
|
||||
func (pq priorityQueue) Less(i, j int) bool {
|
||||
if pq[i].priority == pq[j].priority {
|
||||
return pq[i].index > pq[j].index
|
||||
}
|
||||
return pq[i].priority > pq[j].priority // We want max-heap, so we use greater-than
|
||||
}
|
||||
|
||||
func (pq priorityQueue) Swap(i, j int) {
|
||||
pq[i], pq[j] = pq[j], pq[i]
|
||||
pq[i].index = i
|
||||
pq[j].index = j
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) Push(x any) {
|
||||
length := len(*pq)
|
||||
item := x.(*priorQueueItem)
|
||||
item.index = length
|
||||
*pq = append(*pq, item)
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) Pop() any {
|
||||
old := *pq
|
||||
n := len(old)
|
||||
item := old[n-1]
|
||||
old[n-1] = nil
|
||||
item.index = -1
|
||||
*pq = old[0 : n-1]
|
||||
return item
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
|
||||
item.state = value
|
||||
item.priority = priority
|
||||
heap.Fix(pq, item.index)
|
||||
}
|
Reference in New Issue
Block a user