Compare commits
8 Commits
posixStyle
...
ad273b0c68
| Author | SHA1 | Date | |
|---|---|---|---|
| ad273b0c68 | |||
| e167cdb2cb | |||
| 1fd48ae614 | |||
| 09812956ac | |||
| fbc9dfcc95 | |||
| bc32e0cb76 | |||
| ad0f7d0178 | |||
| 4e597f8eb1 |
4
Makefile
4
Makefile
@@ -6,8 +6,8 @@ fmt:
|
|||||||
vet: fmt
|
vet: fmt
|
||||||
go vet ./...
|
go vet ./...
|
||||||
buildLib: vet
|
buildLib: vet
|
||||||
go build -gcflags="-N -l" ./...
|
go build -gcflags="all=-N -l" ./...
|
||||||
buildCmd: buildLib
|
buildCmd: buildLib
|
||||||
go build -C cmd/ -gcflags="-N -l" -o re ./...
|
go build -C cmd/ -gcflags="all=-N -l" -o re ./...
|
||||||
test: buildCmd
|
test: buildCmd
|
||||||
go test -v ./...
|
go test -v ./...
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
package regex
|
package regex
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"container/heap"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"slices"
|
||||||
"sort"
|
"sort"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -270,7 +272,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
foundPath := false
|
foundPath := false
|
||||||
startIdx := offset
|
startIdx := offset
|
||||||
endIdx := offset
|
endIdx := offset
|
||||||
currentStates := make([]*nfaState, 0)
|
currentStates := &priorityQueue{}
|
||||||
|
heap.Init(currentStates)
|
||||||
tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
|
tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
|
||||||
i := offset // Index in string
|
i := offset // Index in string
|
||||||
startingFrom := i // Store starting index
|
startingFrom := i // Store starting index
|
||||||
@@ -300,16 +303,19 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
// tempIndices[start.groupNum].startIdx = i
|
// tempIndices[start.groupNum].startIdx = i
|
||||||
}
|
}
|
||||||
|
|
||||||
currentStates = append(currentStates, start)
|
start.threadSP = i
|
||||||
|
heap.Push(currentStates, newPriorQueueItem(start))
|
||||||
// Main loop
|
// Main loop
|
||||||
for i < len(str) {
|
for currentStates.Len() > 0 {
|
||||||
|
currentState := heap.Pop(currentStates)
|
||||||
foundPath = false
|
foundPath = false
|
||||||
|
|
||||||
zeroStates := make([]*nfaState, 0)
|
zeroStates := make([]*nfaState, 0)
|
||||||
// Keep taking zero-states, until there are no more left to take
|
// Keep taking zero-states, until there are no more left to take
|
||||||
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
|
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
|
||||||
zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
|
topStateItem := currentStates.peek()
|
||||||
|
topState := topStateItem.(*priorQueueItem).state
|
||||||
|
zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
|
||||||
tempStates = append(tempStates, zeroStates...)
|
tempStates = append(tempStates, zeroStates...)
|
||||||
num_appended := 0
|
num_appended := 0
|
||||||
for isZero == true {
|
for isZero == true {
|
||||||
@@ -319,8 +325,13 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if isZero == true {
|
||||||
|
currentStates.Pop()
|
||||||
|
}
|
||||||
|
|
||||||
currentStates, _ = uniqueAppend(currentStates, tempStates...)
|
for _, state := range tempStates {
|
||||||
|
heap.Push(currentStates, newPriorQueueItem(state))
|
||||||
|
}
|
||||||
tempStates = nil
|
tempStates = nil
|
||||||
|
|
||||||
// Take any transitions corresponding to current character
|
// Take any transitions corresponding to current character
|
||||||
@@ -329,16 +340,22 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
lastStateInList := false // Whether or not a last state was in our list of states
|
lastStateInList := false // Whether or not a last state was in our list of states
|
||||||
var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
|
var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
|
||||||
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
|
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
|
||||||
for _, state := range currentStates {
|
for numStatesMatched == 0 && lastStateInList == false {
|
||||||
|
if currentStates.Len() == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
stateItem := heap.Pop(currentStates)
|
||||||
|
state := stateItem.(*priorQueueItem).state
|
||||||
matches, numMatches := state.matchesFor(str, i)
|
matches, numMatches := state.matchesFor(str, i)
|
||||||
if numMatches > 0 {
|
if numMatches > 0 {
|
||||||
numStatesMatched++
|
numStatesMatched++
|
||||||
tempStates = append(tempStates, matches...)
|
tempStates = append([]*nfaState(nil), matches...)
|
||||||
foundPath = true
|
foundPath = true
|
||||||
for _, m := range matches {
|
for _, m := range matches {
|
||||||
if m.threadGroups == nil {
|
if m.threadGroups == nil {
|
||||||
m.threadGroups = newMatch(numGroups + 1)
|
m.threadGroups = newMatch(numGroups + 1)
|
||||||
}
|
}
|
||||||
|
m.threadSP = state.threadSP + 1
|
||||||
copy(m.threadGroups, state.threadGroups)
|
copy(m.threadGroups, state.threadGroups)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -377,23 +394,32 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
// a. A last-state
|
// a. A last-state
|
||||||
// b. Empty
|
// b. Empty
|
||||||
// c. Doesn't assert anything
|
// c. Doesn't assert anything
|
||||||
for _, s := range currentStates {
|
for _, stateItem := range *currentStates {
|
||||||
|
s := stateItem.state
|
||||||
if s.isLast && s.isEmpty && s.assert == noneAssert {
|
if s.isLast && s.isEmpty && s.assert == noneAssert {
|
||||||
lastStatePtr = s
|
lastStatePtr = s
|
||||||
lastStateInList = true
|
lastStateInList = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if lastStateInList { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
|
if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
|
||||||
for j := 1; j < numGroups+1; j++ {
|
for j := 1; j < numGroups+1; j++ {
|
||||||
tempIndices[j] = lastStatePtr.threadGroups[j]
|
tempIndices[j] = lastStatePtr.threadGroups[j]
|
||||||
}
|
}
|
||||||
endIdx = i
|
endIdx = i
|
||||||
tempIndices[0] = Group{startIdx, endIdx}
|
tempIndices[0] = Group{startIdx, endIdx}
|
||||||
|
if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
|
||||||
|
return true, tempIndices, tempIndices[0].EndIdx + 1
|
||||||
|
} else {
|
||||||
|
return true, tempIndices, tempIndices[0].EndIdx
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if we can find a zero-length match
|
// Check if we can find a zero-length match
|
||||||
if foundPath == false {
|
if foundPath == false {
|
||||||
if ok := zeroMatchPossible(str, i, numGroups, currentStates...); ok {
|
currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
|
||||||
|
return item.state
|
||||||
|
})
|
||||||
|
if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
|
||||||
if tempIndices[0].IsValid() == false {
|
if tempIndices[0].IsValid() == false {
|
||||||
tempIndices[0] = Group{startIdx, startIdx}
|
tempIndices[0] = Group{startIdx, startIdx}
|
||||||
}
|
}
|
||||||
@@ -413,8 +439,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
}
|
}
|
||||||
return false, []Group{}, startIdx
|
return false, []Group{}, startIdx
|
||||||
}
|
}
|
||||||
currentStates = make([]*nfaState, len(tempStates))
|
currentStates = &priorityQueue{}
|
||||||
copy(currentStates, tempStates)
|
slices.Reverse(tempStates)
|
||||||
|
for _, state := range tempStates {
|
||||||
|
heap.Push(currentStates, newPriorQueueItem(state))
|
||||||
|
}
|
||||||
tempStates = nil
|
tempStates = nil
|
||||||
|
|
||||||
i++
|
i++
|
||||||
@@ -422,21 +451,28 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
|
|
||||||
// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
|
// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
|
||||||
// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
|
// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
|
||||||
zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
|
if currentStates.Len() > 0 {
|
||||||
tempStates = append(tempStates, zeroStates...)
|
topStateItem := currentStates.peek()
|
||||||
num_appended := 0 // Number of unique states addded to tempStates
|
topState := topStateItem.(*priorQueueItem).state
|
||||||
for isZero == true {
|
zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
|
||||||
zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
|
tempStates = append(tempStates, zeroStates...)
|
||||||
tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
|
num_appended := 0 // Number of unique states added to tempStates
|
||||||
if num_appended == 0 { // Break if we haven't appended any more unique values
|
for isZero == true {
|
||||||
break
|
zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
|
||||||
|
tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
|
||||||
|
if num_appended == 0 { // Break if we haven't appended any more unique values
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
currentStates = append(currentStates, tempStates...)
|
for _, state := range tempStates {
|
||||||
|
heap.Push(currentStates, newPriorQueueItem(state))
|
||||||
|
}
|
||||||
tempStates = nil
|
tempStates = nil
|
||||||
|
|
||||||
for _, state := range currentStates {
|
for _, stateItem := range *currentStates {
|
||||||
|
state := stateItem.state
|
||||||
// Only add the match if the start index is in bounds. If the state has an assertion,
|
// Only add the match if the start index is in bounds. If the state has an assertion,
|
||||||
// make sure the assertion checks out.
|
// make sure the assertion checks out.
|
||||||
if state.isLast && i <= len(str) {
|
if state.isLast && i <= len(str) {
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ type nfaState struct {
|
|||||||
output []*nfaState // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
|
output []*nfaState // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
|
||||||
transitions map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
|
transitions map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
|
||||||
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
||||||
|
isQuestion bool // Identifies whether current node is a 0-state representing the question operator
|
||||||
|
isAlternation bool // Identifies whether current node is a 0-state representing an alternation
|
||||||
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
|
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
|
||||||
allChars bool // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
|
allChars bool // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
|
||||||
except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
|
except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
|
||||||
@@ -43,6 +45,7 @@ type nfaState struct {
|
|||||||
// The following properties depend on the current match - I should think about resetting them for every match.
|
// The following properties depend on the current match - I should think about resetting them for every match.
|
||||||
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
|
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
|
||||||
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
|
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
|
||||||
|
threadSP int // The string pointer of the thread - where it is in the input string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clones the NFA starting from the given state.
|
// Clones the NFA starting from the given state.
|
||||||
@@ -70,6 +73,8 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
|
|||||||
output: make([]*nfaState, len(stateToClone.output)),
|
output: make([]*nfaState, len(stateToClone.output)),
|
||||||
transitions: make(map[int][]*nfaState),
|
transitions: make(map[int][]*nfaState),
|
||||||
isKleene: stateToClone.isKleene,
|
isKleene: stateToClone.isKleene,
|
||||||
|
isQuestion: stateToClone.isQuestion,
|
||||||
|
isAlternation: stateToClone.isAlternation,
|
||||||
assert: stateToClone.assert,
|
assert: stateToClone.assert,
|
||||||
zeroMatchFound: stateToClone.zeroMatchFound,
|
zeroMatchFound: stateToClone.zeroMatchFound,
|
||||||
allChars: stateToClone.allChars,
|
allChars: stateToClone.allChars,
|
||||||
@@ -116,6 +121,7 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
|
|||||||
}
|
}
|
||||||
// Assuming it hasn't been visited
|
// Assuming it hasn't been visited
|
||||||
state.threadGroups = nil
|
state.threadGroups = nil
|
||||||
|
state.threadSP = 0
|
||||||
visitedMap[state] = true
|
visitedMap[state] = true
|
||||||
for _, v := range state.transitions {
|
for _, v := range state.transitions {
|
||||||
for _, nextState := range v {
|
for _, nextState := range v {
|
||||||
@@ -341,6 +347,7 @@ func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
|
|||||||
}
|
}
|
||||||
toReturn.content = newContents(epsilon)
|
toReturn.content = newContents(epsilon)
|
||||||
toReturn.isEmpty = true
|
toReturn.isEmpty = true
|
||||||
|
toReturn.isAlternation = true
|
||||||
|
|
||||||
return toReturn
|
return toReturn
|
||||||
}
|
}
|
||||||
@@ -351,6 +358,7 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
|
|||||||
s2.content = newContents(epsilon)
|
s2.content = newContents(epsilon)
|
||||||
s2.output = append(s2.output, s2)
|
s2.output = append(s2.output, s2)
|
||||||
s2.isEmpty = true
|
s2.isEmpty = true
|
||||||
|
s2.isQuestion = true
|
||||||
s3 := alternate(s1, s2)
|
s3 := alternate(s1, s2)
|
||||||
return s3
|
return s3
|
||||||
}
|
}
|
||||||
|
|||||||
89
regex/priorityQueue.go
Normal file
89
regex/priorityQueue.go
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
package regex
|
||||||
|
|
||||||
|
import "container/heap"
|
||||||
|
|
||||||
|
// Implement a priority queue using container/heap
|
||||||
|
|
||||||
|
// Priorities assigned to NFA states stored in the priority queue.
// priorityQueue.Less orders larger values first, so a state with a larger
// priority is handed out earlier by heap.Pop.
const (
	// Smallest value in the enumeration (0). Not returned by getPriority.
	min_priority = int(iota)
	// Returned by getPriority for Kleene states and for other empty states.
	zerostate_priority
	// Returned by getPriority for alternation states.
	alternation_priority
	// NOTE(review): declared but not returned by getPriority in this file.
	kleene_priority
	// Returned by getPriority for non-empty (character-consuming) states.
	char_priority
	// Largest value in the enumeration. Not returned by getPriority.
	max_priority
)
|
||||||
|
|
||||||
|
func getPriority(state *nfaState) int {
|
||||||
|
if state.isKleene {
|
||||||
|
return zerostate_priority
|
||||||
|
} else if state.isAlternation {
|
||||||
|
return alternation_priority
|
||||||
|
} else {
|
||||||
|
if state.isEmpty {
|
||||||
|
return zerostate_priority
|
||||||
|
} else {
|
||||||
|
return char_priority
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type priorQueueItem struct {
|
||||||
|
state *nfaState
|
||||||
|
priority int
|
||||||
|
index int
|
||||||
|
}
|
||||||
|
|
||||||
|
func newPriorQueueItem(state *nfaState) *priorQueueItem {
|
||||||
|
return &priorQueueItem{
|
||||||
|
state: state,
|
||||||
|
index: -1,
|
||||||
|
priority: getPriority(state),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// priorityQueue is a slice of queue entries. Together with the Len, Less,
// Swap, Push and Pop methods declared on it, it is meant to be driven
// through the container/heap functions (heap.Init, heap.Push, heap.Pop,
// heap.Fix).
type priorityQueue []*priorQueueItem
|
||||||
|
|
||||||
|
func (pq priorityQueue) Len() int {
|
||||||
|
return len(pq)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq priorityQueue) Less(i, j int) bool {
|
||||||
|
if pq[i].priority == pq[j].priority {
|
||||||
|
return pq[i].index < pq[j].index
|
||||||
|
}
|
||||||
|
return pq[i].priority > pq[j].priority // We want max-heap, so we use greater-than
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq priorityQueue) Swap(i, j int) {
|
||||||
|
pq[i], pq[j] = pq[j], pq[i]
|
||||||
|
pq[i].index = i
|
||||||
|
pq[j].index = j
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq *priorityQueue) Push(x any) {
|
||||||
|
length := len(*pq)
|
||||||
|
item := x.(*priorQueueItem)
|
||||||
|
item.index = length
|
||||||
|
*pq = append(*pq, item)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq *priorityQueue) Pop() any {
|
||||||
|
old := *pq
|
||||||
|
n := len(old)
|
||||||
|
item := old[n-1]
|
||||||
|
old[n-1] = nil
|
||||||
|
item.index = -1
|
||||||
|
*pq = old[0 : n-1]
|
||||||
|
return item
|
||||||
|
}
|
||||||
|
func (pq *priorityQueue) peek() any {
|
||||||
|
queue := *pq
|
||||||
|
n := len(queue)
|
||||||
|
return queue[n-1]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
|
||||||
|
item.state = value
|
||||||
|
item.priority = priority
|
||||||
|
heap.Fix(pq, item.index)
|
||||||
|
}
|
||||||
@@ -701,7 +701,7 @@ func TestFind(t *testing.T) {
|
|||||||
if len(test.result) == 0 {
|
if len(test.result) == 0 {
|
||||||
return // Manually pass the test, because this is the expected behavior
|
return // Manually pass the test, because this is the expected behavior
|
||||||
} else {
|
} else {
|
||||||
t.Errorf("Wanted no match Got %v\n", groupIndex)
|
t.Errorf("Wanted %v Got no matches\n", test.result)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if groupIndex != test.result[0] {
|
if groupIndex != test.result[0] {
|
||||||
|
|||||||
Reference in New Issue
Block a user