Used Pike's algorithm (an extension of Thompson's algorithm; see Russ Cox's second article, "Regular Expression Matching: the Virtual Machine Approach"); I think I almost have a working PCRE-style engine
This commit is contained in:
@@ -2,7 +2,6 @@ package regex
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"sort"
|
||||
)
|
||||
|
||||
@@ -252,32 +251,36 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
|
||||
return indices
|
||||
}
|
||||
|
||||
func addStateToList(idx int, list []nfaState, state nfaState) []nfaState {
|
||||
func addStateToList(idx int, list []nfaState, state nfaState, threadGroups []Group) []nfaState {
|
||||
if stateExists(list, state) {
|
||||
return list
|
||||
}
|
||||
if state.isKleene || state.isQuestion {
|
||||
copyThread(state.splitState, state)
|
||||
list = addStateToList(idx, list, *state.splitState, threadGroups)
|
||||
copyThread(state.next, state)
|
||||
list = addStateToList(idx, list, *state.next, threadGroups)
|
||||
return list
|
||||
}
|
||||
if state.isAlternation {
|
||||
copyThread(state.next, state)
|
||||
list = append(list, addStateToList(idx, list, *state.next)...)
|
||||
list = addStateToList(idx, list, *state.next, threadGroups)
|
||||
copyThread(state.splitState, state)
|
||||
list = append(list, addStateToList(idx, list, *state.splitState)...)
|
||||
return list
|
||||
}
|
||||
if state.isKleene {
|
||||
copyThread(state.splitState, state)
|
||||
list = append(list, addStateToList(idx, list, *state.splitState)...)
|
||||
copyThread(state.next, state)
|
||||
list = append(list, addStateToList(idx, list, *state.next)...)
|
||||
list = addStateToList(idx, list, *state.splitState, threadGroups)
|
||||
return list
|
||||
}
|
||||
|
||||
state.threadGroups = append([]Group{}, threadGroups...)
|
||||
if state.groupBegin {
|
||||
state.threadGroups[state.groupNum].StartIdx = idx
|
||||
return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...)
|
||||
}
|
||||
if state.groupEnd {
|
||||
state.threadGroups[state.groupNum].StartIdx = idx
|
||||
state.threadGroups[state.groupNum].EndIdx = idx
|
||||
return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...)
|
||||
}
|
||||
copyThread(state.next, state)
|
||||
return append(list, *state.next)
|
||||
state.threadGroups = append([]Group{}, threadGroups...)
|
||||
return append(list, state)
|
||||
|
||||
}
|
||||
|
||||
@@ -335,138 +338,113 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
||||
// tempIndices[start.groupNum].startIdx = i
|
||||
//}
|
||||
|
||||
currentStates = append(currentStates, *start)
|
||||
var foundMatch bool
|
||||
var isEmptyAndNoAssertion bool
|
||||
start.threadGroups = newMatch(numGroups + 1)
|
||||
start.threadGroups[0].StartIdx = i
|
||||
currentStates = addStateToList(i, currentStates, *start, start.threadGroups)
|
||||
var match Match = nil
|
||||
// var isEmptyAndNoAssertion bool
|
||||
// Main loop
|
||||
for idx := i; idx <= len(str); idx++ {
|
||||
if len(currentStates) == 0 {
|
||||
break
|
||||
}
|
||||
for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
|
||||
currentState := currentStates[currentStateIdx]
|
||||
foundMatch = false
|
||||
isEmptyAndNoAssertion = false
|
||||
|
||||
if currentState.threadGroups == nil {
|
||||
currentState.threadGroups = newMatch(numGroups + 1)
|
||||
currentState.threadGroups[0].StartIdx = idx
|
||||
}
|
||||
|
||||
if currentState.groupBegin {
|
||||
currentState.threadGroups[currentState.groupNum].StartIdx = idx
|
||||
// allMatches := make([]nfaState, 0)
|
||||
// for _, v := range currentState.transitions {
|
||||
// dereferenced := funcMap(v, func(s *nfaState) nfaState {
|
||||
// return *s
|
||||
// })
|
||||
// allMatches = append(allMatches, dereferenced...)
|
||||
// }
|
||||
// slices.Reverse(allMatches)
|
||||
// for i := range allMatches {
|
||||
// copyThread(&allMatches[i], currentState)
|
||||
// }
|
||||
// currentStates = append(currentStates, allMatches...)
|
||||
}
|
||||
if currentState.groupEnd {
|
||||
currentState.threadGroups[currentState.groupNum].EndIdx = idx
|
||||
// allMatches := make([]nfaState, 0)
|
||||
// for _, v := range currentState.transitions {
|
||||
// dereferenced := funcMap(v, func(s *nfaState) nfaState {
|
||||
// return *s
|
||||
// })
|
||||
// allMatches = append(allMatches, dereferenced...)
|
||||
// }
|
||||
// slices.Reverse(allMatches)
|
||||
// for i := range allMatches {
|
||||
// copyThread(&allMatches[i], currentState)
|
||||
// }
|
||||
// currentStates = append(currentStates, allMatches...)
|
||||
if currentState.isLast {
|
||||
currentState.threadGroups[0].EndIdx = idx
|
||||
match = append([]Group{}, currentState.threadGroups...)
|
||||
break
|
||||
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
|
||||
if currentState.contentContains(str, idx) {
|
||||
nextStates = addStateToList(idx+1, nextStates, *currentState.next, currentState.threadGroups)
|
||||
}
|
||||
}
|
||||
|
||||
// if currentState.isKleene {
|
||||
// // Append the next-state (after the kleene), then append the kleene state
|
||||
// allMatches := make([]*nfaState, 0)
|
||||
// for _, v := range currentState.transitions {
|
||||
// allMatches = append(allMatches, v...)
|
||||
// if currentState.groupBegin {
|
||||
// currentState.threadGroups[currentState.groupNum].StartIdx = idx
|
||||
// }
|
||||
// slices.Reverse(allMatches)
|
||||
// for _, m := range allMatches {
|
||||
// m.threadGroups = currentState.threadGroups
|
||||
// m.threadSP = idx
|
||||
// if currentState.groupEnd {
|
||||
// currentState.threadGroups[currentState.groupNum].EndIdx = idx
|
||||
// }
|
||||
// currentStates = append(currentStates, allMatches...)
|
||||
//
|
||||
// // kleeneState := currentState.kleeneState
|
||||
// // kleeneState.threadGroups = currentState.threadGroups
|
||||
// // kleeneState.threadSP = currentState.threadSP
|
||||
// // currentStates = append(currentStates, kleeneState)
|
||||
// continue
|
||||
// }
|
||||
|
||||
// Alternation - enqueue left then right state, and continue
|
||||
if currentState.isAlternation {
|
||||
if currentState.isKleene { // Reverse order of adding things
|
||||
rightState := currentState.splitState
|
||||
copyThread(rightState, currentState)
|
||||
currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
|
||||
leftState := currentState.next
|
||||
copyThread(leftState, currentState)
|
||||
currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
|
||||
} else {
|
||||
leftState := currentState.next
|
||||
copyThread(leftState, currentState)
|
||||
currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
|
||||
rightState := currentState.splitState
|
||||
copyThread(rightState, currentState)
|
||||
currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
|
||||
}
|
||||
continue
|
||||
}
|
||||
// if currentState.isAlternation {
|
||||
// if currentState.isKleene { // Reverse order of adding things
|
||||
// rightState := currentState.splitState
|
||||
// copyThread(rightState, currentState)
|
||||
// currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
|
||||
// leftState := currentState.next
|
||||
// copyThread(leftState, currentState)
|
||||
// currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
|
||||
// } else {
|
||||
// leftState := currentState.next
|
||||
// copyThread(leftState, currentState)
|
||||
// currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
|
||||
// rightState := currentState.splitState
|
||||
// copyThread(rightState, currentState)
|
||||
// currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
|
||||
// }
|
||||
// continue
|
||||
// }
|
||||
|
||||
// Empty state - enqueue next state, do _not_ increment the SP
|
||||
if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
|
||||
isEmptyAndNoAssertion = true
|
||||
}
|
||||
|
||||
if currentState.contentContains(str, idx) {
|
||||
foundMatch = true
|
||||
}
|
||||
|
||||
if isEmptyAndNoAssertion || foundMatch {
|
||||
nextMatch := *(currentState.next)
|
||||
copyThread(&nextMatch, currentState)
|
||||
if currentState.groupBegin {
|
||||
// if !stateExists(currentStates, nextMatch) {
|
||||
currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
|
||||
//}
|
||||
} else if currentState.groupEnd {
|
||||
if !stateExists(currentStates, nextMatch) {
|
||||
currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
|
||||
}
|
||||
} else if currentState.assert != noneAssert {
|
||||
if !stateExists(currentStates, nextMatch) {
|
||||
currentStates = append(currentStates, nextMatch)
|
||||
}
|
||||
} else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
|
||||
if !stateExists(currentStates, nextMatch) {
|
||||
currentStates = append(currentStates, nextMatch)
|
||||
}
|
||||
} else {
|
||||
if !stateExists(nextStates, nextMatch) {
|
||||
nextStates = append(nextStates, nextMatch)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if currentState.isLast && len(nextStates) == 0 { // Last state reached
|
||||
currentState.threadGroups[0].EndIdx = idx
|
||||
if idx == currentState.threadGroups[0].StartIdx {
|
||||
idx += 1
|
||||
}
|
||||
return true, currentState.threadGroups, idx
|
||||
}
|
||||
// if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
|
||||
// isEmptyAndNoAssertion = true
|
||||
// }
|
||||
//
|
||||
// if currentState.contentContains(str, idx) {
|
||||
// foundMatch = true
|
||||
// }
|
||||
//
|
||||
// if isEmptyAndNoAssertion || foundMatch {
|
||||
// nextMatch := *(currentState.next)
|
||||
// copyThread(&nextMatch, currentState)
|
||||
// if currentState.groupBegin {
|
||||
// // if !stateExists(currentStates, nextMatch) {
|
||||
// currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
|
||||
// //}
|
||||
// } else if currentState.groupEnd {
|
||||
// if !stateExists(currentStates, nextMatch) {
|
||||
// currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
|
||||
// }
|
||||
// } else if currentState.assert != noneAssert {
|
||||
// if !stateExists(currentStates, nextMatch) {
|
||||
// currentStates = append(currentStates, nextMatch)
|
||||
// }
|
||||
// } else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
|
||||
// if !stateExists(currentStates, nextMatch) {
|
||||
// currentStates = append(currentStates, nextMatch)
|
||||
// }
|
||||
// } else {
|
||||
// if !stateExists(nextStates, nextMatch) {
|
||||
// nextStates = append(nextStates, nextMatch)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if currentState.isLast && len(nextStates) == 0 { // Last state reached
|
||||
// currentState.threadGroups[0].EndIdx = idx
|
||||
// if idx == currentState.threadGroups[0].StartIdx {
|
||||
// idx += 1
|
||||
// }
|
||||
// return true, currentState.threadGroups, idx
|
||||
// }
|
||||
}
|
||||
currentStates = append([]nfaState{}, nextStates...)
|
||||
nextStates = nil
|
||||
}
|
||||
if match != nil {
|
||||
if offset == match[0].EndIdx {
|
||||
return true, match, match[0].EndIdx + 1
|
||||
}
|
||||
return true, match, match[0].EndIdx
|
||||
}
|
||||
return false, []Group{}, i + 1
|
||||
// zeroStates := make([]*nfaState, 0)
|
||||
// // Keep taking zero-states, until there are no more left to take
|
||||
|
Reference in New Issue
Block a user