Used Pike's algorithm (an extension of Thompson's algorithm; see Russ Cox's 2nd article). I think I almost have a working PCRE-style engine.

implementPCREMatchingRules
Aadhavan Srinivasan 1 month ago
parent 052de55826
commit 3604486a9b

@ -2,7 +2,6 @@ package regex
import (
"fmt"
"slices"
"sort"
)
@ -252,32 +251,36 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
return indices
}
// addStateToList returns list extended with state and/or the states reached
// by following it, skipping any state that stateExists already reports as
// present in list. idx is the index written into capture-group start/end
// slots, and threadGroups is the group slice copied onto states before they
// are stored.
//
// NOTE(review): this span is a rendered diff whose +/- markers were stripped,
// so removed and added lines are interleaved (two signatures, repeated
// branches). The comments below describe individual lines only; confirm
// against the committed file which lines actually survive.
func addStateToList(idx int, list []nfaState, state nfaState) []nfaState {
func addStateToList(idx int, list []nfaState, state nfaState, threadGroups []Group) []nfaState {
// A state that is already in the list is not added a second time.
if stateExists(list, state) {
return list
}
if state.isAlternation {
copyThread(state.next, state)
list = append(list, addStateToList(idx, list, *state.next)...)
// Kleene / question states: follow splitState first, then next.
if state.isKleene || state.isQuestion {
copyThread(state.splitState, state)
list = append(list, addStateToList(idx, list, *state.splitState)...)
list = addStateToList(idx, list, *state.splitState, threadGroups)
copyThread(state.next, state)
list = addStateToList(idx, list, *state.next, threadGroups)
return list
}
if state.isKleene {
copyThread(state.splitState, state)
list = append(list, addStateToList(idx, list, *state.splitState)...)
// Alternation states: follow next first, then splitState (the opposite order
// to the Kleene / question branch above).
if state.isAlternation {
copyThread(state.next, state)
list = append(list, addStateToList(idx, list, *state.next)...)
list = addStateToList(idx, list, *state.next, threadGroups)
copyThread(state.splitState, state)
list = addStateToList(idx, list, *state.splitState, threadGroups)
return list
}
// Work on a fresh copy of the groups so the writes below do not modify the
// caller's threadGroups slice.
state.threadGroups = append([]Group{}, threadGroups...)
if state.groupBegin {
// Record where this group starts, then continue with the state after the marker.
state.threadGroups[state.groupNum].StartIdx = idx
// NOTE(review): every return path of addStateToList yields a slice that begins
// with list, so appending that result onto list again looks like it duplicates
// the existing entries - confirm this is intended.
return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...)
}
if state.groupEnd {
// NOTE(review): StartIdx is written here in the groupEnd branch right before
// EndIdx; this may be a removed diff line rather than live code - confirm.
state.threadGroups[state.groupNum].StartIdx = idx
state.threadGroups[state.groupNum].EndIdx = idx
// NOTE(review): same append-of-a-result-that-already-contains-list pattern as
// in the groupBegin branch - confirm.
return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...)
}
copyThread(state.next, state)
return append(list, *state.next)
// Any other state is stored itself, carrying its own copy of threadGroups.
state.threadGroups = append([]Group{}, threadGroups...)
return append(list, state)
}
@ -335,138 +338,113 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
// tempIndices[start.groupNum].startIdx = i
//}
currentStates = append(currentStates, *start)
var foundMatch bool
var isEmptyAndNoAssertion bool
start.threadGroups = newMatch(numGroups + 1)
start.threadGroups[0].StartIdx = i
currentStates = addStateToList(i, currentStates, *start, start.threadGroups)
var match Match = nil
// var isEmptyAndNoAssertion bool
// Main loop
for idx := i; idx <= len(str); idx++ {
if len(currentStates) == 0 {
break
}
for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
currentState := currentStates[currentStateIdx]
foundMatch = false
isEmptyAndNoAssertion = false
if currentState.threadGroups == nil {
currentState.threadGroups = newMatch(numGroups + 1)
currentState.threadGroups[0].StartIdx = idx
}
if currentState.groupBegin {
currentState.threadGroups[currentState.groupNum].StartIdx = idx
// allMatches := make([]nfaState, 0)
// for _, v := range currentState.transitions {
// dereferenced := funcMap(v, func(s *nfaState) nfaState {
// return *s
// })
// allMatches = append(allMatches, dereferenced...)
// }
// slices.Reverse(allMatches)
// for i := range allMatches {
// copyThread(&allMatches[i], currentState)
// }
// currentStates = append(currentStates, allMatches...)
if currentState.isLast {
currentState.threadGroups[0].EndIdx = idx
match = append([]Group{}, currentState.threadGroups...)
break
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
if currentState.contentContains(str, idx) {
nextStates = addStateToList(idx+1, nextStates, *currentState.next, currentState.threadGroups)
}
if currentState.groupEnd {
currentState.threadGroups[currentState.groupNum].EndIdx = idx
// allMatches := make([]nfaState, 0)
// for _, v := range currentState.transitions {
// dereferenced := funcMap(v, func(s *nfaState) nfaState {
// return *s
// })
// allMatches = append(allMatches, dereferenced...)
// }
// slices.Reverse(allMatches)
// for i := range allMatches {
// copyThread(&allMatches[i], currentState)
// }
// currentStates = append(currentStates, allMatches...)
}
// if currentState.isKleene {
// // Append the next-state (after the kleene), then append the kleene state
// allMatches := make([]*nfaState, 0)
// for _, v := range currentState.transitions {
// allMatches = append(allMatches, v...)
// if currentState.groupBegin {
// currentState.threadGroups[currentState.groupNum].StartIdx = idx
// }
// slices.Reverse(allMatches)
// for _, m := range allMatches {
// m.threadGroups = currentState.threadGroups
// m.threadSP = idx
// }
// currentStates = append(currentStates, allMatches...)
//
// // kleeneState := currentState.kleeneState
// // kleeneState.threadGroups = currentState.threadGroups
// // kleeneState.threadSP = currentState.threadSP
// // currentStates = append(currentStates, kleeneState)
// continue
// if currentState.groupEnd {
// currentState.threadGroups[currentState.groupNum].EndIdx = idx
// }
// Alternation - enqueue left then right state, and continue
if currentState.isAlternation {
if currentState.isKleene { // Reverse order of adding things
rightState := currentState.splitState
copyThread(rightState, currentState)
currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
leftState := currentState.next
copyThread(leftState, currentState)
currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
} else {
leftState := currentState.next
copyThread(leftState, currentState)
currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
rightState := currentState.splitState
copyThread(rightState, currentState)
currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
}
continue
}
// if currentState.isAlternation {
// if currentState.isKleene { // Reverse order of adding things
// rightState := currentState.splitState
// copyThread(rightState, currentState)
// currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
// leftState := currentState.next
// copyThread(leftState, currentState)
// currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
// } else {
// leftState := currentState.next
// copyThread(leftState, currentState)
// currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
// rightState := currentState.splitState
// copyThread(rightState, currentState)
// currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
// }
// continue
// }
// Empty state - enqueue next state, do _not_ increment the SP
if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
isEmptyAndNoAssertion = true
}
if currentState.contentContains(str, idx) {
foundMatch = true
}
if isEmptyAndNoAssertion || foundMatch {
nextMatch := *(currentState.next)
copyThread(&nextMatch, currentState)
if currentState.groupBegin {
// if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
// isEmptyAndNoAssertion = true
// }
//
// if currentState.contentContains(str, idx) {
// foundMatch = true
// }
//
// if isEmptyAndNoAssertion || foundMatch {
// nextMatch := *(currentState.next)
// copyThread(&nextMatch, currentState)
// if currentState.groupBegin {
// // if !stateExists(currentStates, nextMatch) {
// currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
// //}
// } else if currentState.groupEnd {
// if !stateExists(currentStates, nextMatch) {
currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
// currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
// }
// } else if currentState.assert != noneAssert {
// if !stateExists(currentStates, nextMatch) {
// currentStates = append(currentStates, nextMatch)
// }
// } else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
// if !stateExists(currentStates, nextMatch) {
// currentStates = append(currentStates, nextMatch)
// }
// } else {
// if !stateExists(nextStates, nextMatch) {
// nextStates = append(nextStates, nextMatch)
// }
// }
// }
//
// if currentState.isLast && len(nextStates) == 0 { // Last state reached
// currentState.threadGroups[0].EndIdx = idx
// if idx == currentState.threadGroups[0].StartIdx {
// idx += 1
// }
// return true, currentState.threadGroups, idx
// }
} else if currentState.groupEnd {
if !stateExists(currentStates, nextMatch) {
currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
}
} else if currentState.assert != noneAssert {
if !stateExists(currentStates, nextMatch) {
currentStates = append(currentStates, nextMatch)
}
} else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
if !stateExists(currentStates, nextMatch) {
currentStates = append(currentStates, nextMatch)
}
} else {
if !stateExists(nextStates, nextMatch) {
nextStates = append(nextStates, nextMatch)
}
}
}
if currentState.isLast && len(nextStates) == 0 { // Last state reached
currentState.threadGroups[0].EndIdx = idx
if idx == currentState.threadGroups[0].StartIdx {
idx += 1
}
return true, currentState.threadGroups, idx
}
}
currentStates = append([]nfaState{}, nextStates...)
nextStates = nil
}
if match != nil {
if offset == match[0].EndIdx {
return true, match, match[0].EndIdx + 1
}
return true, match, match[0].EndIdx
}
return false, []Group{}, i + 1
// zeroStates := make([]*nfaState, 0)
// // Keep taking zero-states, until there are no more left to take

Loading…
Cancel
Save