Implement PCRE Matching (prefer left-branch) #2
@@ -74,58 +74,58 @@ func getZeroGroup(m Match) Group {
|
|||||||
// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
|
// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
|
||||||
// the second ret val is true.
|
// the second ret val is true.
|
||||||
// If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
|
// If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
|
||||||
func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
|
//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
|
||||||
for _, state := range states {
|
// for _, state := range states {
|
||||||
if len(state.transitions[epsilon]) > 0 {
|
// if len(state.transitions[epsilon]) > 0 {
|
||||||
for _, s := range state.transitions[epsilon] {
|
// for _, s := range state.transitions[epsilon] {
|
||||||
if s.threadGroups == nil {
|
// if s.threadGroups == nil {
|
||||||
s.threadGroups = newMatch(numGroups + 1)
|
// s.threadGroups = newMatch(numGroups + 1)
|
||||||
}
|
// }
|
||||||
copy(s.threadGroups, state.threadGroups)
|
// copy(s.threadGroups, state.threadGroups)
|
||||||
if s.groupBegin {
|
// if s.groupBegin {
|
||||||
s.threadGroups[s.groupNum].StartIdx = idx
|
// s.threadGroups[s.groupNum].StartIdx = idx
|
||||||
// openParenGroups = append(openParenGroups, s.groupNum)
|
// // openParenGroups = append(openParenGroups, s.groupNum)
|
||||||
}
|
// }
|
||||||
if s.groupEnd {
|
// if s.groupEnd {
|
||||||
s.threadGroups[s.groupNum].EndIdx = idx
|
// s.threadGroups[s.groupNum].EndIdx = idx
|
||||||
// closeParenGroups = append(closeParenGroups, s.groupNum)
|
// // closeParenGroups = append(closeParenGroups, s.groupNum)
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
rtv = append(rtv, state.transitions[epsilon]...)
|
// rtv = append(rtv, state.transitions[epsilon]...)
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
for _, state := range rtv {
|
// for _, state := range rtv {
|
||||||
if len(state.transitions[epsilon]) > 0 {
|
// if len(state.transitions[epsilon]) > 0 {
|
||||||
return rtv, true
|
// return rtv, true
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
return rtv, false
|
// return rtv, false
|
||||||
}
|
//}
|
||||||
|
|
||||||
// zeroMatchPossible returns true if a zero-length match is possible
|
// zeroMatchPossible returns true if a zero-length match is possible
|
||||||
// from any of the given states, given the string and our position in it.
|
// from any of the given states, given the string and our position in it.
|
||||||
// It uses the same algorithm to find zero-states as the one inside the loop,
|
// It uses the same algorithm to find zero-states as the one inside the loop,
|
||||||
// so I should probably put it in a function.
|
// so I should probably put it in a function.
|
||||||
func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
|
//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
|
||||||
zeroStates, isZero := takeZeroState(states, numGroups, idx)
|
// zeroStates, isZero := takeZeroState(states, numGroups, idx)
|
||||||
tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
|
// tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
|
||||||
tempstates = append(tempstates, states...)
|
// tempstates = append(tempstates, states...)
|
||||||
tempstates = append(tempstates, zeroStates...)
|
// tempstates = append(tempstates, zeroStates...)
|
||||||
num_appended := 0 // number of unique states addded to tempstates
|
// num_appended := 0 // number of unique states addded to tempstates
|
||||||
for isZero == true {
|
// for isZero == true {
|
||||||
zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
|
// zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
|
||||||
tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
|
// tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
|
||||||
if num_appended == 0 { // break if we haven't appended any more unique values
|
// if num_appended == 0 { // break if we haven't appended any more unique values
|
||||||
break
|
// break
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
for _, state := range tempstates {
|
// for _, state := range tempstates {
|
||||||
if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
|
// if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
|
||||||
return true
|
// return true
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
return false
|
// return false
|
||||||
}
|
//}
|
||||||
|
|
||||||
// Prunes the slice by removing overlapping indices.
|
// Prunes the slice by removing overlapping indices.
|
||||||
func pruneIndices(indices []Match) []Match {
|
func pruneIndices(indices []Match) []Match {
|
||||||
@@ -376,17 +376,26 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
|
|
||||||
// Alternation - enqueue left then right state, and continue
|
// Alternation - enqueue left then right state, and continue
|
||||||
if currentState.isAlternation {
|
if currentState.isAlternation {
|
||||||
leftState := currentState.leftState
|
if currentState.isKleene { // Reverse order of adding things
|
||||||
copyThread(leftState, currentState)
|
rightState := currentState.splitState
|
||||||
currentStates = append(currentStates, *currentState.leftState)
|
copyThread(rightState, currentState)
|
||||||
rightState := currentState.rightState
|
currentStates = append(currentStates, *currentState.splitState)
|
||||||
copyThread(rightState, currentState)
|
leftState := currentState.next
|
||||||
currentStates = append(currentStates, *currentState.rightState)
|
copyThread(leftState, currentState)
|
||||||
|
currentStates = append(currentStates, *currentState.next)
|
||||||
|
} else {
|
||||||
|
leftState := currentState.next
|
||||||
|
copyThread(leftState, currentState)
|
||||||
|
currentStates = append(currentStates, *currentState.next)
|
||||||
|
rightState := currentState.splitState
|
||||||
|
copyThread(rightState, currentState)
|
||||||
|
currentStates = append(currentStates, *currentState.splitState)
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Empty state - enqueue next state, do _not_ increment the SP
|
// Empty state - enqueue next state, do _not_ increment the SP
|
||||||
if currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
|
if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
|
||||||
isEmptyAndNoAssertion = true
|
isEmptyAndNoAssertion = true
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -396,12 +405,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
|
|
||||||
if isEmptyAndNoAssertion || foundMatch {
|
if isEmptyAndNoAssertion || foundMatch {
|
||||||
allMatches := make([]nfaState, 0)
|
allMatches := make([]nfaState, 0)
|
||||||
for _, v := range currentState.transitions {
|
allMatches = append(allMatches, *(currentState.next))
|
||||||
dereferenced := funcMap(v, func(s *nfaState) nfaState {
|
|
||||||
return *s
|
|
||||||
})
|
|
||||||
allMatches = append(allMatches, dereferenced...)
|
|
||||||
}
|
|
||||||
slices.Reverse(allMatches)
|
slices.Reverse(allMatches)
|
||||||
for i := range allMatches {
|
for i := range allMatches {
|
||||||
copyThread(&allMatches[i], currentState)
|
copyThread(&allMatches[i], currentState)
|
||||||
@@ -419,24 +423,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
}
|
}
|
||||||
|
|
||||||
if currentState.isLast && len(nextStates) == 0 { // Last state reached
|
if currentState.isLast && len(nextStates) == 0 { // Last state reached
|
||||||
if foundMatch {
|
currentState.threadGroups[0].EndIdx = idx
|
||||||
if currentState.assert != noneAssert {
|
if idx == currentState.threadGroups[0].StartIdx {
|
||||||
currentState.threadGroups[0].EndIdx = idx
|
idx += 1
|
||||||
} else {
|
|
||||||
currentState.threadGroups[0].EndIdx = idx + 1
|
|
||||||
}
|
|
||||||
if idx == currentState.threadGroups[0].StartIdx {
|
|
||||||
idx += 1
|
|
||||||
}
|
|
||||||
return true, currentState.threadGroups, idx
|
|
||||||
} else if isEmptyAndNoAssertion {
|
|
||||||
currentState.threadGroups[0].EndIdx = idx
|
|
||||||
if idx == currentState.threadGroups[0].StartIdx {
|
|
||||||
idx++
|
|
||||||
}
|
|
||||||
return true, currentState.threadGroups, idx
|
|
||||||
}
|
}
|
||||||
|
return true, currentState.threadGroups, idx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
currentStates = append([]nfaState{}, nextStates...)
|
currentStates = append([]nfaState{}, nextStates...)
|
||||||
|
Reference in New Issue
Block a user