Started implementing Thompson's algorithm for matching, because the old matcher was purely backtracking (so it would enter infinite loops on patterns like '(a*)*').

The git diff reports that a lot of code changed, but most of those changes are just re-indentation.
implementPCREMatchingRules
Aadhavan Srinivasan 1 month ago
parent d4e8cb74fd
commit 7c62ba6bfd

@ -277,6 +277,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
//startIdx := offset //startIdx := offset
//endIdx := offset //endIdx := offset
currentStates := make([]nfaState, 0) currentStates := make([]nfaState, 0)
nextStates := make([]nfaState, 0)
// tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration // tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
i := offset // Index in string i := offset // Index in string
//startingFrom := i // Store starting index //startingFrom := i // Store starting index
@ -311,103 +312,106 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
var foundMatch bool var foundMatch bool
var isEmptyAndNoAssertion bool var isEmptyAndNoAssertion bool
// Main loop // Main loop
for len(currentStates) > 0 { for idx := i; idx <= len(str); idx++ {
currentState, _ := pop(&currentStates) for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
idx := currentState.threadSP currentState := currentStates[currentStateIdx]
foundMatch = false foundMatch = false
isEmptyAndNoAssertion = false isEmptyAndNoAssertion = false
if currentState.threadGroups == nil { if currentState.threadGroups == nil {
currentState.threadGroups = newMatch(numGroups + 1) currentState.threadGroups = newMatch(numGroups + 1)
currentState.threadGroups[0].StartIdx = idx currentState.threadGroups[0].StartIdx = idx
} }
if currentState.groupBegin {
currentState.threadGroups[currentState.groupNum].StartIdx = idx
}
if currentState.groupEnd { if currentState.groupBegin {
currentState.threadGroups[currentState.groupNum].EndIdx = idx currentState.threadGroups[currentState.groupNum].StartIdx = idx
} }
// if currentState.isKleene { if currentState.groupEnd {
// // Append the next-state (after the kleene), then append the kleene state currentState.threadGroups[currentState.groupNum].EndIdx = idx
// allMatches := make([]*nfaState, 0) }
// for _, v := range currentState.transitions {
// allMatches = append(allMatches, v...)
// }
// slices.Reverse(allMatches)
// for _, m := range allMatches {
// m.threadGroups = currentState.threadGroups
// m.threadSP = idx
// }
// currentStates = append(currentStates, allMatches...)
//
// // kleeneState := currentState.kleeneState
// // kleeneState.threadGroups = currentState.threadGroups
// // kleeneState.threadSP = currentState.threadSP
// // currentStates = append(currentStates, kleeneState)
// continue
// }
// Alternation - enqueue left then right state, and continue
if currentState.isAlternation {
rightState := currentState.rightState
copyThread(rightState, currentState)
currentStates = append(currentStates, *currentState.rightState)
leftState := currentState.leftState
copyThread(leftState, currentState)
currentStates = append(currentStates, *currentState.leftState)
continue
}
// Empty state - enqueue next state, do _not_ increment the SP // if currentState.isKleene {
if currentState.isEmpty && currentState.assert == noneAssert { // // Append the next-state (after the kleene), then append the kleene state
isEmptyAndNoAssertion = true // allMatches := make([]*nfaState, 0)
} // for _, v := range currentState.transitions {
// allMatches = append(allMatches, v...)
// }
// slices.Reverse(allMatches)
// for _, m := range allMatches {
// m.threadGroups = currentState.threadGroups
// m.threadSP = idx
// }
// currentStates = append(currentStates, allMatches...)
//
// // kleeneState := currentState.kleeneState
// // kleeneState.threadGroups = currentState.threadGroups
// // kleeneState.threadSP = currentState.threadSP
// // currentStates = append(currentStates, kleeneState)
// continue
// }
// Alternation - enqueue left then right state, and continue
if currentState.isAlternation {
leftState := currentState.leftState
copyThread(leftState, currentState)
currentStates = append(currentStates, *currentState.leftState)
rightState := currentState.rightState
copyThread(rightState, currentState)
currentStates = append(currentStates, *currentState.rightState)
continue
}
if currentState.contentContains(str, idx) { // Empty state - enqueue next state, do _not_ increment the SP
foundMatch = true if currentState.isEmpty && currentState.assert == noneAssert {
} isEmptyAndNoAssertion = true
}
if isEmptyAndNoAssertion || foundMatch { if currentState.contentContains(str, idx) {
allMatches := make([]nfaState, 0) foundMatch = true
for _, v := range currentState.transitions {
dereferenced := funcMap(v, func(s *nfaState) nfaState {
return *s
})
allMatches = append(allMatches, dereferenced...)
} }
slices.Reverse(allMatches)
for i := range allMatches { if isEmptyAndNoAssertion || foundMatch {
copyThread(&allMatches[i], currentState) allMatches := make([]nfaState, 0)
if foundMatch && currentState.assert == noneAssert { for _, v := range currentState.transitions {
allMatches[i].threadSP += 1 dereferenced := funcMap(v, func(s *nfaState) nfaState {
return *s
})
allMatches = append(allMatches, dereferenced...)
} }
slices.Reverse(allMatches)
for i := range allMatches {
copyThread(&allMatches[i], currentState)
if foundMatch && currentState.assert == noneAssert {
allMatches[i].threadSP += 1
}
}
nextStates = append(nextStates, allMatches...)
} }
currentStates = append(currentStates, allMatches...)
}
if currentState.isLast { // Last state reached if currentState.isLast { // Last state reached
if foundMatch { if foundMatch {
if currentState.assert != noneAssert { if currentState.assert != noneAssert {
currentState.threadGroups[0].EndIdx = idx
} else {
currentState.threadGroups[0].EndIdx = idx + 1
}
if idx == currentState.threadGroups[0].StartIdx {
idx += 1
}
return true, currentState.threadGroups, idx
} else if currentState.isEmpty && currentState.assert == noneAssert {
currentState.threadGroups[0].EndIdx = idx currentState.threadGroups[0].EndIdx = idx
} else { if idx == currentState.threadGroups[0].StartIdx {
currentState.threadGroups[0].EndIdx = idx + 1 idx++
}
return true, currentState.threadGroups, idx
} }
if idx == currentState.threadGroups[0].StartIdx {
idx += 1
}
return true, currentState.threadGroups, idx
} else if currentState.isEmpty && currentState.assert == noneAssert {
currentState.threadGroups[0].EndIdx = idx
if idx == currentState.threadGroups[0].StartIdx {
idx++
}
return true, currentState.threadGroups, idx
}
}
} }
copy(currentStates, nextStates)
nextStates = nil
} }
return false, []Group{}, i + 1 return false, []Group{}, i + 1
// zeroStates := make([]*nfaState, 0) // zeroStates := make([]*nfaState, 0)

Loading…
Cancel
Save