5 Commits

2 changed files with 139 additions and 48 deletions

View File

@@ -151,6 +151,11 @@ func pruneIndices(indices []Match) []Match {
return toRet return toRet
} }
// copyThread transfers the per-thread matching state from one NFA state to
// another: the thread's string pointer (threadSP) and its capture groups
// (threadGroups).
//
// The groups are copied into a freshly allocated slice, so later writes to
// to.threadGroups do not show through in from.threadGroups. The destination
// slice is always non-nil, even when from.threadGroups is nil.
func copyThread(to *nfaState, from nfaState) {
	groups := make([]Group, len(from.threadGroups))
	copy(groups, from.threadGroups)

	to.threadGroups = groups
	to.threadSP = from.threadSP
}
// Find returns the 0-group of the leftmost match of the regex in the given string. // Find returns the 0-group of the leftmost match of the regex in the given string.
// An error value != nil indicates that no match was found. // An error value != nil indicates that no match was found.
func (regex Reg) Find(str string) (Group, error) { func (regex Reg) Find(str string) (Group, error) {
@@ -271,7 +276,8 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
// foundPath := false // foundPath := false
//startIdx := offset //startIdx := offset
//endIdx := offset //endIdx := offset
currentStates := make([]*nfaState, 0) currentStates := make([]nfaState, 0)
nextStates := make([]nfaState, 0)
// tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration // tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
i := offset // Index in string i := offset // Index in string
//startingFrom := i // Store starting index //startingFrom := i // Store starting index
@@ -302,57 +308,139 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
//} //}
start.threadSP = i start.threadSP = i
currentStates = append(currentStates, start) currentStates = append(currentStates, *start)
var foundMatch bool var foundMatch bool
var isEmptyAndNoAssertion bool
// Main loop // Main loop
for len(currentStates) > 0 { for idx := i; idx <= len(str); idx++ {
currentState, _ := pop(&currentStates) for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
idx := currentState.threadSP currentState := currentStates[currentStateIdx]
foundMatch = false foundMatch = false
isEmptyAndNoAssertion = false
if currentState.threadGroups == nil { if currentState.threadGroups == nil {
currentState.threadGroups = newMatch(numGroups + 1) currentState.threadGroups = newMatch(numGroups + 1)
currentState.threadGroups[0].StartIdx = idx currentState.threadGroups[0].StartIdx = idx
}
if currentState.groupBegin {
currentState.threadGroups[currentState.groupNum].StartIdx = idx
} else if currentState.groupEnd {
currentState.threadGroups[currentState.groupNum].EndIdx = idx
} else if currentState.isKleene {
// Append the
} else if currentState.isAlternation {
rightState := currentState.rightState
rightState.threadGroups = currentState.threadGroups
rightState.threadSP = currentState.threadSP
currentStates = append(currentStates, currentState.rightState)
leftState := currentState.leftState
leftState.threadGroups = currentState.threadGroups
leftState.threadSP = currentState.threadSP
currentStates = append(currentStates, currentState.leftState)
continue
} else if currentState.contentContains(str, idx) {
foundMatch = true
allMatches := make([]*nfaState, 0)
for _, v := range currentState.transitions {
allMatches = append(allMatches, v...)
} }
slices.Reverse(allMatches)
for _, m := range allMatches { if currentState.groupBegin {
m.threadGroups = currentState.threadGroups currentState.threadGroups[currentState.groupNum].StartIdx = idx
if currentState.assert == noneAssert { // allMatches := make([]nfaState, 0)
m.threadSP = idx + 1 // for _, v := range currentState.transitions {
// dereferenced := funcMap(v, func(s *nfaState) nfaState {
// return *s
// })
// allMatches = append(allMatches, dereferenced...)
// }
// slices.Reverse(allMatches)
// for i := range allMatches {
// copyThread(&allMatches[i], currentState)
// }
// currentStates = append(currentStates, allMatches...)
}
if currentState.groupEnd {
currentState.threadGroups[currentState.groupNum].EndIdx = idx
// allMatches := make([]nfaState, 0)
// for _, v := range currentState.transitions {
// dereferenced := funcMap(v, func(s *nfaState) nfaState {
// return *s
// })
// allMatches = append(allMatches, dereferenced...)
// }
// slices.Reverse(allMatches)
// for i := range allMatches {
// copyThread(&allMatches[i], currentState)
// }
// currentStates = append(currentStates, allMatches...)
}
// if currentState.isKleene {
// // Append the next-state (after the kleene), then append the kleene state
// allMatches := make([]*nfaState, 0)
// for _, v := range currentState.transitions {
// allMatches = append(allMatches, v...)
// }
// slices.Reverse(allMatches)
// for _, m := range allMatches {
// m.threadGroups = currentState.threadGroups
// m.threadSP = idx
// }
// currentStates = append(currentStates, allMatches...)
//
// // kleeneState := currentState.kleeneState
// // kleeneState.threadGroups = currentState.threadGroups
// // kleeneState.threadSP = currentState.threadSP
// // currentStates = append(currentStates, kleeneState)
// continue
// }
// Alternation - enqueue left then right state, and continue
if currentState.isAlternation {
leftState := currentState.leftState
copyThread(leftState, currentState)
currentStates = append(currentStates, *currentState.leftState)
rightState := currentState.rightState
copyThread(rightState, currentState)
currentStates = append(currentStates, *currentState.rightState)
continue
}
// Empty state - enqueue next state, do _not_ increment the SP
if currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
isEmptyAndNoAssertion = true
}
if currentState.contentContains(str, idx) {
foundMatch = true
}
if isEmptyAndNoAssertion || foundMatch {
allMatches := make([]nfaState, 0)
for _, v := range currentState.transitions {
dereferenced := funcMap(v, func(s *nfaState) nfaState {
return *s
})
allMatches = append(allMatches, dereferenced...)
}
slices.Reverse(allMatches)
for i := range allMatches {
copyThread(&allMatches[i], currentState)
if foundMatch && currentState.assert == noneAssert {
allMatches[i].threadSP += 1
}
}
if currentState.groupBegin {
currentStates = slices.Insert(currentStates, currentStateIdx+1, allMatches...)
} else if currentState.groupEnd {
currentStates = append(currentStates, allMatches...)
} else { } else {
m.threadSP = idx nextStates = append(nextStates, allMatches...)
} }
} }
currentStates = append(currentStates, allMatches...)
}
if currentState.isLast && foundMatch { // Last state reached if currentState.isLast && len(nextStates) == 0 { // Last state reached
currentState.threadGroups[0].EndIdx = idx + 1 if foundMatch {
return true, currentState.threadGroups, idx + 1 if currentState.assert != noneAssert {
currentState.threadGroups[0].EndIdx = idx
} else {
currentState.threadGroups[0].EndIdx = idx + 1
}
if idx == currentState.threadGroups[0].StartIdx {
idx += 1
}
return true, currentState.threadGroups, idx
} else if isEmptyAndNoAssertion {
currentState.threadGroups[0].EndIdx = idx
if idx == currentState.threadGroups[0].StartIdx {
idx++
}
return true, currentState.threadGroups, idx
}
}
} }
currentStates = append([]nfaState{}, nextStates...)
nextStates = nil
} }
return false, []Group{}, i + 1 return false, []Group{}, i + 1
// zeroStates := make([]*nfaState, 0) // zeroStates := make([]*nfaState, 0)

View File

@@ -116,7 +116,6 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
clone.rightState = clone clone.rightState = clone
} }
clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap) clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap)
return clone return clone
} }
@@ -326,12 +325,16 @@ func kleene(s1 nfaState) (*nfaState, error) {
return nil, fmt.Errorf("previous token is not quantifiable") return nil, fmt.Errorf("previous token is not quantifiable")
} }
toReturn := &nfaState{} emptyState := zeroLengthMatchState()
toReturn.transitions = make(map[int][]*nfaState) emptyState.assert = noneAssert
toReturn.content = newContents(epsilon) toReturn := alternate(&s1, &emptyState)
// toReturn := &nfaState{}
// toReturn.transitions = make(map[int][]*nfaState)
// toReturn.content = newContents(epsilon)
toReturn.isEmpty = true toReturn.isEmpty = true
toReturn.isKleene = true toReturn.isKleene = true
toReturn.output = append(toReturn.output, toReturn) toReturn.output = []*nfaState{&emptyState}
for i := range s1.output { for i := range s1.output {
for _, c := range toReturn.content { for _, c := range toReturn.content {
s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn) s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn)
@@ -340,6 +343,7 @@ func kleene(s1 nfaState) (*nfaState, error) {
for _, c := range s1.content { for _, c := range s1.content {
toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1) toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
} }
//toReturn.kleeneState = &s1
return toReturn, nil return toReturn, nil
} }
@@ -374,7 +378,6 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
s2.content = newContents(epsilon) s2.content = newContents(epsilon)
s2.output = append(s2.output, s2) s2.output = append(s2.output, s2)
s2.isEmpty = true s2.isEmpty = true
s2.isAlternation = true
s3 := alternate(s1, s2) s3 := alternate(s1, s2)
return s3 return s3
} }