Compare commits
7 Commits
ed4ffde64e
...
22ead83625
Author | SHA1 | Date | |
---|---|---|---|
22ead83625 | |||
3604486a9b | |||
052de55826 | |||
d2ad0d95a8 | |||
ccf3b3b299 | |||
1d4f695f8f | |||
8534174ea1 |
@@ -987,7 +987,8 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
|
if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
|
||||||
// Map the list of nodes to a list of states, each state containing the contents of a specific node
|
// Map the list of nodes to a list of states, each state containing the contents of a specific node
|
||||||
states := funcMap(c.nodeContents, func(node postfixNode) *nfaState {
|
states := funcMap(c.nodeContents, func(node postfixNode) *nfaState {
|
||||||
s := newState()
|
s := &nfaState{}
|
||||||
|
s.output = append(s.output, s)
|
||||||
nodeContents := node.contents
|
nodeContents := node.contents
|
||||||
if caseInsensitive {
|
if caseInsensitive {
|
||||||
nodeContents = slices.Concat(funcMap(nodeContents, func(r rune) []rune {
|
nodeContents = slices.Concat(funcMap(nodeContents, func(r rune) []rune {
|
||||||
@@ -1001,7 +1002,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
return n.contents
|
return n.contents
|
||||||
})...)
|
})...)
|
||||||
}
|
}
|
||||||
return &s
|
return s
|
||||||
})
|
})
|
||||||
// Reduce the list of states down to a single state by alternating them
|
// Reduce the list of states down to a single state by alternating them
|
||||||
toAdd := funcReduce(states, func(s1 *nfaState, s2 *nfaState) *nfaState {
|
toAdd := funcReduce(states, func(s1 *nfaState, s2 *nfaState) *nfaState {
|
||||||
@@ -1046,7 +1047,10 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return Reg{}, fmt.Errorf("error applying question operator")
|
return Reg{}, fmt.Errorf("error applying question operator")
|
||||||
}
|
}
|
||||||
s2 := question(s1)
|
s2, err := question(s1)
|
||||||
|
if err != nil {
|
||||||
|
return Reg{}, err
|
||||||
|
}
|
||||||
nfa = append(nfa, s2)
|
nfa = append(nfa, s2)
|
||||||
case pipeNode:
|
case pipeNode:
|
||||||
// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
|
// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
|
||||||
@@ -1105,7 +1109,11 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
stateToAdd = concatenate(stateToAdd, s2)
|
stateToAdd = concatenate(stateToAdd, s2)
|
||||||
} else { // Case 2
|
} else { // Case 2
|
||||||
for i := c.startReps; i < c.endReps; i++ {
|
for i := c.startReps; i < c.endReps; i++ {
|
||||||
stateToAdd = concatenate(stateToAdd, question(cloneState(poppedState)))
|
tmp, err := question(cloneState(poppedState))
|
||||||
|
if err != nil {
|
||||||
|
return Reg{}, fmt.Errorf("error processing bounded repetition")
|
||||||
|
}
|
||||||
|
stateToAdd = concatenate(stateToAdd, tmp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nfa = append(nfa, stateToAdd)
|
nfa = append(nfa, stateToAdd)
|
||||||
|
@@ -2,7 +2,6 @@ package regex
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"slices"
|
|
||||||
"sort"
|
"sort"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -152,7 +151,6 @@ func pruneIndices(indices []Match) []Match {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func copyThread(to *nfaState, from nfaState) {
|
func copyThread(to *nfaState, from nfaState) {
|
||||||
to.threadSP = from.threadSP
|
|
||||||
to.threadGroups = append([]Group{}, from.threadGroups...)
|
to.threadGroups = append([]Group{}, from.threadGroups...)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -253,6 +251,43 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
|
|||||||
return indices
|
return indices
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group) []nfaState {
|
||||||
|
if stateExists(list, state) {
|
||||||
|
return list
|
||||||
|
}
|
||||||
|
if state.isKleene || state.isQuestion {
|
||||||
|
copyThread(state.splitState, state)
|
||||||
|
list = addStateToList(str, idx, list, *state.splitState, threadGroups)
|
||||||
|
copyThread(state.next, state)
|
||||||
|
list = addStateToList(str, idx, list, *state.next, threadGroups)
|
||||||
|
return list
|
||||||
|
}
|
||||||
|
if state.isAlternation {
|
||||||
|
copyThread(state.next, state)
|
||||||
|
list = addStateToList(str, idx, list, *state.next, threadGroups)
|
||||||
|
copyThread(state.splitState, state)
|
||||||
|
list = addStateToList(str, idx, list, *state.splitState, threadGroups)
|
||||||
|
return list
|
||||||
|
}
|
||||||
|
state.threadGroups = append([]Group{}, threadGroups...)
|
||||||
|
if state.assert != noneAssert {
|
||||||
|
if state.checkAssertion(str, idx) {
|
||||||
|
copyThread(state.next, state)
|
||||||
|
return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if state.groupBegin {
|
||||||
|
state.threadGroups[state.groupNum].StartIdx = idx
|
||||||
|
return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
|
||||||
|
}
|
||||||
|
if state.groupEnd {
|
||||||
|
state.threadGroups[state.groupNum].EndIdx = idx
|
||||||
|
return append(list, addStateToList(str, idx, list, *state.next, state.threadGroups)...)
|
||||||
|
}
|
||||||
|
return append(list, state)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Helper for FindAllMatches. Returns whether it found a match, the
|
// Helper for FindAllMatches. Returns whether it found a match, the
|
||||||
// first Match it finds, and how far it got into the string ie. where
|
// first Match it finds, and how far it got into the string ie. where
|
||||||
// the next search should start from.
|
// the next search should start from.
|
||||||
@@ -307,134 +342,113 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
// tempIndices[start.groupNum].startIdx = i
|
// tempIndices[start.groupNum].startIdx = i
|
||||||
//}
|
//}
|
||||||
|
|
||||||
start.threadSP = i
|
start.threadGroups = newMatch(numGroups + 1)
|
||||||
currentStates = append(currentStates, *start)
|
start.threadGroups[0].StartIdx = i
|
||||||
var foundMatch bool
|
currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups)
|
||||||
var isEmptyAndNoAssertion bool
|
var match Match = nil
|
||||||
|
// var isEmptyAndNoAssertion bool
|
||||||
// Main loop
|
// Main loop
|
||||||
for idx := i; idx <= len(str); idx++ {
|
for idx := i; idx <= len(str); idx++ {
|
||||||
|
if len(currentStates) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
|
for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
|
||||||
currentState := currentStates[currentStateIdx]
|
currentState := currentStates[currentStateIdx]
|
||||||
foundMatch = false
|
|
||||||
isEmptyAndNoAssertion = false
|
|
||||||
|
|
||||||
if currentState.threadGroups == nil {
|
if currentState.threadGroups == nil {
|
||||||
currentState.threadGroups = newMatch(numGroups + 1)
|
currentState.threadGroups = newMatch(numGroups + 1)
|
||||||
currentState.threadGroups[0].StartIdx = idx
|
currentState.threadGroups[0].StartIdx = idx
|
||||||
}
|
}
|
||||||
|
|
||||||
if currentState.groupBegin {
|
if currentState.isLast {
|
||||||
currentState.threadGroups[currentState.groupNum].StartIdx = idx
|
currentState.threadGroups[0].EndIdx = idx
|
||||||
// allMatches := make([]nfaState, 0)
|
match = append([]Group{}, currentState.threadGroups...)
|
||||||
// for _, v := range currentState.transitions {
|
break
|
||||||
// dereferenced := funcMap(v, func(s *nfaState) nfaState {
|
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
|
||||||
// return *s
|
if currentState.contentContains(str, idx) {
|
||||||
// })
|
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups)
|
||||||
// allMatches = append(allMatches, dereferenced...)
|
}
|
||||||
// }
|
|
||||||
// slices.Reverse(allMatches)
|
|
||||||
// for i := range allMatches {
|
|
||||||
// copyThread(&allMatches[i], currentState)
|
|
||||||
// }
|
|
||||||
// currentStates = append(currentStates, allMatches...)
|
|
||||||
}
|
|
||||||
if currentState.groupEnd {
|
|
||||||
currentState.threadGroups[currentState.groupNum].EndIdx = idx
|
|
||||||
// allMatches := make([]nfaState, 0)
|
|
||||||
// for _, v := range currentState.transitions {
|
|
||||||
// dereferenced := funcMap(v, func(s *nfaState) nfaState {
|
|
||||||
// return *s
|
|
||||||
// })
|
|
||||||
// allMatches = append(allMatches, dereferenced...)
|
|
||||||
// }
|
|
||||||
// slices.Reverse(allMatches)
|
|
||||||
// for i := range allMatches {
|
|
||||||
// copyThread(&allMatches[i], currentState)
|
|
||||||
// }
|
|
||||||
// currentStates = append(currentStates, allMatches...)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if currentState.isKleene {
|
// if currentState.groupBegin {
|
||||||
// // Append the next-state (after the kleene), then append the kleene state
|
// currentState.threadGroups[currentState.groupNum].StartIdx = idx
|
||||||
// allMatches := make([]*nfaState, 0)
|
|
||||||
// for _, v := range currentState.transitions {
|
|
||||||
// allMatches = append(allMatches, v...)
|
|
||||||
// }
|
// }
|
||||||
// slices.Reverse(allMatches)
|
// if currentState.groupEnd {
|
||||||
// for _, m := range allMatches {
|
// currentState.threadGroups[currentState.groupNum].EndIdx = idx
|
||||||
// m.threadGroups = currentState.threadGroups
|
|
||||||
// m.threadSP = idx
|
|
||||||
// }
|
// }
|
||||||
// currentStates = append(currentStates, allMatches...)
|
|
||||||
//
|
|
||||||
// // kleeneState := currentState.kleeneState
|
|
||||||
// // kleeneState.threadGroups = currentState.threadGroups
|
|
||||||
// // kleeneState.threadSP = currentState.threadSP
|
|
||||||
// // currentStates = append(currentStates, kleeneState)
|
|
||||||
// continue
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Alternation - enqueue left then right state, and continue
|
// Alternation - enqueue left then right state, and continue
|
||||||
if currentState.isAlternation {
|
// if currentState.isAlternation {
|
||||||
if currentState.isKleene { // Reverse order of adding things
|
// if currentState.isKleene { // Reverse order of adding things
|
||||||
rightState := currentState.splitState
|
// rightState := currentState.splitState
|
||||||
copyThread(rightState, currentState)
|
// copyThread(rightState, currentState)
|
||||||
currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
|
// currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
|
||||||
leftState := currentState.next
|
// leftState := currentState.next
|
||||||
copyThread(leftState, currentState)
|
// copyThread(leftState, currentState)
|
||||||
currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
|
// currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
|
||||||
} else {
|
// } else {
|
||||||
leftState := currentState.next
|
// leftState := currentState.next
|
||||||
copyThread(leftState, currentState)
|
// copyThread(leftState, currentState)
|
||||||
currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
|
// currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
|
||||||
rightState := currentState.splitState
|
// rightState := currentState.splitState
|
||||||
copyThread(rightState, currentState)
|
// copyThread(rightState, currentState)
|
||||||
currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
|
// currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
|
||||||
}
|
// }
|
||||||
continue
|
// continue
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Empty state - enqueue next state, do _not_ increment the SP
|
// Empty state - enqueue next state, do _not_ increment the SP
|
||||||
if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
|
// if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
|
||||||
isEmptyAndNoAssertion = true
|
// isEmptyAndNoAssertion = true
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
if currentState.contentContains(str, idx) {
|
// if currentState.contentContains(str, idx) {
|
||||||
foundMatch = true
|
// foundMatch = true
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
if isEmptyAndNoAssertion || foundMatch {
|
// if isEmptyAndNoAssertion || foundMatch {
|
||||||
allMatches := make([]nfaState, 0)
|
// nextMatch := *(currentState.next)
|
||||||
allMatches = append(allMatches, *(currentState.next))
|
// copyThread(&nextMatch, currentState)
|
||||||
slices.Reverse(allMatches)
|
// if currentState.groupBegin {
|
||||||
for i := range allMatches {
|
// // if !stateExists(currentStates, nextMatch) {
|
||||||
copyThread(&allMatches[i], currentState)
|
// currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch)
|
||||||
if foundMatch && currentState.assert == noneAssert {
|
// //}
|
||||||
allMatches[i].threadSP += 1
|
// } else if currentState.groupEnd {
|
||||||
}
|
// if !stateExists(currentStates, nextMatch) {
|
||||||
}
|
// currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch)
|
||||||
if currentState.groupBegin {
|
// }
|
||||||
currentStates = slices.Insert(currentStates, currentStateIdx+1, allMatches...)
|
// } else if currentState.assert != noneAssert {
|
||||||
} else if currentState.groupEnd {
|
// if !stateExists(currentStates, nextMatch) {
|
||||||
currentStates = append(currentStates, allMatches...)
|
// currentStates = append(currentStates, nextMatch)
|
||||||
} else if currentState.assert != noneAssert {
|
// }
|
||||||
currentStates = append(currentStates, allMatches...)
|
// } else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd {
|
||||||
} else {
|
// if !stateExists(currentStates, nextMatch) {
|
||||||
nextStates = append(nextStates, allMatches...)
|
// currentStates = append(currentStates, nextMatch)
|
||||||
}
|
// }
|
||||||
}
|
// } else {
|
||||||
|
// if !stateExists(nextStates, nextMatch) {
|
||||||
if currentState.isLast && len(nextStates) == 0 { // Last state reached
|
// nextStates = append(nextStates, nextMatch)
|
||||||
currentState.threadGroups[0].EndIdx = idx
|
// }
|
||||||
if idx == currentState.threadGroups[0].StartIdx {
|
// }
|
||||||
idx += 1
|
// }
|
||||||
}
|
//
|
||||||
return true, currentState.threadGroups, idx
|
// if currentState.isLast && len(nextStates) == 0 { // Last state reached
|
||||||
}
|
// currentState.threadGroups[0].EndIdx = idx
|
||||||
|
// if idx == currentState.threadGroups[0].StartIdx {
|
||||||
|
// idx += 1
|
||||||
|
// }
|
||||||
|
// return true, currentState.threadGroups, idx
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
currentStates = append([]nfaState{}, nextStates...)
|
currentStates = append([]nfaState{}, nextStates...)
|
||||||
nextStates = nil
|
nextStates = nil
|
||||||
}
|
}
|
||||||
|
if match != nil {
|
||||||
|
if offset == match[0].EndIdx {
|
||||||
|
return true, match, match[0].EndIdx + 1
|
||||||
|
}
|
||||||
|
return true, match, match[0].EndIdx
|
||||||
|
}
|
||||||
return false, []Group{}, i + 1
|
return false, []Group{}, i + 1
|
||||||
// zeroStates := make([]*nfaState, 0)
|
// zeroStates := make([]*nfaState, 0)
|
||||||
// // Keep taking zero-states, until there are no more left to take
|
// // Keep taking zero-states, until there are no more left to take
|
||||||
|
54
regex/nfa.go
54
regex/nfa.go
@@ -47,7 +47,6 @@ type nfaState struct {
|
|||||||
// The following properties depend on the current match - I should think about resetting them for every match.
|
// The following properties depend on the current match - I should think about resetting them for every match.
|
||||||
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
|
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
|
||||||
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
|
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
|
||||||
threadSP int // The string pointer of the thread - where it is in the input string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clones the NFA starting from the given state.
|
// Clones the NFA starting from the given state.
|
||||||
@@ -123,7 +122,6 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
|
|||||||
}
|
}
|
||||||
// Assuming it hasn't been visited
|
// Assuming it hasn't been visited
|
||||||
state.threadGroups = nil
|
state.threadGroups = nil
|
||||||
state.threadSP = 0
|
|
||||||
visitedMap[state] = true
|
visitedMap[state] = true
|
||||||
if state.isAlternation {
|
if state.isAlternation {
|
||||||
resetThreadsHelper(state.next, visitedMap)
|
resetThreadsHelper(state.next, visitedMap)
|
||||||
@@ -331,9 +329,6 @@ func kleene(s1 *nfaState) (*nfaState, error) {
|
|||||||
toReturn.isAlternation = true
|
toReturn.isAlternation = true
|
||||||
toReturn.content = newContents(epsilon)
|
toReturn.content = newContents(epsilon)
|
||||||
toReturn.splitState = s1
|
toReturn.splitState = s1
|
||||||
for i := range s1.output {
|
|
||||||
s1.output[i].next = toReturn
|
|
||||||
}
|
|
||||||
|
|
||||||
// toReturn := &nfaState{}
|
// toReturn := &nfaState{}
|
||||||
// toReturn.transitions = make(map[int][]*nfaState)
|
// toReturn.transitions = make(map[int][]*nfaState)
|
||||||
@@ -375,14 +370,20 @@ func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
|
|||||||
return toReturn
|
return toReturn
|
||||||
}
|
}
|
||||||
|
|
||||||
func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
|
func question(s1 *nfaState) (*nfaState, error) { // Use the fact that ab? == a(b|)
|
||||||
s2 := &nfaState{}
|
if s1.isEmpty && s1.assert != noneAssert {
|
||||||
|
return nil, fmt.Errorf("previous token is not quantifiable")
|
||||||
|
}
|
||||||
|
toReturn := &nfaState{}
|
||||||
|
toReturn.isEmpty = true
|
||||||
|
toReturn.isAlternation = true
|
||||||
|
toReturn.isQuestion = true
|
||||||
|
toReturn.content = newContents(epsilon)
|
||||||
|
toReturn.splitState = s1
|
||||||
|
toReturn.output = append([]*nfaState{}, toReturn)
|
||||||
|
toReturn.output = append(toReturn.output, s1.output...)
|
||||||
// s2.transitions = make(map[int][]*nfaState)
|
// s2.transitions = make(map[int][]*nfaState)
|
||||||
s2.content = newContents(epsilon)
|
return toReturn, nil
|
||||||
s2.output = append(s2.output, s2)
|
|
||||||
s2.isEmpty = true
|
|
||||||
s3 := alternate(s1, s2)
|
|
||||||
return s3
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates and returns a new state with the 'default' values.
|
// Creates and returns a new state with the 'default' values.
|
||||||
@@ -408,3 +409,32 @@ func zeroLengthMatchState() nfaState {
|
|||||||
start.assert = alwaysTrueAssert
|
start.assert = alwaysTrueAssert
|
||||||
return start
|
return start
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s nfaState) equals(other nfaState) bool {
|
||||||
|
return slices.Equal(s.content, other.content) &&
|
||||||
|
s.isEmpty == other.isEmpty &&
|
||||||
|
s.isLast == other.isLast &&
|
||||||
|
slices.Equal(s.output, other.output) &&
|
||||||
|
s.next == other.next &&
|
||||||
|
s.isKleene == other.isKleene &&
|
||||||
|
s.isQuestion == other.isQuestion &&
|
||||||
|
s.isAlternation == other.isAlternation &&
|
||||||
|
s.splitState == other.splitState &&
|
||||||
|
s.assert == other.assert &&
|
||||||
|
s.allChars == other.allChars &&
|
||||||
|
slices.Equal(s.except, other.except) &&
|
||||||
|
s.lookaroundNFA == other.lookaroundNFA &&
|
||||||
|
s.groupBegin == other.groupBegin &&
|
||||||
|
s.groupEnd == other.groupEnd &&
|
||||||
|
s.groupNum == other.groupNum &&
|
||||||
|
slices.Equal(s.threadGroups, other.threadGroups)
|
||||||
|
}
|
||||||
|
|
||||||
|
func stateExists(list []nfaState, s nfaState) bool {
|
||||||
|
for i := range list {
|
||||||
|
if list[i].equals(s) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user