2 Commits

Author SHA1 Message Date
fbc9bea9fb Commented out unused functions; use new nfaState parameters 2025-02-05 22:23:31 -05:00
cca8c7cda2 Got rid of transitions parameter, changed how kleene state is processed
I replaced the transitions parameter of nfaState with a single nfaState
pointer. Any non-alternation state will only ever have one next state,
so the map was just added complexity.

I changed alternation processing - instead of having their own dedicated
fields, they just use the new 'next' parameter, and another one called
'splitState'.

I also changed the kleene state processing to remove the unnecessary
empty state in the right-side alternation (it actually messed up my
matching).
2025-02-05 22:20:28 -05:00
3 changed files with 215 additions and 220 deletions

View File

@@ -822,7 +822,6 @@ func thompson(re []postfixNode) (Reg, error) {
for _, c := range re {
if c.nodetype == characterNode || c.nodetype == assertionNode {
stateToAdd := nfaState{}
stateToAdd.transitions = make(map[int][]*nfaState)
if c.allChars {
stateToAdd.allChars = true
if len(c.except) != 0 {
@@ -934,7 +933,6 @@ func thompson(re []postfixNode) (Reg, error) {
s.isEmpty = true
s.output = make([]*nfaState, 0)
s.output = append(s.output, s)
s.transitions = make(map[int][]*nfaState)
// LPAREN nodes are just added normally
if c.nodetype == lparenNode {
numGroups++
@@ -966,7 +964,7 @@ func thompson(re []postfixNode) (Reg, error) {
s.groupNum = lparenNode.groupNum
to_add := concatenate(lparenNode, s)
nfa = append(nfa, to_add)
} else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
} else if middleNode.groupBegin && middleNode.numTransitions() == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
nfa = append(nfa, lparenNode) // I shouldn't have popped this out, because it is not involved in the current capturing group
s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually an lparen
to_add := concatenate(middleNode, s)
@@ -1030,14 +1028,14 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
return Reg{}, fmt.Errorf("error applying kleene star")
}
stateToAdd, err := kleene(*s1)
stateToAdd, err := kleene(s1)
if err != nil {
return Reg{}, err
}
nfa = append(nfa, stateToAdd)
case plusNode: // a+ is equivalent to aa*
s1 := mustPop(&nfa)
s2, err := kleene(*s1)
s2, err := kleene(s1)
if err != nil {
return Reg{}, err
}
@@ -1061,14 +1059,14 @@ func thompson(re []postfixNode) (Reg, error) {
// '^|a'
s2, err1 := pop(&nfa)
s1, err2 := pop(&nfa)
if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
if err2 != nil || (s2.groupBegin && s2.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
nfa = append(nfa, s2)
}
tmp := zeroLengthMatchState()
s2 = &tmp
}
if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN
if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
if err1 == nil { // See above for explanation
nfa = append(nfa, s1)
}
@@ -1100,7 +1098,7 @@ func thompson(re []postfixNode) (Reg, error) {
stateToAdd = concatenate(stateToAdd, cloneState(poppedState))
}
if c.endReps == infinite_reps { // Case 3
s2, err := kleene(*poppedState)
s2, err := kleene(poppedState)
if err != nil {
return Reg{}, err
}
@@ -1117,7 +1115,10 @@ func thompson(re []postfixNode) (Reg, error) {
return Reg{}, fmt.Errorf("invalid regex")
}
verifyLastStates(nfa)
lastState := newState()
lastState.isLast = true
concatenate(nfa[0], &lastState)
return Reg{nfa[0], numGroups}, nil

View File

@@ -74,58 +74,58 @@ func getZeroGroup(m Match) Group {
// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
// the second ret val is true.
// If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
for _, state := range states {
if len(state.transitions[epsilon]) > 0 {
for _, s := range state.transitions[epsilon] {
if s.threadGroups == nil {
s.threadGroups = newMatch(numGroups + 1)
}
copy(s.threadGroups, state.threadGroups)
if s.groupBegin {
s.threadGroups[s.groupNum].StartIdx = idx
// openParenGroups = append(openParenGroups, s.groupNum)
}
if s.groupEnd {
s.threadGroups[s.groupNum].EndIdx = idx
// closeParenGroups = append(closeParenGroups, s.groupNum)
}
}
rtv = append(rtv, state.transitions[epsilon]...)
}
}
for _, state := range rtv {
if len(state.transitions[epsilon]) > 0 {
return rtv, true
}
}
return rtv, false
}
//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
// for _, state := range states {
// if len(state.transitions[epsilon]) > 0 {
// for _, s := range state.transitions[epsilon] {
// if s.threadGroups == nil {
// s.threadGroups = newMatch(numGroups + 1)
// }
// copy(s.threadGroups, state.threadGroups)
// if s.groupBegin {
// s.threadGroups[s.groupNum].StartIdx = idx
// // openParenGroups = append(openParenGroups, s.groupNum)
// }
// if s.groupEnd {
// s.threadGroups[s.groupNum].EndIdx = idx
// // closeParenGroups = append(closeParenGroups, s.groupNum)
// }
// }
// rtv = append(rtv, state.transitions[epsilon]...)
// }
// }
// for _, state := range rtv {
// if len(state.transitions[epsilon]) > 0 {
// return rtv, true
// }
// }
// return rtv, false
//}
// zeroMatchPossible returns true if a zero-length match is possible
// from any of the given states, given the string and our position in it.
// It uses the same algorithm to find zero-states as the one inside the loop,
// so I should probably put it in a function.
func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
zeroStates, isZero := takeZeroState(states, numGroups, idx)
tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
tempstates = append(tempstates, states...)
tempstates = append(tempstates, zeroStates...)
num_appended := 0 // number of unique states addded to tempstates
for isZero == true {
zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
if num_appended == 0 { // break if we haven't appended any more unique values
break
}
}
for _, state := range tempstates {
if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
return true
}
}
return false
}
//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
// zeroStates, isZero := takeZeroState(states, numGroups, idx)
// tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
// tempstates = append(tempstates, states...)
// tempstates = append(tempstates, zeroStates...)
// num_appended := 0 // number of unique states addded to tempstates
// for isZero == true {
// zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
// tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
// if num_appended == 0 { // break if we haven't appended any more unique values
// break
// }
// }
// for _, state := range tempstates {
// if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
// return true
// }
// }
// return false
//}
// Prunes the slice by removing overlapping indices.
func pruneIndices(indices []Match) []Match {
@@ -376,17 +376,26 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
// Alternation - enqueue left then right state, and continue
if currentState.isAlternation {
leftState := currentState.leftState
copyThread(leftState, currentState)
currentStates = append(currentStates, *currentState.leftState)
rightState := currentState.rightState
copyThread(rightState, currentState)
currentStates = append(currentStates, *currentState.rightState)
if currentState.isKleene { // Reverse order of adding things
rightState := currentState.splitState
copyThread(rightState, currentState)
currentStates = append(currentStates, *currentState.splitState)
leftState := currentState.next
copyThread(leftState, currentState)
currentStates = append(currentStates, *currentState.next)
} else {
leftState := currentState.next
copyThread(leftState, currentState)
currentStates = append(currentStates, *currentState.next)
rightState := currentState.splitState
copyThread(rightState, currentState)
currentStates = append(currentStates, *currentState.splitState)
}
continue
}
// Empty state - enqueue next state, do _not_ increment the SP
if currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
isEmptyAndNoAssertion = true
}
@@ -396,12 +405,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
if isEmptyAndNoAssertion || foundMatch {
allMatches := make([]nfaState, 0)
for _, v := range currentState.transitions {
dereferenced := funcMap(v, func(s *nfaState) nfaState {
return *s
})
allMatches = append(allMatches, dereferenced...)
}
allMatches = append(allMatches, *(currentState.next))
slices.Reverse(allMatches)
for i := range allMatches {
copyThread(&allMatches[i], currentState)
@@ -419,24 +423,11 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
}
if currentState.isLast && len(nextStates) == 0 { // Last state reached
if foundMatch {
if currentState.assert != noneAssert {
currentState.threadGroups[0].EndIdx = idx
} else {
currentState.threadGroups[0].EndIdx = idx + 1
}
if idx == currentState.threadGroups[0].StartIdx {
idx += 1
}
return true, currentState.threadGroups, idx
} else if isEmptyAndNoAssertion {
currentState.threadGroups[0].EndIdx = idx
if idx == currentState.threadGroups[0].StartIdx {
idx++
}
return true, currentState.threadGroups, idx
currentState.threadGroups[0].EndIdx = idx
if idx == currentState.threadGroups[0].StartIdx {
idx += 1
}
return true, currentState.threadGroups, idx
}
}
currentStates = append([]nfaState{}, nextStates...)

View File

@@ -25,25 +25,25 @@ const (
)
type nfaState struct {
content stateContents // Contents of current state
isEmpty bool // If it is empty - Union operator and Kleene star states will be empty
isLast bool // If it is the last state (acept state)
output []*nfaState // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
transitions map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
isQuestion bool // Identifies whether current node is a 0-state representing the question operator
isAlternation bool // Identifies whether current node is a 0-state representing an alternation
leftState *nfaState // Only for alternation states - the 'left' branch of the alternation
rightState *nfaState // Only for alternation states - the 'right' branch of the alternation
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
allChars bool // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
lookaroundRegex string // Only for lookaround states - Contents of the regex that the lookaround state holds
lookaroundNFA *nfaState // Holds the NFA of the lookaroundRegex - if it exists
lookaroundNumCaptureGroups int // Number of capturing groups in lookaround regex if current node is a lookaround
groupBegin bool // Whether or not the node starts a capturing group
groupEnd bool // Whether or not the node ends a capturing group
groupNum int // Which capturing group the node starts / ends
content stateContents // Contents of current state
isEmpty bool // If it is empty - Union operator and Kleene star states will be empty
isLast bool // If it is the last state (acept state)
output []*nfaState // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
// transitions map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
next *nfaState // The next state (not for alternation or kleene states)
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
isQuestion bool // Identifies whether current node is a 0-state representing the question operator
isAlternation bool // Identifies whether current node is a 0-state representing an alternation
splitState *nfaState // Only for alternation states - the 'other' branch of the alternation ('next' is the first)
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
allChars bool // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
lookaroundRegex string // Only for lookaround states - Contents of the regex that the lookaround state holds
lookaroundNFA *nfaState // Holds the NFA of the lookaroundRegex - if it exists
lookaroundNumCaptureGroups int // Number of capturing groups in lookaround regex if current node is a lookaround
groupBegin bool // Whether or not the node starts a capturing group
groupEnd bool // Whether or not the node ends a capturing group
groupNum int // Which capturing group the node starts / ends
// The following properties depend on the current match - I should think about resetting them for every match.
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
@@ -73,7 +73,6 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
isEmpty: stateToClone.isEmpty,
isLast: stateToClone.isLast,
output: make([]*nfaState, len(stateToClone.output)),
transitions: make(map[int][]*nfaState),
isKleene: stateToClone.isKleene,
isQuestion: stateToClone.isQuestion,
isAlternation: stateToClone.isAlternation,
@@ -94,28 +93,18 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
clone.output[i] = cloneStateHelper(s, cloneMap)
}
}
for k, v := range stateToClone.transitions {
clone.transitions[k] = make([]*nfaState, len(v))
for i, s := range v {
if s == stateToClone {
clone.transitions[k][i] = clone
} else {
clone.transitions[k][i] = cloneStateHelper(s, cloneMap)
}
}
}
if stateToClone.lookaroundNFA == stateToClone {
clone.lookaroundNFA = clone
}
clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
if stateToClone.leftState == stateToClone {
clone.leftState = clone
if stateToClone.splitState == stateToClone {
clone.splitState = clone
}
clone.leftState = cloneStateHelper(stateToClone.leftState, cloneMap)
if stateToClone.rightState == stateToClone {
clone.rightState = clone
clone.splitState = cloneStateHelper(stateToClone.splitState, cloneMap)
if stateToClone.next == stateToClone {
clone.next = clone
}
clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap)
clone.next = cloneStateHelper(stateToClone.next, cloneMap)
return clone
}
@@ -126,6 +115,9 @@ func resetThreads(start *nfaState) {
}
func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
if state == nil {
return
}
if _, ok := visitedMap[state]; ok {
return
}
@@ -133,10 +125,11 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
state.threadGroups = nil
state.threadSP = 0
visitedMap[state] = true
for _, v := range state.transitions {
for _, nextState := range v {
resetThreadsHelper(nextState, visitedMap)
}
if state.isAlternation {
resetThreadsHelper(state.next, visitedMap)
resetThreadsHelper(state.splitState, visitedMap)
} else {
resetThreadsHelper(state.next, visitedMap)
}
}
@@ -237,74 +230,84 @@ func (s nfaState) isLookaround() bool {
return s.assert == plaAssert || s.assert == plbAssert || s.assert == nlaAssert || s.assert == nlbAssert
}
func (s nfaState) numTransitions() int {
if s.next == nil && s.splitState == nil {
return 0
}
if s.next == nil || s.splitState == nil {
return 1
}
return 2
}
// Returns the matches for the character at the given index of the given string.
// Also returns the number of matches. Returns -1 if an assertion failed.
func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
// Assertions can be viewed as 'checks'. If the check fails, we return
// an empty array and 0.
// If it passes, we treat it like any other state, and return all the transitions.
if s.assert != noneAssert {
if s.checkAssertion(str, idx) == false {
return make([]*nfaState, 0), -1
}
}
listTransitions := s.transitions[int(str[idx])]
for _, dest := range s.transitions[int(anyCharRune)] {
if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
// Add an allChar state to the list of matches if:
// a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
// b. The current character isn't the state's exception list.
listTransitions = append(listTransitions, dest)
}
}
numTransitions := len(listTransitions)
return listTransitions, numTransitions
}
//func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
// // Assertions can be viewed as 'checks'. If the check fails, we return
// // an empty array and 0.
// // If it passes, we treat it like any other state, and return all the transitions.
// if s.assert != noneAssert {
// if s.checkAssertion(str, idx) == false {
// return make([]*nfaState, 0), -1
// }
// }
// listTransitions := s.transitions[int(str[idx])]
// for _, dest := range s.transitions[int(anyCharRune)] {
// if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
// // Add an allChar state to the list of matches if:
// // a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
// // b. The current character isn't the state's exception list.
// listTransitions = append(listTransitions, dest)
// }
// }
// numTransitions := len(listTransitions)
// return listTransitions, numTransitions
//}
// verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
if len(st.transitions) == 0 {
st.isLast = true
return
}
// if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
if len(st.transitions) == 1 { // Eg. a*
var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
for _, c := range st.content {
if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
moreThanOneTrans = true
}
}
st.isLast = !moreThanOneTrans
}
if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
transitionDests := make([]*nfaState, 0)
for _, v := range st.transitions {
transitionDests = append(transitionDests, v...)
}
if allEqual(transitionDests...) {
st.isLast = true
return
}
}
if visited[st] == true {
return
}
visited[st] = true
for _, states := range st.transitions {
for i := range states {
if states[i] != st {
verifyLastStatesHelper(states[i], visited)
}
}
}
}
//func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
// if st.numTransitions() == 0 {
// st.isLast = true
// return
// }
// // if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
// if st.numTransitions() == 1 { // Eg. a*
// var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
// for _, c := range st.content {
// if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
// moreThanOneTrans = true
// }
// }
// st.isLast = !moreThanOneTrans
// }
//
// if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
// transitionDests := make([]*nfaState, 0)
// for _, v := range st.transitions {
// transitionDests = append(transitionDests, v...)
// }
// if allEqual(transitionDests...) {
// st.isLast = true
// return
// }
// }
// if visited[st] == true {
// return
// }
// visited[st] = true
// for _, states := range st.transitions {
// for i := range states {
// if states[i] != st {
// verifyLastStatesHelper(states[i], visited)
// }
// }
// }
//}
// verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
func verifyLastStates(start []*nfaState) {
verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
}
//func verifyLastStates(start []*nfaState) {
// verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
//}
// Concatenates s1 and s2, returns the start of the concatenation.
func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -312,69 +315,69 @@ func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
return s2
}
for i := range s1.output {
for _, c := range s2.content { // Create transitions for every element in s1's content to s2'
s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], s2)
}
s1.output[i].next = s2
}
s1.output = s2.output
return s1
}
func kleene(s1 nfaState) (*nfaState, error) {
func kleene(s1 *nfaState) (*nfaState, error) {
if s1.isEmpty && s1.assert != noneAssert {
return nil, fmt.Errorf("previous token is not quantifiable")
}
emptyState := zeroLengthMatchState()
emptyState.assert = noneAssert
toReturn := alternate(&s1, &emptyState)
toReturn := &nfaState{}
toReturn.isEmpty = true
toReturn.isAlternation = true
toReturn.content = newContents(epsilon)
toReturn.splitState = s1
for i := range s1.output {
s1.output[i].next = toReturn
}
// toReturn := &nfaState{}
// toReturn.transitions = make(map[int][]*nfaState)
// toReturn.content = newContents(epsilon)
toReturn.isEmpty = true
toReturn.isKleene = true
toReturn.output = []*nfaState{&emptyState}
toReturn.output = append([]*nfaState{}, toReturn)
for i := range s1.output {
for _, c := range toReturn.content {
s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn)
}
}
for _, c := range s1.content {
toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
s1.output[i].next = toReturn
}
// for _, c := range s1.content {
// toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
// }
//toReturn.kleeneState = &s1
return toReturn, nil
}
func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
toReturn := &nfaState{}
toReturn.transitions = make(map[int][]*nfaState)
// toReturn.transitions = make(map[int][]*nfaState)
toReturn.output = append(toReturn.output, s1.output...)
toReturn.output = append(toReturn.output, s2.output...)
// Unique append is used here (and elsewhere) to ensure that,
// for any given transition, a state can only be mentioned once.
// For example, given the transition 'a', the state 's1' can only be mentioned once.
// This would lead to multiple instances of the same set of match indices, since both
// 's1' states would be considered to match.
for _, c := range s1.content {
toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
}
for _, c := range s2.content {
toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
}
// // Unique append is used here (and elsewhere) to ensure that,
// // for any given transition, a state can only be mentioned once.
// // For example, given the transition 'a', the state 's1' can only be mentioned once.
// // This would lead to multiple instances of the same set of match indices, since both
// // 's1' states would be considered to match.
// for _, c := range s1.content {
// toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
// }
// for _, c := range s2.content {
// toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
// }
toReturn.content = newContents(epsilon)
toReturn.isEmpty = true
toReturn.isAlternation = true
toReturn.leftState = s1
toReturn.rightState = s2
toReturn.next = s1
toReturn.splitState = s2
return toReturn
}
func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
s2 := &nfaState{}
s2.transitions = make(map[int][]*nfaState)
// s2.transitions = make(map[int][]*nfaState)
s2.content = newContents(epsilon)
s2.output = append(s2.output, s2)
s2.isEmpty = true
@@ -385,8 +388,8 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
// Creates and returns a new state with the 'default' values.
func newState() nfaState {
ret := nfaState{
output: make([]*nfaState, 0),
transitions: make(map[int][]*nfaState),
output: make([]*nfaState, 0),
// transitions: make(map[int][]*nfaState),
assert: noneAssert,
except: append([]rune{}, 0),
lookaroundRegex: "",