Got rid of transitions parameter, changed how kleene state is processed
I removed the 'transitions' map field from nfaState and replaced it with a single nfaState pointer. This is because any non-alternation state will only have one next state, so the map was just added complexity. I also changed alternation processing: instead of having their own dedicated fields, alternation states now use the new 'next' field and another one called 'splitState'. Finally, I changed the kleene state processing to remove the unnecessary empty state in the right-side alternation (it actually messed up my matching).
This commit is contained in:
@@ -822,7 +822,6 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
for _, c := range re {
|
||||
if c.nodetype == characterNode || c.nodetype == assertionNode {
|
||||
stateToAdd := nfaState{}
|
||||
stateToAdd.transitions = make(map[int][]*nfaState)
|
||||
if c.allChars {
|
||||
stateToAdd.allChars = true
|
||||
if len(c.except) != 0 {
|
||||
@@ -934,7 +933,6 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
s.isEmpty = true
|
||||
s.output = make([]*nfaState, 0)
|
||||
s.output = append(s.output, s)
|
||||
s.transitions = make(map[int][]*nfaState)
|
||||
// LPAREN nodes are just added normally
|
||||
if c.nodetype == lparenNode {
|
||||
numGroups++
|
||||
@@ -966,7 +964,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
s.groupNum = lparenNode.groupNum
|
||||
to_add := concatenate(lparenNode, s)
|
||||
nfa = append(nfa, to_add)
|
||||
} else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
|
||||
} else if middleNode.groupBegin && middleNode.numTransitions() == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
|
||||
nfa = append(nfa, lparenNode) // I shouldn't have popped this out, because it is not involved in the current capturing group
|
||||
s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually an lparen
|
||||
to_add := concatenate(middleNode, s)
|
||||
@@ -1030,14 +1028,14 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("error applying kleene star")
|
||||
}
|
||||
stateToAdd, err := kleene(*s1)
|
||||
stateToAdd, err := kleene(s1)
|
||||
if err != nil {
|
||||
return Reg{}, err
|
||||
}
|
||||
nfa = append(nfa, stateToAdd)
|
||||
case plusNode: // a+ is equivalent to aa*
|
||||
s1 := mustPop(&nfa)
|
||||
s2, err := kleene(*s1)
|
||||
s2, err := kleene(s1)
|
||||
if err != nil {
|
||||
return Reg{}, err
|
||||
}
|
||||
@@ -1061,14 +1059,14 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
// '^|a'
|
||||
s2, err1 := pop(&nfa)
|
||||
s1, err2 := pop(&nfa)
|
||||
if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
|
||||
if err2 != nil || (s2.groupBegin && s2.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
|
||||
if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
|
||||
nfa = append(nfa, s2)
|
||||
}
|
||||
tmp := zeroLengthMatchState()
|
||||
s2 = &tmp
|
||||
}
|
||||
if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN
|
||||
if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
|
||||
if err1 == nil { // See above for explanation
|
||||
nfa = append(nfa, s1)
|
||||
}
|
||||
@@ -1100,7 +1098,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
stateToAdd = concatenate(stateToAdd, cloneState(poppedState))
|
||||
}
|
||||
if c.endReps == infinite_reps { // Case 3
|
||||
s2, err := kleene(*poppedState)
|
||||
s2, err := kleene(poppedState)
|
||||
if err != nil {
|
||||
return Reg{}, err
|
||||
}
|
||||
@@ -1117,7 +1115,10 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
return Reg{}, fmt.Errorf("invalid regex")
|
||||
}
|
||||
|
||||
verifyLastStates(nfa)
|
||||
lastState := newState()
|
||||
lastState.isLast = true
|
||||
|
||||
concatenate(nfa[0], &lastState)
|
||||
|
||||
return Reg{nfa[0], numGroups}, nil
|
||||
|
||||
|
Reference in New Issue
Block a user