Got rid of transitions parameter, changed how kleene state is processed

I replaced the transitions map in nfaState with a single nfaState
pointer. This is because any non-alternation state will only have one
next state, so the map was just added complexity.

I changed alternation processing - instead of having their own dedicated
fields, alternation states just use the new 'next' parameter, and another
one called 'splitState'.

I also changed the kleene state processing to remove the unnecessary
empty state in the right-side alternation (it actually messed up my
matching).
This commit is contained in:
2025-02-05 22:20:28 -05:00
parent 858e535fba
commit cca8c7cda2
2 changed files with 147 additions and 143 deletions

View File

@@ -822,7 +822,6 @@ func thompson(re []postfixNode) (Reg, error) {
for _, c := range re {
if c.nodetype == characterNode || c.nodetype == assertionNode {
stateToAdd := nfaState{}
stateToAdd.transitions = make(map[int][]*nfaState)
if c.allChars {
stateToAdd.allChars = true
if len(c.except) != 0 {
@@ -934,7 +933,6 @@ func thompson(re []postfixNode) (Reg, error) {
s.isEmpty = true
s.output = make([]*nfaState, 0)
s.output = append(s.output, s)
s.transitions = make(map[int][]*nfaState)
// LPAREN nodes are just added normally
if c.nodetype == lparenNode {
numGroups++
@@ -966,7 +964,7 @@ func thompson(re []postfixNode) (Reg, error) {
s.groupNum = lparenNode.groupNum
to_add := concatenate(lparenNode, s)
nfa = append(nfa, to_add)
} else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
} else if middleNode.groupBegin && middleNode.numTransitions() == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
nfa = append(nfa, lparenNode) // I shouldn't have popped this out, because it is not involved in the current capturing group
s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually an lparen
to_add := concatenate(middleNode, s)
@@ -1030,14 +1028,14 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
return Reg{}, fmt.Errorf("error applying kleene star")
}
stateToAdd, err := kleene(*s1)
stateToAdd, err := kleene(s1)
if err != nil {
return Reg{}, err
}
nfa = append(nfa, stateToAdd)
case plusNode: // a+ is equivalent to aa*
s1 := mustPop(&nfa)
s2, err := kleene(*s1)
s2, err := kleene(s1)
if err != nil {
return Reg{}, err
}
@@ -1061,14 +1059,14 @@ func thompson(re []postfixNode) (Reg, error) {
// '^|a'
s2, err1 := pop(&nfa)
s1, err2 := pop(&nfa)
if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
if err2 != nil || (s2.groupBegin && s2.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
nfa = append(nfa, s2)
}
tmp := zeroLengthMatchState()
s2 = &tmp
}
if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN
if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
if err1 == nil { // See above for explanation
nfa = append(nfa, s1)
}
@@ -1100,7 +1098,7 @@ func thompson(re []postfixNode) (Reg, error) {
stateToAdd = concatenate(stateToAdd, cloneState(poppedState))
}
if c.endReps == infinite_reps { // Case 3
s2, err := kleene(*poppedState)
s2, err := kleene(poppedState)
if err != nil {
return Reg{}, err
}
@@ -1117,7 +1115,10 @@ func thompson(re []postfixNode) (Reg, error) {
return Reg{}, fmt.Errorf("invalid regex")
}
verifyLastStates(nfa)
lastState := newState()
lastState.isLast = true
concatenate(nfa[0], &lastState)
return Reg{nfa[0], numGroups}, nil