I replaced the 'transitions' map in nfaState with a single nfaState
pointer ('next'). This is because any non-alternation state will only
have one next state, so the map just added complexity.
I changed alternation processing - instead of having their own dedicated
fields, alternation states now just use the new 'next' field, and
another one called 'splitState'.
I also changed the Kleene state processing to remove the unnecessary
empty state in the right-side alternation (it actually messed up my
matching).
}elseifmiddleNode.groupBegin&&len(middleNode.transitions)==0{// The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
}elseifmiddleNode.groupBegin&&middleNode.numTransitions()==0{// The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
nfa=append(nfa,lparenNode)// I shouldn't have popped this out, because it is not involved in the current capturing group
nfa=append(nfa,lparenNode)// I shouldn't have popped this out, because it is not involved in the current capturing group
s.groupNum=middleNode.groupNum// In this case, the 'middle' node is actually an lparen
s.groupNum=middleNode.groupNum// In this case, the 'middle' node is actually an lparen
isEmptybool// If it is empty - Union operator and Kleene star states will be empty
isEmptybool// If it is empty - Union operator and Kleene star states will be empty
isLastbool// If it is the last state (acept state)
isLastbool// If it is the last state (acept state)
output[]*nfaState// The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
output[]*nfaState// The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
transitionsmap[int][]*nfaState// Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
// transitions map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
isKleene bool// Identifies whether current node is a 0-state representing Kleene star
next *nfaState// The next state (not for alternation or kleene states)
isQuestion bool// Identifies whether current node is a 0-state representing the question operator
isKleene bool// Identifies whether current node is a 0-state representing Kleene star
isAlternation bool// Identifies whether current node is a 0-state representing an alternation
isQuestion bool// Identifies whether current node is a 0-state representing the question operator
leftState *nfaState// Only for alternation states - the 'left' branch of the alternation
isAlternation bool// Identifies whether current node is a 0-state representing an alternation
rightState *nfaState// Only for alternation states - the 'right' branch of the alternation
splitState *nfaState// Only for alternation states - the 'other' branch of the alternation ('next' is the first)
assertassertType// Type of assertion of current node - NONE means that the node doesn't assert anything
assertassertType// Type of assertion of current node - NONE means that the node doesn't assert anything
allCharsbool// Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
allCharsbool// Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
except[]rune// Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
except[]rune// Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
lookaroundRegexstring// Only for lookaround states - Contents of the regex that the lookaround state holds
lookaroundRegexstring// Only for lookaround states - Contents of the regex that the lookaround state holds
lookaroundNFA*nfaState// Holds the NFA of the lookaroundRegex - if it exists
lookaroundNFA*nfaState// Holds the NFA of the lookaroundRegex - if it exists
lookaroundNumCaptureGroupsint// Number of capturing groups in lookaround regex if current node is a lookaround
lookaroundNumCaptureGroupsint// Number of capturing groups in lookaround regex if current node is a lookaround
groupBeginbool// Whether or not the node starts a capturing group
groupBeginbool// Whether or not the node starts a capturing group
groupEndbool// Whether or not the node ends a capturing group
groupEndbool// Whether or not the node ends a capturing group
groupNumint// Which capturing group the node starts / ends
groupNumint// Which capturing group the node starts / ends
// The following properties depend on the current match - I should think about resetting them for every match.
// The following properties depend on the current match - I should think about resetting them for every match.
zeroMatchFoundbool// Whether or not the state has been used for a zero-length match - only relevant for zero states
zeroMatchFoundbool// Whether or not the state has been used for a zero-length match - only relevant for zero states
threadGroups[]Group// Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
threadGroups[]Group// Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
ifst.isKleene{// A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
// verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
// // if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
return
// if st.numTransitions() == 1 { // Eg. a*
}
// var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
}
// for _, c := range st.content {
ifvisited[st]==true{
// if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
return
// moreThanOneTrans = true
}
// }
visited[st]=true
// }
for_,states:=rangest.transitions{
// st.isLast = !moreThanOneTrans
fori:=rangestates{
// }
ifstates[i]!=st{
//
verifyLastStatesHelper(states[i],visited)
// if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state