}elseifmiddleNode.groupBegin&&len(middleNode.transitions)==0{// The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
}elseifmiddleNode.groupBegin&&middleNode.numTransitions()==0{// The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
nfa=append(nfa,lparenNode)// I shouldn't have popped this out, because it is not involved in the current capturing group
nfa=append(nfa,lparenNode)// I shouldn't have popped this out, because it is not involved in the current capturing group
s.groupNum=middleNode.groupNum// In this case, the 'middle' node is actually an lparen
s.groupNum=middleNode.groupNum// In this case, the 'middle' node is actually an lparen
// Increment until we hit a character matching the start state (assuming not 0-state)
ifstart.isEmpty==false{
fori<len(str)&&!start.contentContains(str,i){
i++
}
startIdx=i
startingFrom=i
i++// Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
}
start.threadGroups=newMatch(numGroups+1)
start.threadGroups=newMatch(numGroups+1)
// Check if the start state begins a group - if so, add the start index to our list
// Keep taking zero-states, until there are no more left to take
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
}elseif!currentState.isAlternation&&!currentState.isKleene&&!currentState.isQuestion&&!currentState.groupBegin&&!currentState.groupEnd&&currentState.assert==noneAssert{// Normal character
iftempIndices[0].StartIdx==tempIndices[0].EndIdx{// If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
iftempIndices[0].StartIdx==tempIndices[0].EndIdx{// If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
state=state.next
returntrue,tempIndices,tempIndices[0].EndIdx+1
continue
}else{
returntrue,tempIndices,tempIndices[0].EndIdx
}
}
prefix+=string(rune(state.content[0]))
state=state.next
}
}
ifstartIdx==startingFrom{// Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
isEmptybool// If it is empty - Union operator and Kleene star states will be empty
isEmptybool// If it is empty - Union operator and Kleene star states will be empty
isLastbool// If it is the last state (accept state)
isLastbool// If it is the last state (accept state)
output[]*nfaState// The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
output[]*nfaState// The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
transitionsmap[int][]*nfaState// Transitions to different states (maps a character (int representation) to a _list_ of states). This is useful if one character can lead to multiple states, e.g. ab|aa.
// transitions map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
isKleenebool// Identifies whether current node is a 0-state representing Kleene star
next*nfaState// The next state (not for alternation or kleene states)
isQuestionbool// Identifies whether current node is a 0-state representing the question operator
isKleenebool// Identifies whether current node is a 0-state representing Kleene star
isAlternationbool// Identifies whether current node is a 0-state representing an alternation
isQuestionbool// Identifies whether current node is a 0-state representing the question operator
assertassertType// Type of assertion of current node - NONE means that the node doesn't assert anything
isAlternationbool// Identifies whether current node is a 0-state representing an alternation
allCharsbool// Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
splitState*nfaState// Only for alternation states - the 'other' branch of the alternation ('next' is the first)
except[]rune// Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
assertassertType// Type of assertion of current node - NONE means that the node doesn't assert anything
lookaroundRegexstring// Only for lookaround states - Contents of the regex that the lookaround state holds
allCharsbool// Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
lookaroundNFA*nfaState// Holds the NFA of the lookaroundRegex - if it exists
except[]rune// Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
lookaroundNumCaptureGroupsint// Number of capturing groups in lookaround regex if current node is a lookaround
lookaroundRegexstring// Only for lookaround states - Contents of the regex that the lookaround state holds
groupBeginbool// Whether or not the node starts a capturing group
lookaroundNFA*nfaState// Holds the NFA of the lookaroundRegex - if it exists
groupEndbool// Whether or not the node ends a capturing group
lookaroundNumCaptureGroupsint// Number of capturing groups in lookaround regex if current node is a lookaround
groupNumint// Which capturing group the node starts / ends
groupBeginbool// Whether or not the node starts a capturing group
groupEndbool// Whether or not the node ends a capturing group
groupNumint// Which capturing group the node starts / ends
// The following properties depend on the current match - I should think about resetting them for every match.
// The following properties depend on the current match - I should think about resetting them for every match.
zeroMatchFoundbool// Whether or not the state has been used for a zero-length match - only relevant for zero states
zeroMatchFoundbool// Whether or not the state has been used for a zero-length match - only relevant for zero states
threadGroups[]Group// Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
threadGroups[]Group// Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
return!slices.Contains(slices.Concat(notDotChars,s.except),str[idx])// Return true only if the character at the index is neither a 'notDotChar' nor one of the exception characters for the current node.
return!slices.Contains(slices.Concat(notDotChars,s.except),str[idx])// Return true only if the character at the index is neither a 'notDotChar' nor one of the exception characters for the current node.
@ -222,74 +228,84 @@ func (s nfaState) isLookaround() bool {
ifst.isKleene{// A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
// verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
// // if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
return
// if st.numTransitions() == 1 { // Eg. a*
}
// var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
}
// for _, c := range st.content {
ifvisited[st]==true{
// if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
return
// moreThanOneTrans = true
}
// }
visited[st]=true
// }
for_,states:=rangest.transitions{
// st.isLast = !moreThanOneTrans
fori:=rangestates{
// }
ifstates[i]!=st{
//
verifyLastStatesHelper(states[i],visited)
// if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state