}elseifmiddleNode.groupBegin&&len(middleNode.transitions)==0{// The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
}elseifmiddleNode.groupBegin&&middleNode.numTransitions()==0{// The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
nfa=append(nfa,lparenNode)// I shouldn't have popped this out, because it is not involved in the current capturing group
nfa=append(nfa,lparenNode)// I shouldn't have popped this out, because it is not involved in the current capturing group
s.groupNum=middleNode.groupNum// In this case, the 'middle' node is actually an lparen
s.groupNum=middleNode.groupNum// In this case, the 'middle' node is actually an lparen
// Increment until we hit a character matching the start state (assuming not 0-state)
ifstart.isEmpty==false{
fori<len(str)&&!start.contentContains(str,i){
i++
}
startIdx=i
startingFrom=i
i++// Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
}
start.threadGroups=newMatch(numGroups+1)
start.threadGroups=newMatch(numGroups+1)
// Check if the start state begins a group - if so, add the start index to our list
// Keep taking zero-states, until there are no more left to take
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
lastStateInList:=false// Whether or not a last state was in our list of states
if!preferLongest{
varlastStatePtr*nfaState=nil// Pointer to the last-state, if it was found
lastLookaroundInList:=false// Whether or not a last state (that is a lookaround) was in our list of states
fornumStatesMatched==0&&lastStateInList==false{
iflen(currentStates)==0{
break
break
}
}
state,_:=pop(¤tStates)
}elseif!currentState.isAlternation&&!currentState.isKleene&&!currentState.isQuestion&&!currentState.groupBegin&&!currentState.groupEnd&¤tState.assert==noneAssert{// Normal character
iftempIndices[0].StartIdx==tempIndices[0].EndIdx{// If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
returntrue,tempIndices,tempIndices[0].EndIdx+1
}else{
}else{
returntrue,tempIndices,tempIndices[0].EndIdx
dst+="$"+numStr
}
}
}
}
returnfalse,[]Group{},startIdx
}
}
currentStates=make([]*nfaState,len(tempStates))
}else{
copy(currentStates,tempStates)
dst+=string(c)
tempStates=nil
i++
i++
}
}
// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
iftempIndices[0].StartIdx==tempIndices[0].EndIdx{// If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
returntrue,tempIndices,tempIndices[0].EndIdx+1
}else{
}else{
returntrue,tempIndices,tempIndices[0].EndIdx
complete=false
}
}
ifstartIdx==startingFrom{// Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
isEmptybool// If it is empty - Union operator and Kleene star states will be empty
isEmptybool// If it is empty - Union operator and Kleene star states will be empty
isLastbool// If it is the last state (accept state)
isLastbool// If it is the last state (accept state)
output[]*nfaState// The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
output[]*nfaState// The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
transitionsmap[int][]*nfaState// Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
// transitions map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
next*nfaState// The next state (not for alternation or kleene states)
isKleenebool// Identifies whether current node is a 0-state representing Kleene star
isKleenebool// Identifies whether current node is a 0-state representing Kleene star
isQuestionbool// Identifies whether current node is a 0-state representing the question operator
isQuestionbool// Identifies whether current node is a 0-state representing the question operator
isAlternationbool// Identifies whether current node is a 0-state representing an alternation
isAlternationbool// Identifies whether current node is a 0-state representing an alternation
splitState*nfaState// Only for alternation states - the 'other' branch of the alternation ('next' is the first)
assertassertType// Type of assertion of current node - NONE means that the node doesn't assert anything
assertassertType// Type of assertion of current node - NONE means that the node doesn't assert anything
allCharsbool// Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
allCharsbool// Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
except[]rune// Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
except[]rune// Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
return!slices.Contains(slices.Concat(notDotChars,s.except),str[idx])// Return true only if the index isn't a 'notDotChar', or isn't one of the exception characters for the current node.
return!slices.Contains(slices.Concat(notDotChars,s.except),str[idx])// Return true only if the index isn't a 'notDotChar', or isn't one of the exception characters for the current node.
@ -222,74 +228,84 @@ func (s nfaState) isLookaround() bool {
ifst.isKleene{// A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
// verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
// // if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
return
// if st.numTransitions() == 1 { // Eg. a*
}
// var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
}
// for _, c := range st.content {
ifvisited[st]==true{
// if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
return
// moreThanOneTrans = true
}
// }
visited[st]=true
// }
for_,states:=rangest.transitions{
// st.isLast = !moreThanOneTrans
fori:=rangestates{
// }
ifstates[i]!=st{
//
verifyLastStatesHelper(states[i],visited)
// if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state