From de0d7345a8792180d05823067b92dc7934b927eb Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Mon, 3 Feb 2025 21:59:05 -0500 Subject: [PATCH] Store left and right branches of alternation separately --- regex/nfa.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/regex/nfa.go b/regex/nfa.go index 0ceea1b..f03edab 100644 --- a/regex/nfa.go +++ b/regex/nfa.go @@ -33,6 +33,8 @@ type nfaState struct { isKleene bool // Identifies whether current node is a 0-state representing Kleene star isQuestion bool // Identifies whether current node is a 0-state representing the question operator isAlternation bool // Identifies whether current node is a 0-state representing an alternation + leftState *nfaState // Only for alternation states - the 'left' branch of the alternation + rightState *nfaState // Only for alternation states - the 'right' branch of the alternation assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything allChars bool // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes. @@ -106,6 +108,15 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState) clone.lookaroundNFA = clone } clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap) + if stateToClone.leftState == stateToClone { + clone.leftState = clone + } + clone.leftState = cloneStateHelper(stateToClone.leftState, cloneMap) + if stateToClone.rightState == stateToClone { + clone.rightState = clone + } + clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap) + return clone } @@ -213,6 +224,9 @@ func (s nfaState) contentContains(str []rune, idx int) bool { if s.assert != noneAssert { return s.checkAssertion(str, idx) } + if idx >= len(str) { + return false + } if s.allChars { return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the index isn't a 'notDotChar', or isn't one of the exception characters for the current node. } @@ -348,6 +362,8 @@ func alternate(s1 *nfaState, s2 *nfaState) *nfaState { toReturn.content = newContents(epsilon) toReturn.isEmpty = true toReturn.isAlternation = true + toReturn.leftState = s1 + toReturn.rightState = s2 return toReturn } @@ -358,7 +374,7 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|) s2.content = newContents(epsilon) s2.output = append(s2.output, s2) s2.isEmpty = true - s2.isQuestion = true + s2.isAlternation = true s3 := alternate(s1, s2) return s3 }