diff --git a/nfa.go b/nfa.go index 69b27d5..8436e91 100644 --- a/nfa.go +++ b/nfa.go @@ -23,7 +23,8 @@ type State struct { isKleene bool // Identifies whether current node is a 0-state representing Kleene star assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states - isDot bool // Whether or not the state represents a 'dot' metacharacter. A 'dot' node doesn't store any contents directly, as it would take up too much space + allChars bool // Whether or not the state represents all characters (e.g. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space + except []rune // Only valid if allChars is true - match all characters _except_ the ones in this slice. Useful for inverting character classes. } // Clones the NFA starting from the given state. @@ -50,6 +51,8 @@ func cloneStateHelper(state *State, cloneMap map[*State]*State) *State { isKleene: state.isKleene, assert: state.assert, zeroMatchFound: state.zeroMatchFound, + allChars: state.allChars, + except: append([]rune{}, state.except...), } cloneMap[state] = clone for i, s := range state.output { @@ -95,8 +98,8 @@ func (s State) contentContains(str []rune, idx int) bool { if s.assert != NONE { return s.checkAssertion(str, idx) } - if s.isDot { - return !slices.Contains(notDotChars, str[idx]) + if s.allChars { + return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the character at idx is neither a 'notDotChars' character nor one of the exception characters for the current node. NOTE(review): slices.Concat builds a new slice on every call; two separate slices.Contains checks would avoid that - consider before merging.
} // Default - s.assert must be NONE return slices.Contains(s.content, int(str[idx])) @@ -122,7 +125,12 @@ func (s State) matchesFor(str []rune, idx int) ([]*State, int) { } listTransitions := s.transitions[int(str[idx])] for _, dest := range s.transitions[int(ANY_CHAR)] { - listTransitions = append(listTransitions, dest) + if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) { + // Add an allChars state to the list of matches only if both hold: + // a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't. + // b. The current character isn't in the destination state's exception list. + listTransitions = append(listTransitions, dest) + } } numTransitions := len(listTransitions) return listTransitions, numTransitions