Added field to denote all characters which an 'allChars' node _shouldn't_ match (useful for inverting character classes)
This commit is contained in:
16
nfa.go
16
nfa.go
@@ -23,7 +23,8 @@ type State struct {
|
|||||||
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
||||||
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
|
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
|
||||||
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
|
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
|
||||||
isDot bool // Whether or not the state represents a 'dot' metacharacter. A 'dot' node doesn't store any contents directly, as it would take up too much space
|
allChars bool // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
|
||||||
|
except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clones the NFA starting from the given state.
|
// Clones the NFA starting from the given state.
|
||||||
@@ -50,6 +51,8 @@ func cloneStateHelper(state *State, cloneMap map[*State]*State) *State {
|
|||||||
isKleene: state.isKleene,
|
isKleene: state.isKleene,
|
||||||
assert: state.assert,
|
assert: state.assert,
|
||||||
zeroMatchFound: state.zeroMatchFound,
|
zeroMatchFound: state.zeroMatchFound,
|
||||||
|
allChars: state.allChars,
|
||||||
|
except: append([]rune{}, state.except...),
|
||||||
}
|
}
|
||||||
cloneMap[state] = clone
|
cloneMap[state] = clone
|
||||||
for i, s := range state.output {
|
for i, s := range state.output {
|
||||||
@@ -95,8 +98,8 @@ func (s State) contentContains(str []rune, idx int) bool {
|
|||||||
if s.assert != NONE {
|
if s.assert != NONE {
|
||||||
return s.checkAssertion(str, idx)
|
return s.checkAssertion(str, idx)
|
||||||
}
|
}
|
||||||
if s.isDot {
|
if s.allChars {
|
||||||
return !slices.Contains(notDotChars, str[idx])
|
return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the character at the index is neither a 'notDotChar' nor one of the exception characters for the current node.
|
||||||
}
|
}
|
||||||
// Default - s.assert must be NONE
|
// Default - s.assert must be NONE
|
||||||
return slices.Contains(s.content, int(str[idx]))
|
return slices.Contains(s.content, int(str[idx]))
|
||||||
@@ -122,7 +125,12 @@ func (s State) matchesFor(str []rune, idx int) ([]*State, int) {
|
|||||||
}
|
}
|
||||||
listTransitions := s.transitions[int(str[idx])]
|
listTransitions := s.transitions[int(str[idx])]
|
||||||
for _, dest := range s.transitions[int(ANY_CHAR)] {
|
for _, dest := range s.transitions[int(ANY_CHAR)] {
|
||||||
listTransitions = append(listTransitions, dest)
|
if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
|
||||||
|
// Add an allChar state to the list of matches if:
|
||||||
|
// a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
|
||||||
|
// b. The current character isn't in the state's exception list.
|
||||||
|
listTransitions = append(listTransitions, dest)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
numTransitions := len(listTransitions)
|
numTransitions := len(listTransitions)
|
||||||
return listTransitions, numTransitions
|
return listTransitions, numTransitions
|
||||||
|
Reference in New Issue
Block a user