Implement PCRE Matching (prefer left-branch) #2
18
regex/nfa.go
18
regex/nfa.go
@@ -33,6 +33,8 @@ type nfaState struct {
|
|||||||
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
||||||
isQuestion bool // Identifies whether current node is a 0-state representing the question operator
|
isQuestion bool // Identifies whether current node is a 0-state representing the question operator
|
||||||
isAlternation bool // Identifies whether current node is a 0-state representing an alternation
|
isAlternation bool // Identifies whether current node is a 0-state representing an alternation
|
||||||
|
leftState *nfaState // Only for alternation states - the 'left' branch of the alternation
|
||||||
|
rightState *nfaState // Only for alternation states - the 'right' branch of the alternation
|
||||||
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
|
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
|
||||||
allChars bool // Whether or not the state represents all characters (e.g. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
|
allChars bool // Whether or not the state represents all characters (e.g. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
|
||||||
except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
|
except []rune // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
|
||||||
@@ -106,6 +108,15 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
|
|||||||
clone.lookaroundNFA = clone
|
clone.lookaroundNFA = clone
|
||||||
}
|
}
|
||||||
clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
|
clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
|
||||||
|
if stateToClone.leftState == stateToClone {
|
||||||
|
clone.leftState = clone
|
||||||
|
}
|
||||||
|
clone.leftState = cloneStateHelper(stateToClone.leftState, cloneMap)
|
||||||
|
if stateToClone.rightState == stateToClone {
|
||||||
|
clone.rightState = clone
|
||||||
|
}
|
||||||
|
clone.rightState = cloneStateHelper(stateToClone.rightState, cloneMap)
|
||||||
|
|
||||||
return clone
|
return clone
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -213,6 +224,9 @@ func (s nfaState) contentContains(str []rune, idx int) bool {
|
|||||||
if s.assert != noneAssert {
|
if s.assert != noneAssert {
|
||||||
return s.checkAssertion(str, idx)
|
return s.checkAssertion(str, idx)
|
||||||
}
|
}
|
||||||
|
if idx >= len(str) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
if s.allChars {
|
if s.allChars {
|
||||||
return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the character at idx is neither a 'notDotChar' nor one of the exception characters for the current node.
|
return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the character at idx is neither a 'notDotChar' nor one of the exception characters for the current node.
|
||||||
}
|
}
|
||||||
@@ -348,6 +362,8 @@ func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
|
|||||||
toReturn.content = newContents(epsilon)
|
toReturn.content = newContents(epsilon)
|
||||||
toReturn.isEmpty = true
|
toReturn.isEmpty = true
|
||||||
toReturn.isAlternation = true
|
toReturn.isAlternation = true
|
||||||
|
toReturn.leftState = s1
|
||||||
|
toReturn.rightState = s2
|
||||||
|
|
||||||
return toReturn
|
return toReturn
|
||||||
}
|
}
|
||||||
@@ -358,7 +374,7 @@ func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
|
|||||||
s2.content = newContents(epsilon)
|
s2.content = newContents(epsilon)
|
||||||
s2.output = append(s2.output, s2)
|
s2.output = append(s2.output, s2)
|
||||||
s2.isEmpty = true
|
s2.isEmpty = true
|
||||||
s2.isQuestion = true
|
s2.isAlternation = true
|
||||||
s3 := alternate(s1, s2)
|
s3 := alternate(s1, s2)
|
||||||
return s3
|
return s3
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user