|
|
@ -658,6 +658,21 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
func thompson(re []postfixNode) (Reg, error) {
|
|
|
|
func thompson(re []postfixNode) (Reg, error) {
|
|
|
|
nfa := make([]*State, 0) // Stack of states
|
|
|
|
nfa := make([]*State, 0) // Stack of states
|
|
|
|
numGroups := 0 // Number of capturing groups
|
|
|
|
numGroups := 0 // Number of capturing groups
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// If thompson() receives an empty regex, then whatever was given to shuntingYard()
|
|
|
|
|
|
|
|
// was parsed away. This doesn't mean that the regex itself is empty.
|
|
|
|
|
|
|
|
// For example, it could have been '(?:)'. This is an empty non-capturing group. Since
|
|
|
|
|
|
|
|
// shuntingYard() doesn't include non-capturing groups in its output (and the group contains
|
|
|
|
|
|
|
|
// nothing), the output of shuntingYard() (and the input to thompson()) ends up being empty.
|
|
|
|
|
|
|
|
// In these cases, we will return an NFA with 1 state, with an assertion that is always true.
|
|
|
|
|
|
|
|
if len(re) == 0 {
|
|
|
|
|
|
|
|
start := newState()
|
|
|
|
|
|
|
|
start.content = newContents(EPSILON)
|
|
|
|
|
|
|
|
start.isEmpty = true
|
|
|
|
|
|
|
|
start.assert = ALWAYS_TRUE
|
|
|
|
|
|
|
|
nfa = append(nfa, &start)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for _, c := range re {
|
|
|
|
for _, c := range re {
|
|
|
|
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
|
|
|
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
|
|
|
state := State{}
|
|
|
|
state := State{}
|
|
|
|