diff --git a/compile.go b/compile.go index 759a482..28038cf 100644 --- a/compile.go +++ b/compile.go @@ -666,10 +666,7 @@ func thompson(re []postfixNode) (Reg, error) { // nothing), the output of shuntingYard() (and the input to thompson()) ends up being empty. // In these cases, we will return an NFA with 1 state, with an assertion that is always true. if len(re) == 0 { - start := newState() - start.content = newContents(EPSILON) - start.isEmpty = true - start.assert = ALWAYS_TRUE + start := zeroLengthMatchState() nfa = append(nfa, &start) } @@ -852,8 +849,30 @@ func thompson(re []postfixNode) (Reg, error) { s2 := question(s1) nfa = append(nfa, s2) case PIPE: - s1 := mustPop(&nfa) - s2 := mustPop(&nfa) + // A pipe operator doesn't actually need either operand to be present. If an operand isn't present, + // it is replaced with an implicit 'matchZeroLength' state (this is the same thing that we add at the top if our + // input has zero postfixNodes). + // Things to think about: + // 'a|' + // '|a' + // '^a|' + // '^|a' + s1, err1 := pop(&nfa) + s2, err2 := pop(&nfa) + if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN + if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back + nfa = append(nfa, s2) + } + tmp := zeroLengthMatchState() + s2 = &tmp + } + if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN + if err1 == nil { // See above for explanation + nfa = append(nfa, s1) + } + tmp := zeroLengthMatchState() + s1 = &tmp + } s3 := alternate(s1, s2) nfa = append(nfa, s3) }