Added detection of empty parentheses, as zero-length matches
This commit is contained in:
35
compile.go
35
compile.go
@@ -764,15 +764,36 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
// and then some other node.
|
||||
// These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated
|
||||
// and added back in.
|
||||
// If the middle node doesn't exist (i.e. something like '()'), that's fine; I just connect the LPAREN
|
||||
// and RPAREN nodes.
|
||||
// If neither node exists, that's a problem so I return an error.
|
||||
if c.nodetype == RPAREN {
|
||||
s.groupEnd = true
|
||||
middleNode := mustPop(&nfa)
|
||||
lparenNode := mustPop(&nfa)
|
||||
s.groupNum = lparenNode.groupNum
|
||||
tmp := concatenate(lparenNode, middleNode)
|
||||
to_add := concatenate(tmp, s)
|
||||
nfa = append(nfa, to_add)
|
||||
|
||||
middleNode, err1 := pop(&nfa)
|
||||
lparenNode, err2 := pop(&nfa)
|
||||
if err1 != nil && err2 != nil {
|
||||
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
|
||||
} else if err2 != nil { // There was no third node. ie. something like '()'
|
||||
lparenNode = middleNode
|
||||
if lparenNode.groupBegin != true { // There are only two nodes, but the first one isn't an LPAREN.
|
||||
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
|
||||
}
|
||||
s.groupNum = lparenNode.groupNum
|
||||
to_add := concatenate(lparenNode, s)
|
||||
nfa = append(nfa, to_add)
|
||||
} else {
|
||||
// At this point, we assume all three nodes are valid ('lparenNode', 'middleNode' and 's')
|
||||
if lparenNode.groupBegin {
|
||||
s.groupNum = lparenNode.groupNum
|
||||
} else if middleNode.groupBegin { // Something like 'a()'
|
||||
s.groupNum = middleNode.groupNum
|
||||
} else { // A middleNode and lparenNode exist, but neither is actually an LPAREN.
|
||||
return Reg{}, fmt.Errorf("Imbalanced parentheses.")
|
||||
}
|
||||
tmp := concatenate(lparenNode, middleNode)
|
||||
to_add := concatenate(tmp, s)
|
||||
nfa = append(nfa, to_add)
|
||||
}
|
||||
}
|
||||
}
|
||||
if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated
|
||||
|
Reference in New Issue
Block a user