From 4966a222f9735b6aaaa9ee6c1caed8fabc7d66ca Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Sat, 25 Jan 2025 12:44:40 -0500 Subject: [PATCH] Added detection of empty parentheses, as zero-length matches --- compile.go | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/compile.go b/compile.go index e57ab66..0566133 100644 --- a/compile.go +++ b/compile.go @@ -764,15 +764,36 @@ func thompson(re []postfixNode) (Reg, error) { // and then some other node. // These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated // and added back in. + // If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN + // and RPAREN nodes. + // If neither node exists, that's a problem so I return an error. if c.nodetype == RPAREN { s.groupEnd = true - middleNode := mustPop(&nfa) - lparenNode := mustPop(&nfa) - s.groupNum = lparenNode.groupNum - tmp := concatenate(lparenNode, middleNode) - to_add := concatenate(tmp, s) - nfa = append(nfa, to_add) - + middleNode, err1 := pop(&nfa) + lparenNode, err2 := pop(&nfa) + if err1 != nil && err2 != nil { + return Reg{}, fmt.Errorf("Imbalanced parentheses.") + } else if err2 != nil { // There was no third node. ie. something like '()' + lparenNode = middleNode + if lparenNode.groupBegin != true { // There are only two nodes, but the first one isn't an LPAREN. + return Reg{}, fmt.Errorf("Imbalanced parentheses.") + } + s.groupNum = lparenNode.groupNum + to_add := concatenate(lparenNode, s) + nfa = append(nfa, to_add) + } else { + // At this point, we assume all three nodes are valid ('lparenNode', 'middleNode' and 's') + if lparenNode.groupBegin { + s.groupNum = lparenNode.groupNum + } else if middleNode.groupBegin { // Something like 'a()' + s.groupNum = middleNode.groupNum + } else { // A middleNode and lparenNode exist, but neither is actually an LPAREN. + return Reg{}, fmt.Errorf("Imbalanced parentheses.") + } + tmp := concatenate(lparenNode, middleNode) + to_add := concatenate(tmp, s) + nfa = append(nfa, to_add) + } } } if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated