From 6a96c98d04abe8b60ce5b60b84b5caa697bd0ebb Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Sat, 1 Feb 2025 19:20:33 -0500 Subject: [PATCH] Fixed bug where the regex '(()|.)(b)' wouldn't compile --- regex/compile.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/regex/compile.go b/regex/compile.go index fe324be..eb78a93 100644 --- a/regex/compile.go +++ b/regex/compile.go @@ -949,7 +949,9 @@ func thompson(re []postfixNode) (Reg, error) { // and added back in. // If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN // and RPAREN nodes. - // If neither node exists, that's a problem so I return an error. + // If the middle node exists but is itself the start of a group, then that _must_ be the opening paren for + // the closing paren that I'm on. I put the third node back (because it isn't involved in the capturing group), then + // I concatenate the middle node with the current node and add the result. If neither node exists, that's a problem so I return an error. if c.nodetype == rparenNode { s.groupEnd = true middleNode, err1 := pop(&nfa) @@ -964,6 +966,11 @@ func thompson(re []postfixNode) (Reg, error) { s.groupNum = lparenNode.groupNum to_add := concatenate(lparenNode, s) nfa = append(nfa, to_add) + } else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first closing parenthesis + nfa = append(nfa, lparenNode) + s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually a paren node + to_add := concatenate(middleNode, s) + nfa = append(nfa, to_add) } else { // At this point, we assume all three nodes are valid ('lparenNode', 'middleNode' and 's') if lparenNode.groupBegin {