diff --git a/main.go b/main.go index e4af22f..94c43ba 100644 --- a/main.go +++ b/main.go @@ -380,16 +380,21 @@ func shuntingYard(re string) []postfixNode { } } } - node, err := pop(&outQueue) - if err != nil { + + idx := len(outQueue) - 1 + // Get the most recently added non-paren node + for node := outQueue[idx]; idx >= 0 && (node.nodetype == RPAREN || node.nodetype == LPAREN); node = outQueue[idx] { + idx-- + } + if idx < 0 { panic("Numeric specifier with no content.") } - node.startReps = startRangeNum - node.endReps = endRangeNum - outQueue = append(outQueue, node) + outQueue[idx].startReps = startRangeNum + outQueue[idx].endReps = endRangeNum } if c == '(' { opStack = append(opStack, c) + outQueue = append(outQueue, newPostfixNode(c)) numOpenParens++ } if c == ')' { @@ -401,7 +406,8 @@ func shuntingYard(re string) []postfixNode { to_append := mustPop(&opStack) outQueue = append(outQueue, newPostfixNode(to_append)) } - _ = mustPop(&opStack) // Get rid of opening parantheses + _ = mustPop(&opStack) // Get rid of opening parentheses + outQueue = append(outQueue, newPostfixNode(')')) // Add closing parentheses numOpenParens-- } } @@ -420,9 +426,10 @@ func shuntingYard(re string) []postfixNode { } // Thompson's algorithm. Constructs Finite-State Automaton from given string. -// Returns start state. -func thompson(re []postfixNode) *State { +// Returns start state and number of groups in regex. +func thompson(re []postfixNode) (*State, int) { nfa := make([]*State, 0) // Stack of states + numGroups := 0 // Number of capturing groups for _, c := range re { if c.nodetype == CHARACTER || c.nodetype == ASSERTION { state := State{} @@ -470,12 +477,45 @@ func thompson(re []postfixNode) *State { } } tmpRe := shuntingYard(state.lookaroundRegex) - state.lookaroundNFA = thompson(tmpRe) + var numGroupsLookaround int + state.lookaroundNFA, numGroupsLookaround = thompson(tmpRe) + state.lookaroundNumCaptureGroups = numGroupsLookaround } } nfa = append(nfa, &state) } + if c.nodetype == LPAREN || c.nodetype == RPAREN { + s := &State{} + s.assert = NONE + s.content = newContents(EPSILON) + s.isEmpty = true + s.output = make([]*State, 0) + s.output = append(s.output, s) + s.transitions = make(map[int][]*State) + // LPAREN nodes are just added normally + if c.nodetype == LPAREN { + numGroups++ + s.groupBegin = true + s.groupNum = numGroups + nfa = append(nfa, s) + continue + } + // For RPAREN nodes, I assume that the last two nodes in the list are an LPAREN, + // and then some other node. + // These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated + // and added back in. + if c.nodetype == RPAREN { + s.groupEnd = true + middleNode := mustPop(&nfa) + lparenNode := mustPop(&nfa) + s.groupNum = lparenNode.groupNum + tmp := concatenate(lparenNode, middleNode) + to_add := concatenate(tmp, s) + nfa = append(nfa, to_add) + + } + } // Must be an operator if it isn't a character switch c.nodetype { case CONCATENATE: @@ -540,7 +580,7 @@ func thompson(re []postfixNode) *State { verifyLastStates(nfa) - return nfa[0] + return nfa[0], numGroups } @@ -597,7 +637,7 @@ func main() { out := bufio.NewWriter(os.Stdout) re_postfix := shuntingYard(re) - startState := thompson(re_postfix) + startState, numGroups := thompson(re_postfix) for true { if linesRead { break @@ -613,6 +653,9 @@ func main() { panic(err) } } + if len(test_str) > 0 && test_str[len(test_str)-1] == '\n' { + test_str = test_str[:len(test_str)-1] + } } else { // Multi-line mode - read every line of input into a temp. string. // test_str will contain all lines of input (including newline characters) @@ -632,7 +675,7 @@ func main() { } } test_runes = []rune(test_str) - matchIndices := findAllMatches(startState, test_runes) + matchIndices := findAllMatches(startState, test_runes, numGroups) if *printMatchesFlag { // if we are in single line mode, print the line on which // the matches occur @@ -654,7 +697,7 @@ func main() { // This should make checking O(1) instead of O(n) indicesToPrint := new_uniq_arr[int]() for _, idx := range matchIndices { - indicesToPrint.add(genRange(idx.startIdx, idx.endIdx)...) + indicesToPrint.add(genRange(idx[0].startIdx, idx[0].endIdx)...) } // If we are inverting, then we should print the indices which _didn't_ match // in color. @@ -689,9 +732,9 @@ func main() { for i := range test_runes { inMatchIndex := false for _, idx := range matchIndices { - if i == idx.startIdx { + if i == idx[0].startIdx { fmt.Fprintf(out, "%s", *substituteText) - i = idx.endIdx + i = idx[0].endIdx inMatchIndex = true break } @@ -707,7 +750,7 @@ func main() { // Newline after every match - only if -o is enabled and -v is disabled. if *onlyFlag && !(*invertFlag) { for _, idx := range matchIndices { - if i+1 == idx.endIdx { // End index is one more than last index of match + if i+1 == idx[0].endIdx { // End index is one more than last index of match fmt.Fprintf(out, "\n") break } @@ -724,5 +767,6 @@ func main() { if err != nil { panic(err) } + fmt.Println() } }