|
|
@ -380,16 +380,21 @@ func shuntingYard(re string) []postfixNode {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
node, err := pop(&outQueue)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
idx := len(outQueue) - 1
|
|
|
|
|
|
|
|
// Get the most recently added non-paren node
|
|
|
|
|
|
|
|
for node := outQueue[idx]; idx >= 0 && (node.nodetype == RPAREN || node.nodetype == LPAREN); node = outQueue[idx] {
|
|
|
|
|
|
|
|
idx--
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if idx < 0 {
|
|
|
|
panic("Numeric specifier with no content.")
|
|
|
|
panic("Numeric specifier with no content.")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
node.startReps = startRangeNum
|
|
|
|
outQueue[idx].startReps = startRangeNum
|
|
|
|
node.endReps = endRangeNum
|
|
|
|
outQueue[idx].endReps = endRangeNum
|
|
|
|
outQueue = append(outQueue, node)
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if c == '(' {
|
|
|
|
if c == '(' {
|
|
|
|
opStack = append(opStack, c)
|
|
|
|
opStack = append(opStack, c)
|
|
|
|
|
|
|
|
outQueue = append(outQueue, newPostfixNode(c))
|
|
|
|
numOpenParens++
|
|
|
|
numOpenParens++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if c == ')' {
|
|
|
|
if c == ')' {
|
|
|
@ -401,7 +406,8 @@ func shuntingYard(re string) []postfixNode {
|
|
|
|
to_append := mustPop(&opStack)
|
|
|
|
to_append := mustPop(&opStack)
|
|
|
|
outQueue = append(outQueue, newPostfixNode(to_append))
|
|
|
|
outQueue = append(outQueue, newPostfixNode(to_append))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ = mustPop(&opStack) // Get rid of opening parantheses
|
|
|
|
_ = mustPop(&opStack) // Get rid of opening parentheses
|
|
|
|
|
|
|
|
outQueue = append(outQueue, newPostfixNode(')')) // Add closing parentheses
|
|
|
|
numOpenParens--
|
|
|
|
numOpenParens--
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -420,9 +426,10 @@ func shuntingYard(re string) []postfixNode {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Thompson's algorithm. Constructs Finite-State Automaton from given string.
|
|
|
|
// Thompson's algorithm. Constructs Finite-State Automaton from given string.
|
|
|
|
// Returns start state.
|
|
|
|
// Returns start state and number of groups in regex.
|
|
|
|
func thompson(re []postfixNode) *State {
|
|
|
|
func thompson(re []postfixNode) (*State, int) {
|
|
|
|
nfa := make([]*State, 0) // Stack of states
|
|
|
|
nfa := make([]*State, 0) // Stack of states
|
|
|
|
|
|
|
|
numGroups := 0 // Number of capturing groups
|
|
|
|
for _, c := range re {
|
|
|
|
for _, c := range re {
|
|
|
|
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
|
|
|
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
|
|
|
state := State{}
|
|
|
|
state := State{}
|
|
|
@ -470,12 +477,45 @@ func thompson(re []postfixNode) *State {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
tmpRe := shuntingYard(state.lookaroundRegex)
|
|
|
|
tmpRe := shuntingYard(state.lookaroundRegex)
|
|
|
|
state.lookaroundNFA = thompson(tmpRe)
|
|
|
|
var numGroupsLookaround int
|
|
|
|
|
|
|
|
state.lookaroundNFA, numGroupsLookaround = thompson(tmpRe)
|
|
|
|
|
|
|
|
state.lookaroundNumCaptureGroups = numGroupsLookaround
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
nfa = append(nfa, &state)
|
|
|
|
nfa = append(nfa, &state)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if c.nodetype == LPAREN || c.nodetype == RPAREN {
|
|
|
|
|
|
|
|
s := &State{}
|
|
|
|
|
|
|
|
s.assert = NONE
|
|
|
|
|
|
|
|
s.content = newContents(EPSILON)
|
|
|
|
|
|
|
|
s.isEmpty = true
|
|
|
|
|
|
|
|
s.output = make([]*State, 0)
|
|
|
|
|
|
|
|
s.output = append(s.output, s)
|
|
|
|
|
|
|
|
s.transitions = make(map[int][]*State)
|
|
|
|
|
|
|
|
// LPAREN nodes are just added normally
|
|
|
|
|
|
|
|
if c.nodetype == LPAREN {
|
|
|
|
|
|
|
|
numGroups++
|
|
|
|
|
|
|
|
s.groupBegin = true
|
|
|
|
|
|
|
|
s.groupNum = numGroups
|
|
|
|
|
|
|
|
nfa = append(nfa, s)
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// For RPAREN nodes, I assume that the last two nodes in the list are an LPAREN,
|
|
|
|
|
|
|
|
// and then some other node.
|
|
|
|
|
|
|
|
// These three nodes (LPAREN, the middle node and RPAREN) are extracted together, concatenated
|
|
|
|
|
|
|
|
// and added back in.
|
|
|
|
|
|
|
|
if c.nodetype == RPAREN {
|
|
|
|
|
|
|
|
s.groupEnd = true
|
|
|
|
|
|
|
|
middleNode := mustPop(&nfa)
|
|
|
|
|
|
|
|
lparenNode := mustPop(&nfa)
|
|
|
|
|
|
|
|
s.groupNum = lparenNode.groupNum
|
|
|
|
|
|
|
|
tmp := concatenate(lparenNode, middleNode)
|
|
|
|
|
|
|
|
to_add := concatenate(tmp, s)
|
|
|
|
|
|
|
|
nfa = append(nfa, to_add)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
// Must be an operator if it isn't a character
|
|
|
|
// Must be an operator if it isn't a character
|
|
|
|
switch c.nodetype {
|
|
|
|
switch c.nodetype {
|
|
|
|
case CONCATENATE:
|
|
|
|
case CONCATENATE:
|
|
|
@ -540,7 +580,7 @@ func thompson(re []postfixNode) *State {
|
|
|
|
|
|
|
|
|
|
|
|
verifyLastStates(nfa)
|
|
|
|
verifyLastStates(nfa)
|
|
|
|
|
|
|
|
|
|
|
|
return nfa[0]
|
|
|
|
return nfa[0], numGroups
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -597,7 +637,7 @@ func main() {
|
|
|
|
out := bufio.NewWriter(os.Stdout)
|
|
|
|
out := bufio.NewWriter(os.Stdout)
|
|
|
|
|
|
|
|
|
|
|
|
re_postfix := shuntingYard(re)
|
|
|
|
re_postfix := shuntingYard(re)
|
|
|
|
startState := thompson(re_postfix)
|
|
|
|
startState, numGroups := thompson(re_postfix)
|
|
|
|
for true {
|
|
|
|
for true {
|
|
|
|
if linesRead {
|
|
|
|
if linesRead {
|
|
|
|
break
|
|
|
|
break
|
|
|
@ -613,6 +653,9 @@ func main() {
|
|
|
|
panic(err)
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(test_str) > 0 && test_str[len(test_str)-1] == '\n' {
|
|
|
|
|
|
|
|
test_str = test_str[:len(test_str)-1]
|
|
|
|
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
// Multi-line mode - read every line of input into a temp. string.
|
|
|
|
// Multi-line mode - read every line of input into a temp. string.
|
|
|
|
// test_str will contain all lines of input (including newline characters)
|
|
|
|
// test_str will contain all lines of input (including newline characters)
|
|
|
@ -632,7 +675,7 @@ func main() {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
test_runes = []rune(test_str)
|
|
|
|
test_runes = []rune(test_str)
|
|
|
|
matchIndices := findAllMatches(startState, test_runes)
|
|
|
|
matchIndices := findAllMatches(startState, test_runes, numGroups)
|
|
|
|
if *printMatchesFlag {
|
|
|
|
if *printMatchesFlag {
|
|
|
|
// if we are in single line mode, print the line on which
|
|
|
|
// if we are in single line mode, print the line on which
|
|
|
|
// the matches occur
|
|
|
|
// the matches occur
|
|
|
@ -654,7 +697,7 @@ func main() {
|
|
|
|
// This should make checking O(1) instead of O(n)
|
|
|
|
// This should make checking O(1) instead of O(n)
|
|
|
|
indicesToPrint := new_uniq_arr[int]()
|
|
|
|
indicesToPrint := new_uniq_arr[int]()
|
|
|
|
for _, idx := range matchIndices {
|
|
|
|
for _, idx := range matchIndices {
|
|
|
|
indicesToPrint.add(genRange(idx.startIdx, idx.endIdx)...)
|
|
|
|
indicesToPrint.add(genRange(idx[0].startIdx, idx[0].endIdx)...)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// If we are inverting, then we should print the indices which _didn't_ match
|
|
|
|
// If we are inverting, then we should print the indices which _didn't_ match
|
|
|
|
// in color.
|
|
|
|
// in color.
|
|
|
@ -689,9 +732,9 @@ func main() {
|
|
|
|
for i := range test_runes {
|
|
|
|
for i := range test_runes {
|
|
|
|
inMatchIndex := false
|
|
|
|
inMatchIndex := false
|
|
|
|
for _, idx := range matchIndices {
|
|
|
|
for _, idx := range matchIndices {
|
|
|
|
if i == idx.startIdx {
|
|
|
|
if i == idx[0].startIdx {
|
|
|
|
fmt.Fprintf(out, "%s", *substituteText)
|
|
|
|
fmt.Fprintf(out, "%s", *substituteText)
|
|
|
|
i = idx.endIdx
|
|
|
|
i = idx[0].endIdx
|
|
|
|
inMatchIndex = true
|
|
|
|
inMatchIndex = true
|
|
|
|
break
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -707,7 +750,7 @@ func main() {
|
|
|
|
// Newline after every match - only if -o is enabled and -v is disabled.
|
|
|
|
// Newline after every match - only if -o is enabled and -v is disabled.
|
|
|
|
if *onlyFlag && !(*invertFlag) {
|
|
|
|
if *onlyFlag && !(*invertFlag) {
|
|
|
|
for _, idx := range matchIndices {
|
|
|
|
for _, idx := range matchIndices {
|
|
|
|
if i+1 == idx.endIdx { // End index is one more than last index of match
|
|
|
|
if i+1 == idx[0].endIdx { // End index is one more than last index of match
|
|
|
|
fmt.Fprintf(out, "\n")
|
|
|
|
fmt.Fprintf(out, "\n")
|
|
|
|
break
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -724,5 +767,6 @@ func main() {
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fmt.Println()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|