You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
195 lines
5.3 KiB
Go
195 lines
5.3 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"slices"
|
|
|
|
"github.com/fatih/color"
|
|
)
|
|
|
|
// CONCAT is the rune used as the explicit concatenation operator.
// shuntingYard inserts it between implicitly concatenated tokens so that
// concatenation can be handled like the other operators. A literal '~' in a
// pattern is therefore indistinguishable from this operator.
const CONCAT rune = '~'
|
|
|
|
func isOperator(c rune) bool {
|
|
if c == '*' || c == '|' || c == CONCAT {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
/* priority returns the priority of the given operator */
|
|
func priority(op rune) int {
|
|
precedence := []rune{'|', CONCAT, '*'}
|
|
return slices.Index(precedence, op)
|
|
}
|
|
|
|
/*
|
|
shuntingYard applies the Shunting-Yard algorithm
|
|
|
|
to convert the given infix expression to postfix. This makes
|
|
it easier to parse the algorithm when doing Thompson.
|
|
See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-with-the-shunting-yard-algorithm/
|
|
*/
|
|
func shuntingYard(re string) string {
|
|
re_postfix := make([]rune, 0)
|
|
re_runes := []rune(re)
|
|
/* Add concatenation operators */
|
|
for i := 0; i < len(re_runes); i++ {
|
|
re_postfix = append(re_postfix, re_runes[i])
|
|
if re_runes[i] != '(' && re_runes[i] != '|' {
|
|
if i < len(re_runes)-1 {
|
|
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != ')' {
|
|
re_postfix = append(re_postfix, CONCAT)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// fmt.Println(string(re_postfix))
|
|
|
|
opStack := make([]rune, 0) // Operator stack
|
|
outQueue := make([]rune, 0) // Output queue
|
|
|
|
// Actual algorithm
|
|
for _, c := range re_postfix {
|
|
/* Two cases:
|
|
1. Current character is alphanumeric - send to output queue
|
|
2. Current character is operator - do the following:
|
|
a. If current character has greater priority than top of opStack, push to opStack.
|
|
b. If not, keep popping from opStack (and appending to outQueue) until:
|
|
i. opStack is empty, OR
|
|
ii. current character has greater priority than top of opStack
|
|
3. If current character is '(', push to opStack
|
|
4. If current character is ')', pop from opStack (and append to outQueue) until '(' is found. Discard parantheses.
|
|
*/
|
|
if isAlphaNum(c) {
|
|
outQueue = append(outQueue, c)
|
|
}
|
|
if isOperator(c) {
|
|
if len(opStack) == 0 {
|
|
opStack = append(opStack, c)
|
|
} else {
|
|
if priority(c) > priority(peek(opStack)) { // 2a
|
|
opStack = append(opStack, c)
|
|
} else {
|
|
for len(opStack) > 0 && priority(c) <= priority(peek(opStack)) { // 2b
|
|
to_append := pop(&opStack)
|
|
outQueue = append(outQueue, to_append)
|
|
}
|
|
opStack = append(opStack, c)
|
|
}
|
|
}
|
|
}
|
|
if c == '(' {
|
|
opStack = append(opStack, c)
|
|
}
|
|
if c == ')' {
|
|
for peek(opStack) != '(' {
|
|
to_append := pop(&opStack)
|
|
outQueue = append(outQueue, to_append)
|
|
}
|
|
_ = pop(&opStack) // Get rid of opening parantheses
|
|
}
|
|
}
|
|
|
|
// Pop all remaining operators (and append to outQueue)
|
|
for len(opStack) > 0 {
|
|
to_append := pop(&opStack)
|
|
outQueue = append(outQueue, to_append)
|
|
}
|
|
|
|
return string(outQueue)
|
|
}
|
|
|
|
// Thompson's algorithm. Constructs Finite-State Automaton from given string.
|
|
// Returns start state.
|
|
func thompson(re string) *State {
|
|
nfa := make([]*State, 0) // Stack of states
|
|
for _, c := range re {
|
|
if isAlphaNum(c) {
|
|
state := State{}
|
|
state.transitions = make(map[int][]*State)
|
|
state.content = int(c)
|
|
state.output = make([]*State, 0)
|
|
state.output = append(state.output, &state)
|
|
state.isEmpty = false
|
|
nfa = append(nfa, &state)
|
|
}
|
|
// Must be an operator if it isn't alphanumeric
|
|
switch c {
|
|
case CONCAT:
|
|
s2 := pop(&nfa)
|
|
s1 := pop(&nfa)
|
|
for i := range s1.output {
|
|
s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], s2)
|
|
}
|
|
s1.output = s2.output
|
|
nfa = append(nfa, s1)
|
|
case '*': // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
|
s1 := pop(&nfa)
|
|
stateToAdd := &State{}
|
|
stateToAdd.transitions = make(map[int][]*State)
|
|
stateToAdd.content = EPSILON
|
|
stateToAdd.isEmpty = true
|
|
stateToAdd.isKleene = true
|
|
stateToAdd.output = append(stateToAdd.output, stateToAdd)
|
|
for i := range s1.output {
|
|
s1.output[i].transitions[stateToAdd.content] = append(s1.output[i].transitions[stateToAdd.content], stateToAdd)
|
|
}
|
|
stateToAdd.transitions[s1.content] = append(stateToAdd.transitions[s1.content], s1)
|
|
nfa = append(nfa, stateToAdd)
|
|
case '|':
|
|
s1 := pop(&nfa)
|
|
s2 := pop(&nfa)
|
|
s3 := State{}
|
|
s3.transitions = make(map[int][]*State)
|
|
s3.output = append(s3.output, s1.output...)
|
|
s3.output = append(s3.output, s2.output...)
|
|
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
|
|
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
|
|
s3.content = EPSILON
|
|
s3.isEmpty = true
|
|
|
|
nfa = append(nfa, &s3)
|
|
}
|
|
}
|
|
if len(nfa) != 1 {
|
|
panic("ERROR: Invalid Regex.")
|
|
}
|
|
|
|
verifyLastStates(nfa)
|
|
|
|
return nfa[0]
|
|
|
|
}
|
|
|
|
func main() {
|
|
// Process:
|
|
// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
|
|
// a. Add explicit concatenation operators to facilitate this
|
|
// 2. Build NFA from postfix representation (Thompson's algorithm)
|
|
// 3. Run the string against the NFA
|
|
if len(os.Args) < 3 {
|
|
fmt.Println("ERROR: Missing cmdline args")
|
|
os.Exit(22)
|
|
}
|
|
var re string
|
|
re = os.Args[1]
|
|
re_postfix := shuntingYard(re)
|
|
// fmt.Println(re_postfix)
|
|
startState := thompson(re_postfix)
|
|
start, end, matched := match(startState, os.Args[2])
|
|
if matched {
|
|
for i, c := range os.Args[2] {
|
|
if i >= start && i < end {
|
|
color.New(color.FgRed).Printf("%c", c)
|
|
} else {
|
|
fmt.Printf("%c", c)
|
|
}
|
|
}
|
|
fmt.Printf("\n")
|
|
} else {
|
|
fmt.Println(os.Args[2])
|
|
}
|
|
}
|