You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

186 lines
4.7 KiB
Go

2 months ago
package main
import (
"fmt"
"os"
2 months ago
"slices"
"github.com/fatih/color"
2 months ago
)
// Markers shared by the postfix conversion and the automaton builder.
const (
	// CONCAT is the explicit concatenation operator that shuntingYard inserts
	// between adjacent operands.
	CONCAT rune = '~'

	// UNION is the content value thompson stores in the state it creates for
	// the '|' operator.
	UNION int = 0
)
// isOperator reports whether c is one of the regex operators handled by the
// parser: '*', '|' or the explicit concatenation marker CONCAT.
func isOperator(c rune) bool {
	switch c {
	case '*', '|', CONCAT:
		return true
	default:
		return false
	}
}
// priority returns the precedence of operator op: 0 for '|', 1 for CONCAT and
// 2 for '*', so a larger value binds tighter. Any other rune yields -1, the
// value slices.Index reports when the element is absent.
func priority(op rune) int {
	return slices.Index([]rune{'|', CONCAT, '*'}, op)
}
/*
shuntingYard applies the Shunting-Yard algorithm
to convert the given infix expression to postfix. This makes
it easier to parse the algorithm when doing Thompson.
See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-with-the-shunting-yard-algorithm/
*/
func shuntingYard(re string) string {
re_postfix := make([]rune, 0)
re_runes := []rune(re)
/* Add concatenation operators */
for i := 0; i < len(re_runes); i++ {
re_postfix = append(re_postfix, re_runes[i])
if re_runes[i] != '(' && re_runes[i] != '|' {
if i < len(re_runes)-1 {
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != ')' {
re_postfix = append(re_postfix, CONCAT)
}
}
}
}
// fmt.Println(string(re_postfix))
2 months ago
opStack := make([]rune, 0) // Operator stack
outQueue := make([]rune, 0) // Output queue
// Actual algorithm
for _, c := range re_postfix {
/* Two cases:
1. Current character is alphanumeric - send to output queue
2. Current character is operator - do the following:
a. If current character has greater priority than top of opStack, push to opStack.
b. If not, keep popping from opStack (and appending to outQueue) until:
i. opStack is empty, OR
ii. current character has greater priority than top of opStack
3. If current character is '(', push to opStack
4. If current character is ')', pop from opStack (and append to outQueue) until '(' is found. Discard parantheses.
*/
if isAlphaNum(c) {
outQueue = append(outQueue, c)
}
if isOperator(c) {
if len(opStack) == 0 {
opStack = append(opStack, c)
} else {
if priority(c) > priority(peek(opStack)) { // 2a
opStack = append(opStack, c)
} else {
for len(opStack) > 0 && priority(c) <= priority(peek(opStack)) { // 2b
to_append := pop(&opStack)
outQueue = append(outQueue, to_append)
}
opStack = append(opStack, c)
}
}
}
if c == '(' {
opStack = append(opStack, c)
}
if c == ')' {
for peek(opStack) != '(' {
to_append := pop(&opStack)
outQueue = append(outQueue, to_append)
}
_ = pop(&opStack) // Get rid of opening parantheses
}
}
// Pop all remaining operators (and append to outQueue)
for len(opStack) > 0 {
to_append := pop(&opStack)
outQueue = append(outQueue, to_append)
}
return string(outQueue)
}
// Thompson's algorithm. Constructs Finite-State Automaton from given string.
// Returns start state.
func thompson(re string) *State {
nfa := make([]*State, 0) // Stack of states
2 months ago
for _, c := range re {
if isAlphaNum(c) {
state := State{}
state.transitions = make(map[int][]*State)
2 months ago
state.content = int(c)
state.output = make([]*State, 0)
state.output = append(state.output, &state)
state.isEmpty = false
nfa = append(nfa, &state)
2 months ago
}
// Must be an operator if it isn't alphanumeric
switch c {
case CONCAT:
s2 := pop(&nfa)
s1 := pop(&nfa)
for i := range s1.output {
s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], s2)
2 months ago
}
s1.output = s2.output
nfa = append(nfa, s1)
case '*':
s1 := pop(&nfa)
for i := range s1.output {
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1)
2 months ago
}
// Reset output to s1 (in case s1 was a union operator state, which has multiple outputs)
s1.output = nil
s1.output = append(s1.output, s1)
2 months ago
nfa = append(nfa, s1)
case '|':
s1 := pop(&nfa)
s2 := pop(&nfa)
s3 := State{}
s3.transitions = make(map[int][]*State)
s3.output = append(s3.output, s1, s2)
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
2 months ago
s3.content = UNION
s3.isEmpty = true
nfa = append(nfa, &s3)
2 months ago
}
}
if len(nfa) != 1 {
panic("ERROR: Invalid Regex.")
}
verifyLastStates(nfa)
return nfa[0]
}
func main() {
if len(os.Args) < 3 {
fmt.Println("ERROR: Missing cmdline args")
os.Exit(22)
}
2 months ago
var re string
re = os.Args[1]
2 months ago
re_postfix := shuntingYard(re)
// fmt.Println(re_postfix)
startState := thompson(re_postfix)
start, end, matched := match(startState, os.Args[2])
if matched {
for i, c := range os.Args[2] {
if i >= start && i < end {
color.New(color.FgRed).Printf("%c", c)
} else {
fmt.Printf("%c", c)
}
}
fmt.Printf("\n")
} else {
fmt.Println(os.Args[2])
}
2 months ago
}