First commit

master
Aadhavan Srinivasan 2 months ago
commit 82b33f3c9a

@ -0,0 +1,9 @@
# Running `make` with no target runs the `build` target.
.DEFAULT_GOAL := build
# fmt, vet and build are command names, not files to be produced.
.PHONY: fmt vet build
# fmt: run `go fmt` over every package under the current directory.
fmt:
go fmt ./...
# vet: run `go vet` over every package; runs fmt first.
vet: fmt
go vet ./...
# build: compile every package; runs vet (and therefore fmt) first.
build: vet
go build ./...

@ -0,0 +1,3 @@
module re
go 1.23.1

@ -0,0 +1,167 @@
package main
import (
"fmt"
"slices"
)
const (
	// CONCAT is the explicit concatenation operator. shuntingYard inserts it
	// between adjacent operands so that concatenation can be handled like any
	// other binary operator.
	CONCAT rune = '~'

	// UNION is the content value given to the state that thompson creates for
	// the '|' operator.
	UNION int = 0
)
// isOperator reports whether c is one of the regex operators handled by the
// shunting-yard pass: Kleene star ('*'), alternation ('|') or CONCAT.
func isOperator(c rune) bool {
	switch c {
	case '*', '|', CONCAT:
		return true
	default:
		return false
	}
}
/*
priority returns the precedence rank of op: 0 for '|', 1 for CONCAT and
2 for '*'. Any other rune yields -1.
*/
func priority(op rune) int {
	// Listed from lowest to highest precedence; the index is the rank.
	ranking := [...]rune{'|', CONCAT, '*'}
	return slices.Index(ranking[:], op)
}
/*
shuntingYard converts the infix regular expression re to postfix notation
using the Shunting-Yard algorithm. Postfix input is easier to consume when
building the automaton with Thompson's construction.

The conversion happens in two passes:
 1. Explicit CONCAT operators are inserted wherever two operands are adjacent.
 2. The resulting infix expression is reordered into postfix with an operator
    stack and an output queue.

Only alphanumeric runes (see isAlphaNum) are treated as operands; runes that
are neither operands, operators nor parentheses are dropped from the output.

shuntingYard panics if the parentheses in re are not balanced.

See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-with-the-shunting-yard-algorithm/
*/
func shuntingYard(re string) string {
	reRunes := []rune(re)

	// Pass 1: make concatenation explicit. A CONCAT is inserted after the
	// current rune unless the current rune opens a group or is '|', or the
	// next rune is '|', '*' or ')'.
	infix := make([]rune, 0, 2*len(reRunes))
	for i, c := range reRunes {
		infix = append(infix, c)
		if c == '(' || c == '|' || i == len(reRunes)-1 {
			continue
		}
		if next := reRunes[i+1]; next != '|' && next != '*' && next != ')' {
			infix = append(infix, CONCAT)
		}
	}
	fmt.Println(string(infix)) // Debug trace of the expression with explicit concatenation

	var opStack []rune                      // Operator stack
	outQueue := make([]rune, 0, len(infix)) // Output queue

	// Pass 2: the actual algorithm.
	for _, c := range infix {
		switch {
		case isAlphaNum(c):
			// Operands go straight to the output.
			outQueue = append(outQueue, c)
		case isOperator(c):
			// Pop every operator that binds at least as tightly as c, then
			// push c. '(' has priority -1, so the loop never pops past an
			// open group.
			for len(opStack) > 0 && priority(c) <= priority(peek(opStack)) {
				outQueue = append(outQueue, pop(&opStack))
			}
			opStack = append(opStack, c)
		case c == '(':
			opStack = append(opStack, c)
		case c == ')':
			// Flush operators back to the matching '(' and discard both
			// parentheses.
			for len(opStack) > 0 && peek(opStack) != '(' {
				outQueue = append(outQueue, pop(&opStack))
			}
			if len(opStack) == 0 {
				panic("ERROR: Invalid Regex. Unmatched ')'.")
			}
			pop(&opStack) // Get rid of the opening parenthesis
		}
	}

	// Pop all remaining operators (and append them to the output). A '(' left
	// on the stack at this point was never closed.
	for len(opStack) > 0 {
		op := pop(&opStack)
		if op == '(' {
			panic("ERROR: Invalid Regex. Unmatched '('.")
		}
		outQueue = append(outQueue, op)
	}
	return string(outQueue)
}
// thompson builds a finite-state automaton from re using a stack of State
// values and returns the single State left on that stack.
//
// re is scanned rune by rune. An alphanumeric rune pushes a new State; CONCAT,
// '*' and '|' pop their operand(s), wire them together and push the result.
// Any other rune is ignored. main passes the postfix string produced by
// shuntingYard, and the pops below rely on operands preceding their operator.
//
// Panics with "ERROR: Invalid Regex." if the stack does not hold exactly one
// State after the scan. pop is called without a length check, so an operator
// with too few operands on the stack panics inside pop instead.
func thompson(re string) State {
nfa := make([]State, 0) // Stack of states
for _, c := range re {
if isAlphaNum(c) {
// Literal: a state whose content is the rune's code point.
state := State{}
state.transitions = make(map[int]*State)
state.content = int(c)
state.output = make([]*State, 0)
// A fresh literal is its own only output. Note that &state points at this
// local variable, while the append below pushes a copy of it; the two
// share the same transitions map.
state.output = append(state.output, &state)
state.isEmpty = false
nfa = append(nfa, state)
}
// Must be an operator if it isn't alphanumeric
switch c {
case CONCAT:
// s2 was pushed last (right operand), s1 before it (left operand).
s2 := pop(&nfa)
s1 := pop(&nfa)
// Every output of s1 gains a transition to s2, keyed by s2's content.
for i := range s1.output {
s1.output[i].transitions[s2.content] = &s2
}
// The combined fragment now ends wherever s2 ends.
s1.output = s2.output
nfa = append(nfa, s1)
case '*':
s1 := pop(&nfa)
// Every output of s1 gains a transition back to s1, keyed by s1's content.
// NOTE(review): nothing here adds a way to skip s1, so zero repetitions
// do not appear to be represented at this point - confirm this is intended.
for i := range s1.output {
s1.output[i].transitions[s1.content] = &s1
}
nfa = append(nfa, s1)
case '|':
// s1 was pushed last (right alternative), s2 before it (left alternative).
s1 := pop(&nfa)
s2 := pop(&nfa)
// s3 is a new empty state that branches to both alternatives.
s3 := State{}
s3.transitions = make(map[int]*State)
s3.output = append(s3.output, &s1, &s2)
// NOTE(review): the transitions are keyed by content, so if s1 and s2 have
// the same content the second assignment overwrites the first.
s3.transitions[s1.content] = &s1
s3.transitions[s2.content] = &s2
s3.content = UNION
s3.isEmpty = true
nfa = append(nfa, s3)
}
}
// Exactly one fragment must remain on the stack.
if len(nfa) != 1 {
panic("ERROR: Invalid Regex.")
}
// Flag the states reachable from nfa[0] that have no outgoing transitions.
verifyLastStates(nfa)
return nfa[0]
}
// main converts a hard-coded pattern to postfix, prints it, builds the
// automaton with thompson and sanity-checks the shape of the result.
func main() {
	// The pattern is fixed for now; reading it from standard input with
	// fmt.Scanln is currently disabled.
	const pattern = "a(b|c)*d"

	postfix := shuntingYard(pattern)
	fmt.Println(postfix)

	start := thompson(postfix)

	// The start state must have exactly one outgoing transition...
	assert(len(start.transitions) == 1)
	// ...and the state reached via the UNION key must have exactly two.
	union := start.transitions[UNION]
	assert(len(union.transitions) == 2)
}

@ -0,0 +1,15 @@
package main
import (
"unicode"
)
// isAlphaNum reports whether c is a Unicode letter or a Unicode number.
func isAlphaNum(c rune) bool {
	if unicode.IsLetter(c) {
		return true
	}
	return unicode.IsNumber(c)
}
// assert panics with "Assertion Failed" when cond is false and does nothing
// otherwise.
func assert(cond bool) {
	if cond {
		return
	}
	panic("Assertion Failed")
}

@ -0,0 +1,38 @@
package main
// EPSILON has the value 0, the same value as UNION. It is not referenced by
// any of the visible code.
// NOTE(review): presumably meant as the key for epsilon (empty) transitions - confirm.
const EPSILON int = 0
// State is a single node of the automaton built by thompson.
type State struct {
	// content holds the contents of the state. thompson stores the rune's
	// code point for a literal and UNION for a '|' state.
	content int

	// isEmpty is true for states that carry no literal; union operator
	// states are empty.
	isEmpty bool

	// isLast is true if this is a last (accept) state.
	isLast bool

	// output lists the outputs of the state, i.e. its 'outward arrows'.
	// A union operator state has more than one of these.
	output []*State

	// transitions maps an int - the content of the destination state - to
	// the state that is reached.
	transitions map[int]*State
}
// NFA bundles a start state with a slice of output states. It is not yet
// referenced by any of the visible code.
type NFA struct {
	start   State   // The start state
	outputs []State // The output states
}
// verifyLastStatesHelper is the depth-first recursion behind verifyLastStates.
// A state without any transitions is flagged as a last state. Any other state
// is recorded in visited, so it is expanded only once, and each of its
// transition targets other than the state itself is visited in turn.
func verifyLastStatesHelper(state *State, visited map[*State]bool) {
	if len(state.transitions) == 0 {
		state.isLast = true
		return
	}
	if visited[state] {
		return
	}
	visited[state] = true

	for _, next := range state.transitions {
		if next == state {
			continue // Self-loop: nothing new to explore
		}
		verifyLastStatesHelper(next, visited)
	}
}
// verifyLastStates enables the 'isLast' flag on the leaf nodes (last states)
// reachable from start[0]. Only the first element of start is used.
func verifyLastStates(start []State) {
	visited := make(map[*State]bool)
	verifyLastStatesHelper(&start[0], visited)
}

@ -0,0 +1,12 @@
package main
// Helper functions for slices, to make them behave more like stacks
// peek returns the last element of s (the top of the stack) without removing
// it. It panics if s is empty.
func peek[T any](s []T) T {
	top := len(s) - 1
	return s[top]
}
// pop removes the last element of the slice that sp points to and returns it.
// The slice is shortened in place through the pointer. It panics if the slice
// is empty.
func pop[T any](sp *[]T) T {
	stack := *sp
	last := len(stack) - 1
	top := stack[last]
	*sp = stack[:last]
	return top
}
Loading…
Cancel
Save