regex/main.go

package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"slices"

	"github.com/fatih/color"
)

// CONCAT is the rune used internally as the explicit concatenation operator.
// shuntingYard inserts it between adjacent operands of the input pattern, and
// isOperator/priority treat it like any other operator.
// NOTE(review): a literal '~' in a pattern looks indistinguishable from this
// marker unless it is escaped - confirm against isAlphaNum.
const CONCAT rune = '~'

// isOperator reports whether c is one of the regex operators handled by the
// shunting-yard stage: '+', '?', '*', '|' or the internal CONCAT marker.
func isOperator(c rune) bool {
	switch c {
	case '+', '?', '*', '|', CONCAT:
		return true
	}
	return false
}

/*
priority returns the binding strength of the given operator: a larger value binds more tightly.
Any rune that is not a known operator (for instance an opening parenthesis) yields -1.
*/
func priority(op rune) int {
	// Listed from loosest to tightest; the position in this list is the priority.
	ranked := [...]rune{'|', CONCAT, '+', '*', '?'}
	return slices.Index(ranked[:], op)
}

/*
insertConcatOperators returns a copy of re with an explicit CONCAT operator inserted wherever two
operands are implicitly concatenated, and with character-class ranges (e.g. a-z) expanded in place.

A concatenation operator is added between two characters only if both of these hold:
 1. The first character isn't an unescaped opening parenthesis, alternation operator or backslash.
    Nothing can be concatenated _from_ these.
 2. The second character isn't a 'closing operator' ('|', '*', '+', '?', ')') - one that applies to
    something before it. Nothing can be concatenated _to_ these.

An escaped character (the one following an unescaped backslash) is always a literal, so rule 1
never blocks it. Everything between an unescaped '[' and the next ']' is copied without inserting
operators.

It panics if a '[' is never closed or if a range is written in descending order.

Known limitation: a range whose expansion contains ']' (such as [A-z]) puts a ']' in the middle of
the class, which ends the class early when shuntingYard later scans for the closing bracket.
*/
func insertConcatOperators(re []rune) []rune {
	out := make([]rune, 0, 2*len(re))
	escaped := false // True while re[i] is the character right after an unescaped backslash
	for i := 0; i < len(re); i++ {
		c := re[i]
		out = append(out, c)

		if c == '\\' && !escaped {
			// Start of an escape sequence; the next character is the operand.
			escaped = true
			continue
		}

		if c == '[' && !escaped {
			// TODO: Check for escaped characters inside the class
			i++
			for i < len(re) && re[i] != ']' {
				// Check ahead for a character range. A '-' directly before the closing bracket is a literal hyphen.
				if i+2 < len(re) && re[i+1] == '-' && re[i+2] != ']' {
					rangeStart, rangeEnd := re[i], re[i+2]
					if rangeEnd < rangeStart {
						panic("Range is out of order.")
					}
					for r := rangeStart; r <= rangeEnd; r++ {
						out = append(out, r)
					}
					i += 3 // Skip start, hyphen and end
					continue
				}
				out = append(out, re[i])
				i++
			}
			if i == len(re) {
				panic("ERROR: Opening bracket without closing bracket.")
			}
			out = append(out, ']') // i now rests on the closing bracket
		}

		canConcatFrom := escaped || (c != '(' && c != '|')
		escaped = false
		if canConcatFrom && i+1 < len(re) {
			switch re[i+1] {
			case '|', '*', '+', '?', ')':
				// Closing operator - applies to what precedes it, so no concatenation here.
			default:
				out = append(out, CONCAT)
			}
		}
	}
	return out
}

/*
shuntingYard converts the given infix (regular) expression to postfix using the Shunting-Yard
algorithm, and returns the postfix expression as a list of postfixNodes.
The primary benefit of this is getting rid of parentheses.
It also inserts explicit concatenation operators to make parsing easier in Thompson's algorithm.
See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-with-the-shunting-yard-algorithm/

It panics on malformed input: an unclosed '[', a descending range, a trailing backslash, or
imbalanced parentheses.
*/
func shuntingYard(re string) []postfixNode {
	// Convert the string to a slice of runes, then make concatenation explicit.
	tokens := insertConcatOperators([]rune(re))

	opStack := make([]rune, 0)         // Operator stack
	outQueue := make([]postfixNode, 0) // Output queue

	// appendAllExcept queues a node matching every dot character that is not in excluded.
	// dotChars() is called afresh each time because DeleteFunc filters its argument in place.
	appendAllExcept := func(excluded []rune) {
		outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {
			return slices.Contains(excluded, r)
		})...))
	}

	// Actual algorithm
	for i := 0; i < len(tokens); i++ {
		/* Cases, tried in this order:
		1. Current character is alphanumeric - send to output queue
		2. Current character is a backslash - the next character is either a metacharacter class
			(\s \S \d \D \w \W) or a literal; send the corresponding node to the output queue
		3. Current character is '.' - send a node containing all dot characters to the output queue
		4. Current character is operator - pop from opStack (appending to outQueue) until:
				i. opStack is empty, OR
				ii. current character has greater priority than top of opStack
			then push the current character to opStack
		5. If current character is '[', find all the characters until ']', then create a postfixNode containing all these contents. Add this node to outQueue.
		6. If current character is '(', push to opStack
		7. If current character is ')', pop from opStack (and append to outQueue) until '(' is found. Discard parentheses.
		A character matching none of these cases is skipped.
		*/
		c := tokens[i]
		switch {
		case isAlphaNum(c):
			outQueue = append(outQueue, newPostfixNode(c))

		case c == '\\': // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses
			if i == len(tokens)-1 { // End of string - panic, because something needs to come after a backslash
				panic("ERROR: Backslash with no escape character.")
			}
			i++
			switch tokens[i] {
			case 's': // Whitespace
				outQueue = append(outQueue, newPostfixNode(whitespaceChars...))
			case 'S': // Non-whitespace
				appendAllExcept(whitespaceChars)
			case 'd': // Digits
				outQueue = append(outQueue, newPostfixNode(digitChars...))
			case 'D': // Non-digits
				appendAllExcept(digitChars)
			case 'w': // Word character
				outQueue = append(outQueue, newPostfixNode(wordChars...))
			case 'W': // Non-word character
				appendAllExcept(wordChars)
			default: // None of the above - append it as a regular character
				outQueue = append(outQueue, newPostfixCharNode(tokens[i]))
			}

		case c == '.': // Dot metacharacter - matches whatever dotChars() returns
			outQueue = append(outQueue, newPostfixNode(dotChars()...))

		case isOperator(c):
			// The explicit emptiness check means we never depend on what peek returns for an empty stack.
			for len(opStack) > 0 {
				topStack, err := peek(opStack)
				if err != nil {
					panic("ERROR: Operator without operand.")
				}
				if priority(c) > priority(topStack) {
					break // '(' has priority -1, so it is never popped here
				}
				outQueue = append(outQueue, newPostfixNode(mustPop(&opStack)))
			}
			opStack = append(opStack, c)

		case c == '[': // Used for character classes
			end := i + 1
			for end < len(tokens) && tokens[end] != ']' {
				end++
			}
			if end == len(tokens) { // Defensive: insertConcatOperators always closes an unescaped '['
				panic("ERROR: Opening bracket without closing bracket.")
			}
			// Clone so the node does not alias the token buffer.
			outQueue = append(outQueue, newPostfixNode(slices.Clone(tokens[i+1:end])...))
			i = end // The loop increment steps past the closing bracket

		case c == '(':
			opStack = append(opStack, c)

		case c == ')':
			// Keep popping from opStack until we encounter an opening parenthesis. Panic if we reach the end of the stack.
			for {
				top, err := peek(opStack)
				if err != nil {
					panic("ERROR: Imbalanced parantheses.")
				}
				if top == '(' {
					break
				}
				outQueue = append(outQueue, newPostfixNode(mustPop(&opStack)))
			}
			_ = mustPop(&opStack) // Get rid of opening parenthesis
		}
	}

	// Pop all remaining operators (and append to outQueue)
	for len(opStack) > 0 {
		op := mustPop(&opStack)
		if op == '(' { // An opening parenthesis that was never closed
			panic("ERROR: Imbalanced parantheses.")
		}
		outQueue = append(outQueue, newPostfixNode(op))
	}

	return outQueue
}

// thompson runs Thompson's construction over a postfix expression, keeping a
// stack of partially built automata: character nodes push a fresh state, and
// operator nodes pop their operands and push the combined result.
// It returns the start state, and panics with "ERROR: Invalid Regex." unless
// exactly one automaton is left on the stack at the end.
func thompson(re []postfixNode) *State {
	stack := make([]*State, 0) // Stack of partially built automata
	for _, node := range re {
		switch node.nodetype {
		case CHARACTER:
			// A leaf state is its own (only) output.
			leaf := &State{}
			leaf.transitions = make(map[int][]*State)
			leaf.content = rune2Contents(node.contents)
			leaf.output = []*State{leaf}
			leaf.isEmpty = false
			stack = append(stack, leaf)
		case CONCATENATE:
			// Operands come off the stack in reverse order: right first, then left.
			right := mustPop(&stack)
			left := mustPop(&stack)
			stack = append(stack, concatenate(left, right))
		case KLEENE:
			operand := mustPop(&stack)
			stack = append(stack, kleene(*operand))
		case PLUS: // a+ is equivalent to aa*
			operand := mustPop(&stack)
			starred := kleene(*operand)
			stack = append(stack, concatenate(operand, starred))
		case QUESTION: // ab? is equivalent to a(b|)
			operand := mustPop(&stack)
			// Alternate the operand with an empty (epsilon) state.
			empty := &State{}
			empty.transitions = make(map[int][]*State)
			empty.content = newContents(EPSILON)
			empty.output = append(empty.output, empty)
			empty.isEmpty = true
			stack = append(stack, alternate(operand, empty))
		case PIPE:
			top := mustPop(&stack)
			below := mustPop(&stack)
			stack = append(stack, alternate(top, below))
		}
	}

	if len(stack) != 1 {
		panic("ERROR: Invalid Regex.")
	}

	verifyLastStates(stack)

	return stack[0]
}

func main() {
	// Process:
	// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
	// 		a. Add explicit concatenation operators to facilitate this
	// 2. Build NFA from postfix representation (Thompson's algorithm)
	// 3. Run the string against the NFA
	if len(os.Args) != 2 {
		fmt.Println("ERROR: Missing cmdline args")
		os.Exit(22)
	}
	var re string
	re = os.Args[1]
	var test_str string
	// Read test string from stdin
	reader := bufio.NewReader(os.Stdin)
	test_str, err := reader.ReadString('\n')
	if err != nil {
		panic(err)
	}
	fmt.Scanln(&test_str)
	re_postfix := shuntingYard(re)
	// fmt.Println(re_postfix)
	startState := thompson(re_postfix)
	matchIndices := findAllMatches(startState, test_str)
	inColor := false
	if len(matchIndices) > 0 {
		for i, c := range test_str {
			for _, indices := range matchIndices {
				if i >= indices.startIdx && i < indices.endIdx {
					color.New(color.FgRed).Printf("%c", c)
					inColor = true
					break
				}
			}
			if inColor == false {
				fmt.Printf("%c", c)
			}
			inColor = false
		}
	} else {
		fmt.Print(test_str)
	}
}
First commit 2 months ago			`package main`

			`import (`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`"bufio"`
First commit 2 months ago			`"fmt"`
Rudimentary matching works 2 months ago			`"os"`
First commit 2 months ago			`"slices"`
Print matched content in color 2 months ago
			`"github.com/fatih/color"`
First commit 2 months ago			`)`

			`const CONCAT rune = '~'`

			`func isOperator(c rune) bool {`
Added question mark operator - 0 or 1 2 months ago			`if c == '+' \|\| c == '?' \|\| c == '*' \|\| c == '\|' \|\| c == CONCAT {`
First commit 2 months ago			`return true`
			`}`
			`return false`
			`}`

			`/* priority returns the priority of the given operator */`
			`func priority(op rune) int {`
Added question mark operator - 0 or 1 2 months ago			`precedence := []rune{'\|', CONCAT, '+', '*', '?'}`
First commit 2 months ago			`return slices.Index(precedence, op)`
			`}`

			`/*`
Added comments 2 months ago			`The Shunting-Yard algorithm is used to convert the given infix (regeular) expression to postfix.`
			`The primary benefit of this is getting rid of parentheses.`
			`It also inserts explicit concatenation operators to make parsing easier in Thompson's algorithm.`
First commit 2 months ago			`See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-with-the-shunting-yard-algorithm/`
			`*/`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`func shuntingYard(re string) []postfixNode {`
First commit 2 months ago			`re_postfix := make([]rune, 0)`
Added comments 2 months ago			`re_runes := []rune(re) // Convert the string to a slice of runes to allow iteration through it`
			`/* Add concatenation operators.`
			`Only add a concatenation operator between two characters if both the following conditions are met:`
Added support for some escaped metacharacters 2 months ago			`1. The first character isn't an opening parantheses or alteration operator (or an escape character)`
Added comments 2 months ago			`a. This makes sense, because these operators can't be _concatenated_ with anything else.`
			`2. The second character isn't a 'closing operator' - one that applies to something before it`
			`a. Again, these operators can'be concatenated _to_. They can, however, be concatenated _from_.`
			`*/`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`i := 0`
			`for i < len(re_runes) {`
First commit 2 months ago			`re_postfix = append(re_postfix, re_runes[i])`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`if re_runes[i] == '[' && (i == 0 \|\| re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped`
			`for re_runes[i] != ']' {`
			`i++ // Skip all characters inside brackets`
Added support for character ranges and dot metacharacter 2 months ago			`// TODO: Check for escaped characters`

			`// Check ahead for character range`
			`if i < len(re_runes)-2 && re_runes[i+1] == '-' {`
			`rangeStart := re_runes[i]`
			`rangeEnd := re_runes[i+2]`
			`if int(rangeEnd) < int(rangeStart) {`
			`panic("Range is out of order.")`
			`}`

			`for i := rangeStart; i <= rangeEnd; i++ {`
			`re_postfix = append(re_postfix, i)`
			`}`

			`i += 2 // Skip start and hyphen (end will automatically be skipped on next iteration of loop)`
			`continue`
			`}`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`re_postfix = append(re_postfix, re_runes[i])`
			`}`
			`continue`
			`}`
Added support for some escaped metacharacters 2 months ago			`if re_runes[i] != '(' && re_runes[i] != '\|' && re_runes[i] != '\\' {`
First commit 2 months ago			`if i < len(re_runes)-1 {`
Added question mark operator - 0 or 1 2 months ago			`if re_runes[i+1] != '\|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' {`
First commit 2 months ago			`re_postfix = append(re_postfix, CONCAT)`
			`}`
			`}`
			`}`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`i++`
First commit 2 months ago			`}`

Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`opStack := make([]rune, 0) // Operator stack`
			`outQueue := make([]postfixNode, 0) // Output queue`
First commit 2 months ago
			`// Actual algorithm`
Added alternate function, removed relevant code from main; also started working on escape characters 2 months ago			`for i := 0; i < len(re_postfix); i++ {`
First commit 2 months ago			`/* Two cases:`
			`1. Current character is alphanumeric - send to output queue`
			`2. Current character is operator - do the following:`
			`a. If current character has greater priority than top of opStack, push to opStack.`
			`b. If not, keep popping from opStack (and appending to outQueue) until:`
			`i. opStack is empty, OR`
			`ii. current character has greater priority than top of opStack`
			`3. If current character is '(', push to opStack`
			`4. If current character is ')', pop from opStack (and append to outQueue) until '(' is found. Discard parantheses.`
Added support for character ranges and dot metacharacter 2 months ago			`5. If current character is '[', find all the characters until ']', then create a postfixNode containing all these contents. Add this node to outQueue.`
First commit 2 months ago			`*/`
Added alternate function, removed relevant code from main; also started working on escape characters 2 months ago			`c := re_postfix[i]`
First commit 2 months ago			`if isAlphaNum(c) {`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`outQueue = append(outQueue, newPostfixNode(c))`
Added alternate function, removed relevant code from main; also started working on escape characters 2 months ago			`continue`
First commit 2 months ago			`}`
Added support for some escaped metacharacters 2 months ago			`// Escape character`
			`if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary`
			`if i == len(re_postfix)-1 { // End of string - panic, because backslash is an escape character (something needs to come after it)`
			`panic("ERROR: Backslash with no escape character.")`
			`}`
			`i++`
			`switch re_postfix[i] {`
			`case 's': // Whitespace`
			`outQueue = append(outQueue, newPostfixNode(whitespaceChars...))`
			`case 'S': // Non-whitespace - I am doing this in a fancy way, generating all dot characters, then removing whitespace characters from it`
			`outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {`
Removed exclamation mark in inverted metacharacters - had the opposite effect becasue of the way deleteFunc works 2 months ago			`return slices.Contains(whitespaceChars, r)`
Added support for some escaped metacharacters 2 months ago			`})...))`
			`case 'd': // Digits`
			`outQueue = append(outQueue, newPostfixNode(digitChars...))`
			`case 'D': // Non-digits - same fancy way as 'S'`
			`outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {`
Removed exclamation mark in inverted metacharacters - had the opposite effect becasue of the way deleteFunc works 2 months ago			`return slices.Contains(digitChars, r)`
Added support for some escaped metacharacters 2 months ago			`})...))`
			`case 'w': // word character`
			`outQueue = append(outQueue, newPostfixNode(wordChars...))`
			`case 'W': // Non-word character - same fancy way as 'S' and 'D'`
			`outQueue = append(outQueue, newPostfixNode(slices.DeleteFunc(dotChars(), func(r rune) bool {`
Removed exclamation mark in inverted metacharacters - had the opposite effect becasue of the way deleteFunc works 2 months ago			`return slices.Contains(wordChars, r)`
Added support for some escaped metacharacters 2 months ago			`})...))`
			`default: // None of the above - append it as a regular character`
			`outQueue = append(outQueue, newPostfixCharNode(re_postfix[i]))`
			`}`
			`continue // Escaped character will automatically be skipped when loop variable increments`
			`}`
Added alternate function, removed relevant code from main; also started working on escape characters 2 months ago
Added support for character ranges and dot metacharacter 2 months ago			`if c == '.' { // Dot metacharacter - represents 'any' character, but I am only adding Unicode 0020-007E`
Added support for some escaped metacharacters 2 months ago			`outQueue = append(outQueue, newPostfixNode(dotChars()...))`
Added support for character ranges and dot metacharacter 2 months ago			`continue`
			`}`
First commit 2 months ago			`if isOperator(c) {`
			`if len(opStack) == 0 {`
			`opStack = append(opStack, c)`
			`} else {`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`topStack, err := peek(opStack)`
			`if err != nil {`
			`panic("ERROR: Operator without operand.")`
			`}`
			`if priority(c) > priority(topStack) { // 2a`
First commit 2 months ago			`opStack = append(opStack, c)`
			`} else {`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`for priority(c) <= priority(topStack) { // 2b`
			`to_append := mustPop(&opStack)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`outQueue = append(outQueue, newPostfixNode(to_append))`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`topStack, _ = peek(opStack)`
First commit 2 months ago			`}`
			`opStack = append(opStack, c)`
			`}`
			`}`
			`}`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`if c == '[' { // Used for character classes`
			`i++ // Step forward so we can look at the character class`
			`chars := make([]rune, 0) // List of characters - used only for character classes`
			`for i < len(re_postfix) {`
			`if re_postfix[i] == ']' {`
			`break`
			`}`
			`chars = append(chars, re_postfix[i])`
			`i++`
			`}`
			`if i == len(re_postfix) { // We have reached the end of the string, so we didn't encounter a closing brakcet. Panic.`
			`panic("ERROR: Opening bracket without closing bracket.")`
			`}`
			`outQueue = append(outQueue, newPostfixNode(chars...))`
			`i++ // Step forward to skip closing bracket`
			`continue`
			`}`
First commit 2 months ago			`if c == '(' {`
			`opStack = append(opStack, c)`
			`}`
			`if c == ')' {`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`// Keep popping from opStack until we encounter an opening parantheses. Panic if we reach the end of the stack.`
			`for val, err := peek(opStack); val != '('; val, err = peek(opStack) {`
			`if err != nil {`
			`panic("ERROR: Imbalanced parantheses.")`
			`}`
			`to_append := mustPop(&opStack)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`outQueue = append(outQueue, newPostfixNode(to_append))`
First commit 2 months ago			`}`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`_ = mustPop(&opStack) // Get rid of opening parantheses`
First commit 2 months ago			`}`
			`}`

			`// Pop all remaining operators (and append to outQueue)`
			`for len(opStack) > 0 {`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`to_append := mustPop(&opStack)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`outQueue = append(outQueue, newPostfixNode(to_append))`
First commit 2 months ago			`}`

Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`return outQueue`
First commit 2 months ago			`}`

			`// Thompson's algorithm. Constructs Finite-State Automaton from given string.`
			`// Returns start state.`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`func thompson(re []postfixNode) *State {`
Fixed Kleene Star matching 2 months ago			`nfa := make([]*State, 0) // Stack of states`
First commit 2 months ago			`for _, c := range re {`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`if c.nodetype == CHARACTER {`
First commit 2 months ago			`state := State{}`
Allow one state to map to multiple states with the same transition eg. ab\|aa 2 months ago			`state.transitions = make(map[int][]*State)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`state.content = rune2Contents(c.contents)`
First commit 2 months ago			`state.output = make([]*State, 0)`
			`state.output = append(state.output, &state)`
			`state.isEmpty = false`
Fixed Kleene Star matching 2 months ago			`nfa = append(nfa, &state)`
First commit 2 months ago			`}`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`// Must be an operator if it isn't a character`
			`switch c.nodetype {`
			`case CONCATENATE:`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`s2 := mustPop(&nfa)`
			`s1 := mustPop(&nfa)`
Added functions for concatenation and kleene star creation, removed relevant code from main 2 months ago			`s1 = concatenate(s1, s2)`
First commit 2 months ago			`nfa = append(nfa, s1)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`s1 := mustPop(&nfa)`
Added functions for concatenation and kleene star creation, removed relevant code from main 2 months ago			`stateToAdd := kleene(*s1)`
Fixed kleene star behavior, which used to behave like a '+' 2 months ago			`nfa = append(nfa, stateToAdd)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`case PLUS: // a+ is equivalent to aa*`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`s1 := mustPop(&nfa)`
Added functions for concatenation and kleene star creation, removed relevant code from main 2 months ago			`s2 := kleene(*s1)`
			`s1 = concatenate(s1, s2)`
Added more test cases 2 months ago			`nfa = append(nfa, s1)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`case QUESTION: // ab? is equivalent to a(b\|)`
Added question mark operator - 0 or 1 2 months ago			`s1 := mustPop(&nfa)`
			`s2 := &State{}`
			`s2.transitions = make(map[int][]*State)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`s2.content = newContents(EPSILON)`
Fixed small bug with question mark operator, wasn't adding the empty state's output 2 months ago			`s2.output = append(s2.output, s2)`
Added question mark operator - 0 or 1 2 months ago			`s2.isEmpty = true`
			`s3 := alternate(s1, s2)`
			`nfa = append(nfa, s3)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`case PIPE:`
Replaced 'pop' with 'mustPop' which panics on error 2 months ago			`s1 := mustPop(&nfa)`
			`s2 := mustPop(&nfa)`
Added alternate function, removed relevant code from main; also started working on escape characters 2 months ago			`s3 := alternate(s1, s2)`
			`nfa = append(nfa, s3)`
First commit 2 months ago			`}`
			`}`
			`if len(nfa) != 1 {`
			`panic("ERROR: Invalid Regex.")`
			`}`

			`verifyLastStates(nfa)`

			`return nfa[0]`

			`}`

			`func main() {`
More Kleene star fixes 2 months ago			`// Process:`
			`// 1. Convert regex into postfix notation (Shunting-Yard algorithm)`
			`// a. Add explicit concatenation operators to facilitate this`
			`// 2. Build NFA from postfix representation (Thompson's algorithm)`
			`// 3. Run the string against the NFA`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`if len(os.Args) != 2 {`
Rudimentary matching works 2 months ago			`fmt.Println("ERROR: Missing cmdline args")`
Fixed Kleene Star matching 2 months ago			`os.Exit(22)`
Rudimentary matching works 2 months ago			`}`
First commit 2 months ago			`var re string`
Rudimentary matching works 2 months ago			`re = os.Args[1]`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`var test_str string`
			`// Read test string from stdin`
			`reader := bufio.NewReader(os.Stdin)`
			`test_str, err := reader.ReadString('\n')`
			`if err != nil {`
			`panic(err)`
			`}`
			`fmt.Scanln(&test_str)`
First commit 2 months ago			`re_postfix := shuntingYard(re)`
Print matched content in color 2 months ago			`// fmt.Println(re_postfix)`
			`startState := thompson(re_postfix)`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`matchIndices := findAllMatches(startState, test_str)`
Initial support for multiple matching 2 months ago			`inColor := false`
			`if len(matchIndices) > 0 {`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`for i, c := range test_str {`
Initial support for multiple matching 2 months ago			`for _, indices := range matchIndices {`
			`if i >= indices.startIdx && i < indices.endIdx {`
			`color.New(color.FgRed).Printf("%c", c)`
			`inColor = true`
			`break`
			`}`
			`}`
			`if inColor == false {`
Print matched content in color 2 months ago			`fmt.Printf("%c", c)`
			`}`
Initial support for multiple matching 2 months ago			`inColor = false`
Print matched content in color 2 months ago			`}`
Rudimentary matching works 2 months ago			`} else {`
Added support for character classes (not ranges, yet); also take input from stdin instead of cmdline arg 2 months ago			`fmt.Print(test_str)`
Rudimentary matching works 2 months ago			`}`
First commit 2 months ago			`}`