Added support for detecting assertion characters; changed input so that newline isn't required

master
Aadhavan Srinivasan 2 months ago
parent a752491563
commit 8dbecde3ae

@ -3,6 +3,7 @@ package main
import ( import (
"bufio" "bufio"
"fmt" "fmt"
"io"
"os" "os"
"slices" "slices"
@ -128,6 +129,13 @@ func shuntingYard(re string) []postfixNode {
outQueue = append(outQueue, newPostfixNode(dotChars()...)) outQueue = append(outQueue, newPostfixNode(dotChars()...))
continue continue
} }
if c == '^' { // Start-of-string assertion
outQueue = append(outQueue, newPostfixNode(c))
}
if c == '$' { // End-of-string assertion
outQueue = append(outQueue, newPostfixNode(c))
}
if isOperator(c) { if isOperator(c) {
if len(opStack) == 0 { if len(opStack) == 0 {
opStack = append(opStack, c) opStack = append(opStack, c)
@ -195,13 +203,27 @@ func shuntingYard(re string) []postfixNode {
func thompson(re []postfixNode) *State { func thompson(re []postfixNode) *State {
nfa := make([]*State, 0) // Stack of states nfa := make([]*State, 0) // Stack of states
for _, c := range re { for _, c := range re {
if c.nodetype == CHARACTER { if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
state := State{} state := State{}
state.transitions = make(map[int][]*State) state.transitions = make(map[int][]*State)
state.content = rune2Contents(c.contents) state.content = rune2Contents(c.contents)
state.output = make([]*State, 0) state.output = make([]*State, 0)
state.output = append(state.output, &state) state.output = append(state.output, &state)
state.isEmpty = false state.isEmpty = false
if c.nodetype == ASSERTION {
state.content = newContents(EPSILON) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of string
state.isEmpty = true
switch c.contents[0] {
case '^':
state.assert = SOS
case '$':
state.assert = EOS
case 'b':
state.assert = WBOUND
case 'B':
state.assert = NONWBOUND
}
}
nfa = append(nfa, &state) nfa = append(nfa, &state)
} }
// Must be an operator if it isn't a character // Must be an operator if it isn't a character
@ -262,14 +284,14 @@ func main() {
// Read test string from stdin // Read test string from stdin
reader := bufio.NewReader(os.Stdin) reader := bufio.NewReader(os.Stdin)
test_str, err := reader.ReadString('\n') test_str, err := reader.ReadString('\n')
if err != nil { if err != nil && err != io.EOF {
panic(err) panic(err)
} }
fmt.Scanln(&test_str) fmt.Scanln(&test_str)
re_postfix := shuntingYard(re) re_postfix := shuntingYard(re)
// fmt.Println(re_postfix)
startState := thompson(re_postfix) startState := thompson(re_postfix)
matchIndices := findAllMatches(startState, test_str) matchIndices := findAllMatches(startState, test_str)
inColor := false inColor := false
if len(matchIndices) > 0 { if len(matchIndices) > 0 {
for i, c := range test_str { for i, c := range test_str {

Loading…
Cancel
Save