diff --git a/main.go b/main.go index 1c05625..b0a9409 100644 --- a/main.go +++ b/main.go @@ -3,6 +3,7 @@ package main import ( "bufio" "fmt" + "io" "os" "slices" @@ -128,6 +129,13 @@ func shuntingYard(re string) []postfixNode { outQueue = append(outQueue, newPostfixNode(dotChars()...)) continue } + if c == '^' { // Start-of-string assertion + outQueue = append(outQueue, newPostfixNode(c)) + } + if c == '$' { // End-of-string assertion + outQueue = append(outQueue, newPostfixNode(c)) + } + if isOperator(c) { if len(opStack) == 0 { opStack = append(opStack, c) @@ -195,13 +203,27 @@ func shuntingYard(re string) []postfixNode { func thompson(re []postfixNode) *State { nfa := make([]*State, 0) // Stack of states for _, c := range re { - if c.nodetype == CHARACTER { + if c.nodetype == CHARACTER || c.nodetype == ASSERTION { state := State{} state.transitions = make(map[int][]*State) state.content = rune2Contents(c.contents) state.output = make([]*State, 0) state.output = append(state.output, &state) state.isEmpty = false + if c.nodetype == ASSERTION { + state.content = newContents(EPSILON) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of the string + state.isEmpty = true + switch c.contents[0] { + case '^': + state.assert = SOS + case '$': + state.assert = EOS + case 'b': + state.assert = WBOUND + case 'B': + state.assert = NONWBOUND + } + } nfa = append(nfa, &state) } // Must be an operator if it isn't a character @@ -262,14 +284,14 @@ func main() { // Read test string from stdin reader := bufio.NewReader(os.Stdin) test_str, err := reader.ReadString('\n') - if err != nil { + if err != nil && err != io.EOF { panic(err) } fmt.Scanln(&test_str) re_postfix := shuntingYard(re) - // fmt.Println(re_postfix) startState := thompson(re_postfix) matchIndices := findAllMatches(startState, test_str) + inColor := false if len(matchIndices) > 0 { for i, c := range test_str {