From bc11777ad527007ee389c300fc3a45d77c3a5614 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Tue, 22 Oct 2024 17:07:01 -0400 Subject: [PATCH] Fixed Kleene Star matching --- main.go | 23 ++++++++++++----------- matching.go | 26 +++++++++++++++++--------- nfa.go | 4 ++-- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/main.go b/main.go index e885475..b802b15 100644 --- a/main.go +++ b/main.go @@ -102,8 +102,8 @@ func shuntingYard(re string) string { // Thompson's algorithm. Constructs Finite-State Automaton from given string. // Returns start state. -func thompson(re string) State { - nfa := make([]State, 0) // Stack of states +func thompson(re string) *State { + nfa := make([]*State, 0) // Stack of states for _, c := range re { if isAlphaNum(c) { state := State{} @@ -112,7 +112,7 @@ func thompson(re string) State { state.output = make([]*State, 0) state.output = append(state.output, &state) state.isEmpty = false - nfa = append(nfa, state) + nfa = append(nfa, &state) } // Must be an operator if it isn't alphanumeric switch c { @@ -120,31 +120,31 @@ func thompson(re string) State { s2 := pop(&nfa) s1 := pop(&nfa) for i := range s1.output { - s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], &s2) + s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], s2) } s1.output = s2.output nfa = append(nfa, s1) case '*': s1 := pop(&nfa) for i := range s1.output { - s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], &s1) + s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1) } // Reset output to s1 (in case s1 was a union operator state, which has multiple outputs) s1.output = nil - s1.output = append(s1.output, &s1) + s1.output = append(s1.output, s1) nfa = append(nfa, s1) case '|': s1 := pop(&nfa) s2 := pop(&nfa) s3 := State{} s3.transitions = make(map[int][]*State) - s3.output = append(s3.output, &s1, &s2) - s3.transitions[s1.content] = append(s3.transitions[s1.content], &s1) - s3.transitions[s2.content] = append(s3.transitions[s2.content], &s2) + s3.output = append(s3.output, s1, s2) + s3.transitions[s1.content] = append(s3.transitions[s1.content], s1) + s3.transitions[s2.content] = append(s3.transitions[s2.content], s2) s3.content = UNION s3.isEmpty = true - nfa = append(nfa, s3) + nfa = append(nfa, &s3) } } if len(nfa) != 1 { @@ -160,13 +160,14 @@ func thompson(re string) State { func main() { if len(os.Args) < 3 { fmt.Println("ERROR: Missing cmdline args") + os.Exit(22) } var re string re = os.Args[1] re_postfix := shuntingYard(re) fmt.Println(re_postfix) start := thompson(re_postfix) - s, e, matched := match(&start, os.Args[2]) + s, e, matched := match(start, os.Args[2]) if matched { fmt.Printf("Matched from %d to %d\n", s, e) } else { diff --git a/matching.go b/matching.go index 4678cdf..a55a14d 100644 --- a/matching.go +++ b/matching.go @@ -31,22 +31,30 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) { for _, state := range currentStates { if len(state.transitions[int(str[i])]) > 0 { tempStates = append(tempStates, state.transitions[int(str[i])]...) + } else { + // This enables the 'greedy' behavior - last-state status is only checked if we can't match anything else + if state.isLast { + endIdx = i + return startIdx, endIdx, true + } } } copy(currentStates, tempStates) tempStates = nil - // If any of the current-states is a last state, return true. - for _, state := range currentStates { - if state.isLast { - endIdx = i - return startIdx, endIdx, true - } - } i++ } - // We don't seem to have reached a last-state. Return fals - return -1, -1, false + // End-of-string reached. Check if any of our states is in the end position. + for _, state := range currentStates { + if state.isLast { + endIdx = i + return startIdx, endIdx, true + } else { + return -1, -1, false + } + } + // Default + return -1, -1, false } diff --git a/nfa.go b/nfa.go index 030d21c..53cf37a 100644 --- a/nfa.go +++ b/nfa.go @@ -39,6 +39,6 @@ func verifyLastStatesHelper(state *State, visited map[*State]bool) { } // verifyLastStates penables the 'isLast' flag for the leaf nodes (last states) -func verifyLastStates(start []State) { - verifyLastStatesHelper(&start[0], make(map[*State]bool)) +func verifyLastStates(start []*State) { + verifyLastStatesHelper(start[0], make(map[*State]bool)) }