From 9d3bc2b804e9ec27fa5c2e220f80b4c3ce5c6af9 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Wed, 23 Oct 2024 08:51:40 -0400 Subject: [PATCH] Fixed Kleene star behavior, which used to behave like a '+' --- main.go | 19 +++++++++++-------- matching.go | 15 ++++++++++++--- nfa.go | 12 +++++++++++- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/main.go b/main.go index 6dd9a9e..1ca719b 100644 --- a/main.go +++ b/main.go @@ -9,7 +9,6 @@ import ( ) const CONCAT rune = '~' -const UNION int = 0 func isOperator(c rune) bool { if c == '*' || c == '|' || c == CONCAT { @@ -126,15 +125,19 @@ func thompson(re string) *State { } s1.output = s2.output nfa = append(nfa, s1) - case '*': + case '*': // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state s1 := pop(&nfa) + stateToAdd := &State{} + stateToAdd.transitions = make(map[int][]*State) + stateToAdd.content = EPSILON + stateToAdd.isEmpty = true + stateToAdd.isKleene = true + stateToAdd.output = append(stateToAdd.output, stateToAdd) for i := range s1.output { - s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1) + s1.output[i].transitions[stateToAdd.content] = append(s1.output[i].transitions[stateToAdd.content], stateToAdd) } - // Reset output to s1 (in case s1 was a union operator state, which has multiple outputs) - s1.output = nil - s1.output = append(s1.output, s1) - nfa = append(nfa, s1) + stateToAdd.transitions[s1.content] = append(stateToAdd.transitions[s1.content], s1) + nfa = append(nfa, stateToAdd) case '|': s1 := pop(&nfa) s2 := pop(&nfa) @@ -143,7 +146,7 @@ func thompson(re string) *State { s3.output = append(s3.output, s1, s2) s3.transitions[s1.content] = append(s3.transitions[s1.content], s1) s3.transitions[s2.content] = append(s3.transitions[s2.content], s2) - s3.content = UNION + s3.content = EPSILON s3.isEmpty = true nfa = append(nfa, &s3) diff --git a/matching.go b/matching.go index a55a14d..fc060ac 100644 
--- a/matching.go +++ b/matching.go @@ -8,13 +8,14 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) { i := 0 // Index in string // Increment until we hit a character matching the start state if start.isEmpty == false { - for int(str[i]) != start.content { + for i < len(str) && int(str[i]) != start.content { i++ } + i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character } currentStates = append(currentStates, start) startIdx = i - i++ // Advance to next character so that we can check for transitions + // Main loop for i < len(str) { // If there are any 0-transitions, take those @@ -45,7 +46,15 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) { i++ } - // End-of-string reached. Check if any of our states is in the end position. + // End-of-string reached. Go to any 0-states. Then check if any of our states are in the end position. + for _, state := range currentStates { + if len(state.transitions[EPSILON]) > 0 { + tempStates = append(tempStates, state.transitions[EPSILON]...) + } + } + copy(currentStates, tempStates) + tempStates = nil + for _, state := range currentStates { if state.isLast { endIdx = i diff --git a/nfa.go b/nfa.go index 53cf37a..cc1a9c2 100644 --- a/nfa.go +++ b/nfa.go @@ -4,10 +4,11 @@ const EPSILON int = 0 type State struct { content int // Contents of current state - isEmpty bool // If it is empty - Union operator states will be empty + isEmpty bool // If it is empty - Union operator and Kleene star states will be empty isLast bool // If it is the last state (acept state) output []*State // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these. transitions map[int][]*State // Transitions to different states (maps a character (int representation) to a _list of states. 
This is useful if one character can lead multiple states eg. ab|aa) + isKleene bool // Identifies whether current node is a 0-state representing Kleene star } type NFA struct { @@ -25,6 +26,15 @@ func verifyLastStatesHelper(state *State, visited map[*State]bool) { state.isLast = true return } + + if len(state.transitions) == 1 && state.isKleene { // A State representing a Kleene Star has a transition going out, which loops back to it. If that is the only transition (and it contains only one state), then it must be a last-state + for _, v := range state.transitions { // Should only loop once + if len(v) == 1 { + state.isLast = true + return + } + } + } if visited[state] == true { return }