Fixed Kleene star behavior, which used to behave like a '+'

master
Aadhavan Srinivasan 2 months ago
parent 2cd43bf2a1
commit 9d3bc2b804

@ -9,7 +9,6 @@ import (
) )
const CONCAT rune = '~' const CONCAT rune = '~'
const UNION int = 0
func isOperator(c rune) bool { func isOperator(c rune) bool {
if c == '*' || c == '|' || c == CONCAT { if c == '*' || c == '|' || c == CONCAT {
@ -126,15 +125,19 @@ func thompson(re string) *State {
} }
s1.output = s2.output s1.output = s2.output
nfa = append(nfa, s1) nfa = append(nfa, s1)
case '*': case '*': // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
s1 := pop(&nfa) s1 := pop(&nfa)
stateToAdd := &State{}
stateToAdd.transitions = make(map[int][]*State)
stateToAdd.content = EPSILON
stateToAdd.isEmpty = true
stateToAdd.isKleene = true
stateToAdd.output = append(stateToAdd.output, stateToAdd)
for i := range s1.output { for i := range s1.output {
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1) s1.output[i].transitions[stateToAdd.content] = append(s1.output[i].transitions[stateToAdd.content], stateToAdd)
} }
// Reset output to s1 (in case s1 was a union operator state, which has multiple outputs) stateToAdd.transitions[s1.content] = append(stateToAdd.transitions[s1.content], s1)
s1.output = nil nfa = append(nfa, stateToAdd)
s1.output = append(s1.output, s1)
nfa = append(nfa, s1)
case '|': case '|':
s1 := pop(&nfa) s1 := pop(&nfa)
s2 := pop(&nfa) s2 := pop(&nfa)
@ -143,7 +146,7 @@ func thompson(re string) *State {
s3.output = append(s3.output, s1, s2) s3.output = append(s3.output, s1, s2)
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1) s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2) s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
s3.content = UNION s3.content = EPSILON
s3.isEmpty = true s3.isEmpty = true
nfa = append(nfa, &s3) nfa = append(nfa, &s3)

@ -8,13 +8,14 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
i := 0 // Index in string i := 0 // Index in string
// Increment until we hit a character matching the start state // Increment until we hit a character matching the start state
if start.isEmpty == false { if start.isEmpty == false {
for int(str[i]) != start.content { for i < len(str) && int(str[i]) != start.content {
i++ i++
} }
i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
} }
currentStates = append(currentStates, start) currentStates = append(currentStates, start)
startIdx = i startIdx = i
i++ // Advance to next character so that we can check for transitions
// Main loop // Main loop
for i < len(str) { for i < len(str) {
// If there are any 0-transitions, take those // If there are any 0-transitions, take those
@ -45,7 +46,15 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
i++ i++
} }
// End-of-string reached. Check if any of our states is in the end position. // End-of-string reached. Go to any 0-states. Then check if any of our states are in the end position.
for _, state := range currentStates {
if len(state.transitions[EPSILON]) > 0 {
tempStates = append(tempStates, state.transitions[EPSILON]...)
}
}
copy(currentStates, tempStates)
tempStates = nil
for _, state := range currentStates { for _, state := range currentStates {
if state.isLast { if state.isLast {
endIdx = i endIdx = i

@ -4,10 +4,11 @@ const EPSILON int = 0
type State struct { type State struct {
content int // Contents of current state content int // Contents of current state
isEmpty bool // If it is empty - Union operator states will be empty isEmpty bool // If it is empty - Union operator and Kleene star states will be empty
isLast bool // If it is the last state (accept state) isLast bool // If it is the last state (accept state)
output []*State // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these. output []*State // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
transitions map[int][]*State // Transitions to different states (maps a character (int representation) to a list of states. This is useful if one character can lead to multiple states, e.g. ab|aa) transitions map[int][]*State // Transitions to different states (maps a character (int representation) to a list of states. This is useful if one character can lead to multiple states, e.g. ab|aa)
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
} }
type NFA struct { type NFA struct {
@ -25,6 +26,15 @@ func verifyLastStatesHelper(state *State, visited map[*State]bool) {
state.isLast = true state.isLast = true
return return
} }
if len(state.transitions) == 1 && state.isKleene { // A State representing a Kleene Star has a transition going out, which loops back to it. If that is the only transition (and it contains only one state), then it must be a last-state
for _, v := range state.transitions { // Should only loop once
if len(v) == 1 {
state.isLast = true
return
}
}
}
if visited[state] == true { if visited[state] == true {
return return
} }

Loading…
Cancel
Save