Fixed Kleene star behavior, which used to behave like a '+'
This commit is contained in:
19
main.go
19
main.go
@@ -9,7 +9,6 @@ import (
|
||||
)
|
||||
|
||||
const CONCAT rune = '~'
|
||||
const UNION int = 0
|
||||
|
||||
func isOperator(c rune) bool {
|
||||
if c == '*' || c == '|' || c == CONCAT {
|
||||
@@ -126,15 +125,19 @@ func thompson(re string) *State {
|
||||
}
|
||||
s1.output = s2.output
|
||||
nfa = append(nfa, s1)
|
||||
case '*':
|
||||
case '*': // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
||||
s1 := pop(&nfa)
|
||||
stateToAdd := &State{}
|
||||
stateToAdd.transitions = make(map[int][]*State)
|
||||
stateToAdd.content = EPSILON
|
||||
stateToAdd.isEmpty = true
|
||||
stateToAdd.isKleene = true
|
||||
stateToAdd.output = append(stateToAdd.output, stateToAdd)
|
||||
for i := range s1.output {
|
||||
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1)
|
||||
s1.output[i].transitions[stateToAdd.content] = append(s1.output[i].transitions[stateToAdd.content], stateToAdd)
|
||||
}
|
||||
// Reset output to s1 (in case s1 was a union operator state, which has multiple outputs)
|
||||
s1.output = nil
|
||||
s1.output = append(s1.output, s1)
|
||||
nfa = append(nfa, s1)
|
||||
stateToAdd.transitions[s1.content] = append(stateToAdd.transitions[s1.content], s1)
|
||||
nfa = append(nfa, stateToAdd)
|
||||
case '|':
|
||||
s1 := pop(&nfa)
|
||||
s2 := pop(&nfa)
|
||||
@@ -143,7 +146,7 @@ func thompson(re string) *State {
|
||||
s3.output = append(s3.output, s1, s2)
|
||||
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
|
||||
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
|
||||
s3.content = UNION
|
||||
s3.content = EPSILON
|
||||
s3.isEmpty = true
|
||||
|
||||
nfa = append(nfa, &s3)
|
||||
|
15
matching.go
15
matching.go
@@ -8,13 +8,14 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
|
||||
i := 0 // Index in string
|
||||
// Increment until we hit a character matching the start state
|
||||
if start.isEmpty == false {
|
||||
for int(str[i]) != start.content {
|
||||
for i < len(str) && int(str[i]) != start.content {
|
||||
i++
|
||||
}
|
||||
i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
|
||||
}
|
||||
currentStates = append(currentStates, start)
|
||||
startIdx = i
|
||||
i++ // Advance to next character so that we can check for transitions
|
||||
|
||||
// Main loop
|
||||
for i < len(str) {
|
||||
// If there are any 0-transitions, take those
|
||||
@@ -45,7 +46,15 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
|
||||
i++
|
||||
}
|
||||
|
||||
// End-of-string reached. Check if any of our states is in the end position.
|
||||
// End-of-string reached. Go to any 0-states. Then check if any of our states are in the end position.
|
||||
for _, state := range currentStates {
|
||||
if len(state.transitions[EPSILON]) > 0 {
|
||||
tempStates = append(tempStates, state.transitions[EPSILON]...)
|
||||
}
|
||||
}
|
||||
copy(currentStates, tempStates)
|
||||
tempStates = nil
|
||||
|
||||
for _, state := range currentStates {
|
||||
if state.isLast {
|
||||
endIdx = i
|
||||
|
12
nfa.go
12
nfa.go
@@ -4,10 +4,11 @@ const EPSILON int = 0
|
||||
|
||||
type State struct {
|
||||
content int // Contents of current state
|
||||
isEmpty bool // If it is empty - Union operator states will be empty
|
||||
isEmpty bool // If it is empty - Union operator and Kleene star states will be empty
|
||||
isLast bool // If it is the last state (accept state)
|
||||
output []*State // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
|
||||
transitions map[int][]*State // Transitions to different states (maps a character (int representation) to a list of states. This is useful if one character can lead to multiple states, e.g. ab|aa)
|
||||
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
||||
}
|
||||
|
||||
type NFA struct {
|
||||
@@ -25,6 +26,15 @@ func verifyLastStatesHelper(state *State, visited map[*State]bool) {
|
||||
state.isLast = true
|
||||
return
|
||||
}
|
||||
|
||||
if len(state.transitions) == 1 && state.isKleene { // A State representing a Kleene Star has a transition going out, which loops back to it. If that is the only transition (and it contains only one state), then it must be a last-state
|
||||
for _, v := range state.transitions { // Should only loop once
|
||||
if len(v) == 1 {
|
||||
state.isLast = true
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
if visited[state] == true {
|
||||
return
|
||||
}
|
||||
|
Reference in New Issue
Block a user