Fixed Kleene star behavior: '*' used to behave like a '+'
This commit is contained in:
19
main.go
19
main.go
@@ -9,7 +9,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const CONCAT rune = '~'
|
const CONCAT rune = '~'
|
||||||
const UNION int = 0
|
|
||||||
|
|
||||||
func isOperator(c rune) bool {
|
func isOperator(c rune) bool {
|
||||||
if c == '*' || c == '|' || c == CONCAT {
|
if c == '*' || c == '|' || c == CONCAT {
|
||||||
@@ -126,15 +125,19 @@ func thompson(re string) *State {
|
|||||||
}
|
}
|
||||||
s1.output = s2.output
|
s1.output = s2.output
|
||||||
nfa = append(nfa, s1)
|
nfa = append(nfa, s1)
|
||||||
case '*':
|
case '*': // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
||||||
s1 := pop(&nfa)
|
s1 := pop(&nfa)
|
||||||
|
stateToAdd := &State{}
|
||||||
|
stateToAdd.transitions = make(map[int][]*State)
|
||||||
|
stateToAdd.content = EPSILON
|
||||||
|
stateToAdd.isEmpty = true
|
||||||
|
stateToAdd.isKleene = true
|
||||||
|
stateToAdd.output = append(stateToAdd.output, stateToAdd)
|
||||||
for i := range s1.output {
|
for i := range s1.output {
|
||||||
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1)
|
s1.output[i].transitions[stateToAdd.content] = append(s1.output[i].transitions[stateToAdd.content], stateToAdd)
|
||||||
}
|
}
|
||||||
// Reset output to s1 (in case s1 was a union operator state, which has multiple outputs)
|
stateToAdd.transitions[s1.content] = append(stateToAdd.transitions[s1.content], s1)
|
||||||
s1.output = nil
|
nfa = append(nfa, stateToAdd)
|
||||||
s1.output = append(s1.output, s1)
|
|
||||||
nfa = append(nfa, s1)
|
|
||||||
case '|':
|
case '|':
|
||||||
s1 := pop(&nfa)
|
s1 := pop(&nfa)
|
||||||
s2 := pop(&nfa)
|
s2 := pop(&nfa)
|
||||||
@@ -143,7 +146,7 @@ func thompson(re string) *State {
|
|||||||
s3.output = append(s3.output, s1, s2)
|
s3.output = append(s3.output, s1, s2)
|
||||||
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
|
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
|
||||||
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
|
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
|
||||||
s3.content = UNION
|
s3.content = EPSILON
|
||||||
s3.isEmpty = true
|
s3.isEmpty = true
|
||||||
|
|
||||||
nfa = append(nfa, &s3)
|
nfa = append(nfa, &s3)
|
||||||
|
15
matching.go
15
matching.go
@@ -8,13 +8,14 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
|
|||||||
i := 0 // Index in string
|
i := 0 // Index in string
|
||||||
// Increment until we hit a character matching the start state
|
// Increment until we hit a character matching the start state
|
||||||
if start.isEmpty == false {
|
if start.isEmpty == false {
|
||||||
for int(str[i]) != start.content {
|
for i < len(str) && int(str[i]) != start.content {
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
|
||||||
}
|
}
|
||||||
currentStates = append(currentStates, start)
|
currentStates = append(currentStates, start)
|
||||||
startIdx = i
|
startIdx = i
|
||||||
i++ // Advance to next character so that we can check for transitions
|
|
||||||
// Main loop
|
// Main loop
|
||||||
for i < len(str) {
|
for i < len(str) {
|
||||||
// If there are any 0-transitions, take those
|
// If there are any 0-transitions, take those
|
||||||
@@ -45,7 +46,15 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
|
|||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
// End-of-string reached. Check if any of our states is in the end position.
|
// End-of-string reached. Go to any 0-states. Then check if any of our states are in the end position.
|
||||||
|
for _, state := range currentStates {
|
||||||
|
if len(state.transitions[EPSILON]) > 0 {
|
||||||
|
tempStates = append(tempStates, state.transitions[EPSILON]...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
copy(currentStates, tempStates)
|
||||||
|
tempStates = nil
|
||||||
|
|
||||||
for _, state := range currentStates {
|
for _, state := range currentStates {
|
||||||
if state.isLast {
|
if state.isLast {
|
||||||
endIdx = i
|
endIdx = i
|
||||||
|
12
nfa.go
12
nfa.go
@@ -4,10 +4,11 @@ const EPSILON int = 0
|
|||||||
|
|
||||||
type State struct {
|
type State struct {
|
||||||
content int // Contents of current state
|
content int // Contents of current state
|
||||||
isEmpty bool // If it is empty - Union operator states will be empty
|
isEmpty bool // If it is empty - Union operator and Kleene star states will be empty
|
||||||
isLast bool // If it is the last state (accept state)
|
isLast bool // If it is the last state (accept state)
|
||||||
output []*State // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
|
output []*State // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
|
||||||
transitions map[int][]*State // Transitions to different states (maps a character (int representation) to a list of states. This is useful if one character can lead to multiple states, e.g. ab|aa)
|
transitions map[int][]*State // Transitions to different states (maps a character (int representation) to a list of states. This is useful if one character can lead to multiple states, e.g. ab|aa)
|
||||||
|
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
|
||||||
}
|
}
|
||||||
|
|
||||||
type NFA struct {
|
type NFA struct {
|
||||||
@@ -25,6 +26,15 @@ func verifyLastStatesHelper(state *State, visited map[*State]bool) {
|
|||||||
state.isLast = true
|
state.isLast = true
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(state.transitions) == 1 && state.isKleene { // A State representing a Kleene Star has a transition going out, which loops back to it. If that is the only transition (and it contains only one state), then it must be a last-state
|
||||||
|
for _, v := range state.transitions { // Should only loop once
|
||||||
|
if len(v) == 1 {
|
||||||
|
state.isLast = true
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if visited[state] == true {
|
if visited[state] == true {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user