Fixed Kleene Star matching

master
Aadhavan Srinivasan 3 months ago
parent d191686168
commit bc11777ad5

@ -102,8 +102,8 @@ func shuntingYard(re string) string {
// Thompson's algorithm. Constructs Finite-State Automaton from given string.
// Returns start state.
func thompson(re string) State {
nfa := make([]State, 0) // Stack of states
func thompson(re string) *State {
nfa := make([]*State, 0) // Stack of states
for _, c := range re {
if isAlphaNum(c) {
state := State{}
@ -112,7 +112,7 @@ func thompson(re string) State {
state.output = make([]*State, 0)
state.output = append(state.output, &state)
state.isEmpty = false
nfa = append(nfa, state)
nfa = append(nfa, &state)
}
// Must be an operator if it isn't alphanumeric
switch c {
@ -120,31 +120,31 @@ func thompson(re string) State {
s2 := pop(&nfa)
s1 := pop(&nfa)
for i := range s1.output {
s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], &s2)
s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], s2)
}
s1.output = s2.output
nfa = append(nfa, s1)
case '*':
s1 := pop(&nfa)
for i := range s1.output {
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], &s1)
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1)
}
// Reset output to s1 (in case s1 was a union operator state, which has multiple outputs)
s1.output = nil
s1.output = append(s1.output, &s1)
s1.output = append(s1.output, s1)
nfa = append(nfa, s1)
case '|':
s1 := pop(&nfa)
s2 := pop(&nfa)
s3 := State{}
s3.transitions = make(map[int][]*State)
s3.output = append(s3.output, &s1, &s2)
s3.transitions[s1.content] = append(s3.transitions[s1.content], &s1)
s3.transitions[s2.content] = append(s3.transitions[s2.content], &s2)
s3.output = append(s3.output, s1, s2)
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
s3.content = UNION
s3.isEmpty = true
nfa = append(nfa, s3)
nfa = append(nfa, &s3)
}
}
if len(nfa) != 1 {
@ -160,13 +160,14 @@ func thompson(re string) State {
func main() {
if len(os.Args) < 3 {
fmt.Println("ERROR: Missing cmdline args")
os.Exit(22)
}
var re string
re = os.Args[1]
re_postfix := shuntingYard(re)
fmt.Println(re_postfix)
start := thompson(re_postfix)
s, e, matched := match(&start, os.Args[2])
s, e, matched := match(start, os.Args[2])
if matched {
fmt.Printf("Matched from %d to %d\n", s, e)
} else {

@ -31,22 +31,30 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
for _, state := range currentStates {
if len(state.transitions[int(str[i])]) > 0 {
tempStates = append(tempStates, state.transitions[int(str[i])]...)
} else {
// This enables the 'greedy' behavior - last-state status is only checked if we can't match anything else
if state.isLast {
endIdx = i
return startIdx, endIdx, true
}
}
}
copy(currentStates, tempStates)
tempStates = nil
// If any of the current-states is a last state, return true.
for _, state := range currentStates {
if state.isLast {
endIdx = i
return startIdx, endIdx, true
}
}
i++
}
// We don't seem to have reached a last-state. Return false.
return -1, -1, false
// End-of-string reached. Check if any of our states is in the end position.
for _, state := range currentStates {
if state.isLast {
endIdx = i
return startIdx, endIdx, true
} else {
return -1, -1, false
}
}
// Default
return -1, -1, false
}

@ -39,6 +39,6 @@ func verifyLastStatesHelper(state *State, visited map[*State]bool) {
}
// verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
func verifyLastStates(start []State) {
verifyLastStatesHelper(&start[0], make(map[*State]bool))
func verifyLastStates(start []*State) {
verifyLastStatesHelper(start[0], make(map[*State]bool))
}

Loading…
Cancel
Save