Fixed Kleene Star matching

master
Aadhavan Srinivasan 2 months ago
parent d191686168
commit bc11777ad5

@ -102,8 +102,8 @@ func shuntingYard(re string) string {
// Thompson's algorithm. Constructs Finite-State Automaton from given string. // Thompson's algorithm. Constructs Finite-State Automaton from given string.
// Returns start state. // Returns start state.
func thompson(re string) State { func thompson(re string) *State {
nfa := make([]State, 0) // Stack of states nfa := make([]*State, 0) // Stack of states
for _, c := range re { for _, c := range re {
if isAlphaNum(c) { if isAlphaNum(c) {
state := State{} state := State{}
@ -112,7 +112,7 @@ func thompson(re string) State {
state.output = make([]*State, 0) state.output = make([]*State, 0)
state.output = append(state.output, &state) state.output = append(state.output, &state)
state.isEmpty = false state.isEmpty = false
nfa = append(nfa, state) nfa = append(nfa, &state)
} }
// Must be an operator if it isn't alphanumeric // Must be an operator if it isn't alphanumeric
switch c { switch c {
@ -120,31 +120,31 @@ func thompson(re string) State {
s2 := pop(&nfa) s2 := pop(&nfa)
s1 := pop(&nfa) s1 := pop(&nfa)
for i := range s1.output { for i := range s1.output {
s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], &s2) s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], s2)
} }
s1.output = s2.output s1.output = s2.output
nfa = append(nfa, s1) nfa = append(nfa, s1)
case '*': case '*':
s1 := pop(&nfa) s1 := pop(&nfa)
for i := range s1.output { for i := range s1.output {
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], &s1) s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1)
} }
// Reset output to s1 (in case s1 was a union operator state, which has multiple outputs) // Reset output to s1 (in case s1 was a union operator state, which has multiple outputs)
s1.output = nil s1.output = nil
s1.output = append(s1.output, &s1) s1.output = append(s1.output, s1)
nfa = append(nfa, s1) nfa = append(nfa, s1)
case '|': case '|':
s1 := pop(&nfa) s1 := pop(&nfa)
s2 := pop(&nfa) s2 := pop(&nfa)
s3 := State{} s3 := State{}
s3.transitions = make(map[int][]*State) s3.transitions = make(map[int][]*State)
s3.output = append(s3.output, &s1, &s2) s3.output = append(s3.output, s1, s2)
s3.transitions[s1.content] = append(s3.transitions[s1.content], &s1) s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
s3.transitions[s2.content] = append(s3.transitions[s2.content], &s2) s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
s3.content = UNION s3.content = UNION
s3.isEmpty = true s3.isEmpty = true
nfa = append(nfa, s3) nfa = append(nfa, &s3)
} }
} }
if len(nfa) != 1 { if len(nfa) != 1 {
@ -160,13 +160,14 @@ func thompson(re string) State {
func main() { func main() {
if len(os.Args) < 3 { if len(os.Args) < 3 {
fmt.Println("ERROR: Missing cmdline args") fmt.Println("ERROR: Missing cmdline args")
os.Exit(22)
} }
var re string var re string
re = os.Args[1] re = os.Args[1]
re_postfix := shuntingYard(re) re_postfix := shuntingYard(re)
fmt.Println(re_postfix) fmt.Println(re_postfix)
start := thompson(re_postfix) start := thompson(re_postfix)
s, e, matched := match(&start, os.Args[2]) s, e, matched := match(start, os.Args[2])
if matched { if matched {
fmt.Printf("Matched from %d to %d\n", s, e) fmt.Printf("Matched from %d to %d\n", s, e)
} else { } else {

@ -31,22 +31,30 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
for _, state := range currentStates { for _, state := range currentStates {
if len(state.transitions[int(str[i])]) > 0 { if len(state.transitions[int(str[i])]) > 0 {
tempStates = append(tempStates, state.transitions[int(str[i])]...) tempStates = append(tempStates, state.transitions[int(str[i])]...)
} else {
// This enables the 'greedy' behavior - last-state status is only checked if we can't match anything else
if state.isLast {
endIdx = i
return startIdx, endIdx, true
}
} }
} }
copy(currentStates, tempStates) copy(currentStates, tempStates)
tempStates = nil tempStates = nil
// If any of the current-states is a last state, return true. i++
}
// End-of-string reached. Check if any of our states is in the end position.
for _, state := range currentStates { for _, state := range currentStates {
if state.isLast { if state.isLast {
endIdx = i endIdx = i
return startIdx, endIdx, true return startIdx, endIdx, true
} else {
return -1, -1, false
} }
} }
i++
}
// We don't seem to have reached a last-state. Return false // Default
return -1, -1, false return -1, -1, false
} }

@ -39,6 +39,6 @@ func verifyLastStatesHelper(state *State, visited map[*State]bool) {
} }
// verifyLastStates enables the 'isLast' flag for the leaf nodes (last states) // verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
func verifyLastStates(start []State) { func verifyLastStates(start []*State) {
verifyLastStatesHelper(&start[0], make(map[*State]bool)) verifyLastStatesHelper(start[0], make(map[*State]bool))
} }

Loading…
Cancel
Save