Fixed Kleene Star matching
This commit is contained in:
23
main.go
23
main.go
@@ -102,8 +102,8 @@ func shuntingYard(re string) string {
|
|||||||
|
|
||||||
// Thompson's algorithm. Constructs Finite-State Automaton from given string.
|
// Thompson's algorithm. Constructs Finite-State Automaton from given string.
|
||||||
// Returns start state.
|
// Returns start state.
|
||||||
func thompson(re string) State {
|
func thompson(re string) *State {
|
||||||
nfa := make([]State, 0) // Stack of states
|
nfa := make([]*State, 0) // Stack of states
|
||||||
for _, c := range re {
|
for _, c := range re {
|
||||||
if isAlphaNum(c) {
|
if isAlphaNum(c) {
|
||||||
state := State{}
|
state := State{}
|
||||||
@@ -112,7 +112,7 @@ func thompson(re string) State {
|
|||||||
state.output = make([]*State, 0)
|
state.output = make([]*State, 0)
|
||||||
state.output = append(state.output, &state)
|
state.output = append(state.output, &state)
|
||||||
state.isEmpty = false
|
state.isEmpty = false
|
||||||
nfa = append(nfa, state)
|
nfa = append(nfa, &state)
|
||||||
}
|
}
|
||||||
// Must be an operator if it isn't alphanumeric
|
// Must be an operator if it isn't alphanumeric
|
||||||
switch c {
|
switch c {
|
||||||
@@ -120,31 +120,31 @@ func thompson(re string) State {
|
|||||||
s2 := pop(&nfa)
|
s2 := pop(&nfa)
|
||||||
s1 := pop(&nfa)
|
s1 := pop(&nfa)
|
||||||
for i := range s1.output {
|
for i := range s1.output {
|
||||||
s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], &s2)
|
s1.output[i].transitions[s2.content] = append(s1.output[i].transitions[s2.content], s2)
|
||||||
}
|
}
|
||||||
s1.output = s2.output
|
s1.output = s2.output
|
||||||
nfa = append(nfa, s1)
|
nfa = append(nfa, s1)
|
||||||
case '*':
|
case '*':
|
||||||
s1 := pop(&nfa)
|
s1 := pop(&nfa)
|
||||||
for i := range s1.output {
|
for i := range s1.output {
|
||||||
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], &s1)
|
s1.output[i].transitions[s1.content] = append(s1.output[i].transitions[s1.content], s1)
|
||||||
}
|
}
|
||||||
// Reset output to s1 (in case s1 was a union operator state, which has multiple outputs)
|
// Reset output to s1 (in case s1 was a union operator state, which has multiple outputs)
|
||||||
s1.output = nil
|
s1.output = nil
|
||||||
s1.output = append(s1.output, &s1)
|
s1.output = append(s1.output, s1)
|
||||||
nfa = append(nfa, s1)
|
nfa = append(nfa, s1)
|
||||||
case '|':
|
case '|':
|
||||||
s1 := pop(&nfa)
|
s1 := pop(&nfa)
|
||||||
s2 := pop(&nfa)
|
s2 := pop(&nfa)
|
||||||
s3 := State{}
|
s3 := State{}
|
||||||
s3.transitions = make(map[int][]*State)
|
s3.transitions = make(map[int][]*State)
|
||||||
s3.output = append(s3.output, &s1, &s2)
|
s3.output = append(s3.output, s1, s2)
|
||||||
s3.transitions[s1.content] = append(s3.transitions[s1.content], &s1)
|
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
|
||||||
s3.transitions[s2.content] = append(s3.transitions[s2.content], &s2)
|
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
|
||||||
s3.content = UNION
|
s3.content = UNION
|
||||||
s3.isEmpty = true
|
s3.isEmpty = true
|
||||||
|
|
||||||
nfa = append(nfa, s3)
|
nfa = append(nfa, &s3)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(nfa) != 1 {
|
if len(nfa) != 1 {
|
||||||
@@ -160,13 +160,14 @@ func thompson(re string) State {
|
|||||||
func main() {
|
func main() {
|
||||||
if len(os.Args) < 3 {
|
if len(os.Args) < 3 {
|
||||||
fmt.Println("ERROR: Missing cmdline args")
|
fmt.Println("ERROR: Missing cmdline args")
|
||||||
|
os.Exit(22)
|
||||||
}
|
}
|
||||||
var re string
|
var re string
|
||||||
re = os.Args[1]
|
re = os.Args[1]
|
||||||
re_postfix := shuntingYard(re)
|
re_postfix := shuntingYard(re)
|
||||||
fmt.Println(re_postfix)
|
fmt.Println(re_postfix)
|
||||||
start := thompson(re_postfix)
|
start := thompson(re_postfix)
|
||||||
s, e, matched := match(&start, os.Args[2])
|
s, e, matched := match(start, os.Args[2])
|
||||||
if matched {
|
if matched {
|
||||||
fmt.Printf("Matched from %d to %d\n", s, e)
|
fmt.Printf("Matched from %d to %d\n", s, e)
|
||||||
} else {
|
} else {
|
||||||
|
24
matching.go
24
matching.go
@@ -31,22 +31,30 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
|
|||||||
for _, state := range currentStates {
|
for _, state := range currentStates {
|
||||||
if len(state.transitions[int(str[i])]) > 0 {
|
if len(state.transitions[int(str[i])]) > 0 {
|
||||||
tempStates = append(tempStates, state.transitions[int(str[i])]...)
|
tempStates = append(tempStates, state.transitions[int(str[i])]...)
|
||||||
}
|
} else {
|
||||||
}
|
// This enables the 'greedy' behavior - last-state status is only checked if we can't match anything else
|
||||||
copy(currentStates, tempStates)
|
|
||||||
tempStates = nil
|
|
||||||
|
|
||||||
// If any of the current-states is a last state, return true.
|
|
||||||
for _, state := range currentStates {
|
|
||||||
if state.isLast {
|
if state.isLast {
|
||||||
endIdx = i
|
endIdx = i
|
||||||
return startIdx, endIdx, true
|
return startIdx, endIdx, true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
copy(currentStates, tempStates)
|
||||||
|
tempStates = nil
|
||||||
|
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
// We don't seem to have reached a last-state. Return false.
|
// End-of-string reached. Check if any of our states is in the end position.
|
||||||
|
for _, state := range currentStates {
|
||||||
|
if state.isLast {
|
||||||
|
endIdx = i
|
||||||
|
return startIdx, endIdx, true
|
||||||
|
} else {
|
||||||
return -1, -1, false
|
return -1, -1, false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default
|
||||||
|
return -1, -1, false
|
||||||
}
|
}
|
||||||
|
4
nfa.go
4
nfa.go
@@ -39,6 +39,6 @@ func verifyLastStatesHelper(state *State, visited map[*State]bool) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
|
// verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
|
||||||
func verifyLastStates(start []State) {
|
func verifyLastStates(start []*State) {
|
||||||
verifyLastStatesHelper(&start[0], make(map[*State]bool))
|
verifyLastStatesHelper(start[0], make(map[*State]bool))
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user