More Kleene star fixes

master
Aadhavan Srinivasan 2 months ago
parent 9d3bc2b804
commit 11dd6aeb7c

@ -3,7 +3,7 @@ module re
go 1.23.1 go 1.23.1
require ( require (
github.com/fatih/color v1.18.0 // indirect github.com/fatih/color v1.18.0
github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-isatty v0.0.20 // indirect
golang.org/x/sys v0.25.0 // indirect golang.org/x/sys v0.25.0 // indirect

@ -143,7 +143,8 @@ func thompson(re string) *State {
s2 := pop(&nfa) s2 := pop(&nfa)
s3 := State{} s3 := State{}
s3.transitions = make(map[int][]*State) s3.transitions = make(map[int][]*State)
s3.output = append(s3.output, s1, s2) s3.output = append(s3.output, s1.output...)
s3.output = append(s3.output, s2.output...)
s3.transitions[s1.content] = append(s3.transitions[s1.content], s1) s3.transitions[s1.content] = append(s3.transitions[s1.content], s1)
s3.transitions[s2.content] = append(s3.transitions[s2.content], s2) s3.transitions[s2.content] = append(s3.transitions[s2.content], s2)
s3.content = EPSILON s3.content = EPSILON
@ -163,6 +164,11 @@ func thompson(re string) *State {
} }
func main() { func main() {
// Process:
// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
// a. Add explicit concatenation operators to facilitate this
// 2. Build NFA from postfix representation (Thompson's algorithm)
// 3. Run the string against the NFA
if len(os.Args) < 3 { if len(os.Args) < 3 {
fmt.Println("ERROR: Missing cmdline args") fmt.Println("ERROR: Missing cmdline args")
os.Exit(22) os.Exit(22)

@ -1,5 +1,22 @@
package main package main
// takeZeroState follows the EPSILON transitions (the "0-state" transitions)
// out of every state in the given slice and gathers the states they lead to,
// preserving the order in which they are encountered. States without such a
// transition contribute nothing.
//
// The first return value is the gathered slice (nil if nothing was gathered).
// The second return value is true when at least one of the gathered states has
// EPSILON transitions of its own, and false otherwise.
func takeZeroState(states []*State) (rtv []*State, isZero bool) {
	for i := range states {
		// Appending an empty or missing transition list leaves rtv untouched,
		// so no explicit length check is needed here.
		rtv = append(rtv, states[i].transitions[EPSILON]...)
	}
	for _, next := range rtv {
		if len(next.transitions[EPSILON]) != 0 {
			isZero = true
			break
		}
	}
	return rtv, isZero
}
// match tries to match the regex represented by given start-state, with // match tries to match the regex represented by given start-state, with
// the given string // the given string
func match(start *State, str string) (startIdx int, endIdx int, matched bool) { func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
@ -18,14 +35,16 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
// Main loop // Main loop
for i < len(str) { for i < len(str) {
// If there are any 0-transitions, take those zeroStates := make([]*State, 0)
// TODO: Maybe I need to keep taking 0-transitions until I don't have anymore. Needs to be tested // Keep taking zero-states, until there are no more left to take
for _, state := range currentStates { zeroStates, isZero := takeZeroState(currentStates)
if len(state.transitions[EPSILON]) > 0 { tempStates = append(tempStates, zeroStates...)
tempStates = append(tempStates, state.transitions[EPSILON]...) for isZero == true {
} zeroStates, isZero = takeZeroState(tempStates)
tempStates = append(tempStates, zeroStates...)
} }
copy(currentStates, tempStates)
currentStates = append(currentStates, tempStates...)
tempStates = nil tempStates = nil
// Take any transitions corresponding to current character // Take any transitions corresponding to current character
@ -40,6 +59,7 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
} }
} }
} }
currentStates = make([]*State, len(tempStates))
copy(currentStates, tempStates) copy(currentStates, tempStates)
tempStates = nil tempStates = nil
@ -52,7 +72,7 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
tempStates = append(tempStates, state.transitions[EPSILON]...) tempStates = append(tempStates, state.transitions[EPSILON]...)
} }
} }
copy(currentStates, tempStates) currentStates = append(currentStates, tempStates...)
tempStates = nil tempStates = nil
for _, state := range currentStates { for _, state := range currentStates {

Loading…
Cancel
Save