From 11dd6aeb7c3bbb0f911eb8cbc414b18b40f53803 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Wed, 23 Oct 2024 10:26:50 -0400 Subject: [PATCH] More Kleene star fixes --- go.mod | 2 +- main.go | 8 +++++++- matching.go | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index b422ea0..0e521ea 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module re go 1.23.1 require ( - github.com/fatih/color v1.18.0 // indirect + github.com/fatih/color v1.18.0 github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect golang.org/x/sys v0.25.0 // indirect diff --git a/main.go b/main.go index 1ca719b..3be83e8 100644 --- a/main.go +++ b/main.go @@ -143,7 +143,8 @@ func thompson(re string) *State { s2 := pop(&nfa) s3 := State{} s3.transitions = make(map[int][]*State) - s3.output = append(s3.output, s1, s2) + s3.output = append(s3.output, s1.output...) + s3.output = append(s3.output, s2.output...) s3.transitions[s1.content] = append(s3.transitions[s1.content], s1) s3.transitions[s2.content] = append(s3.transitions[s2.content], s2) s3.content = EPSILON @@ -163,6 +164,11 @@ func thompson(re string) *State { } func main() { + // Process: + // 1. Convert regex into postfix notation (Shunting-Yard algorithm) + // a. Add explicit concatenation operators to facilitate this + // 2. Build NFA from postfix representation (Thompson's algorithm) + // 3. Run the string against the NFA if len(os.Args) < 3 { fmt.Println("ERROR: Missing cmdline args") os.Exit(22) diff --git a/matching.go b/matching.go index fc060ac..f9c7645 100644 --- a/matching.go +++ b/matching.go @@ -1,5 +1,22 @@ package main +// takeZeroState takes the 0-transition (if such a transition exists) for all states in the +// given slice. It returns the resulting states. If any of the resulting states itself has a +// 0-transition, the second return value is true.
+func takeZeroState(states []*State) (rtv []*State, isZero bool) { + for _, state := range states { + if len(state.transitions[EPSILON]) > 0 { + rtv = append(rtv, state.transitions[EPSILON]...) + } + } + for _, state := range rtv { + if len(state.transitions[EPSILON]) > 0 { + return rtv, true + } + } + return rtv, false +} + // match tries to match the regex represented by given start-state, with // the given string func match(start *State, str string) (startIdx int, endIdx int, matched bool) { @@ -18,14 +35,16 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) { // Main loop for i < len(str) { - // If there are any 0-transitions, take those - // TODO: Maybe I need to keep taking 0-transitions until I don't have anymore. Needs to be tested - for _, state := range currentStates { - if len(state.transitions[EPSILON]) > 0 { - tempStates = append(tempStates, state.transitions[EPSILON]...) - } + zeroStates := make([]*State, 0) + // Keep taking zero-states, until there are no more left to take + zeroStates, isZero := takeZeroState(currentStates) + tempStates = append(tempStates, zeroStates...) + for isZero == true { + zeroStates, isZero = takeZeroState(tempStates) + tempStates = append(tempStates, zeroStates...) } - copy(currentStates, tempStates) + + currentStates = append(currentStates, tempStates...) tempStates = nil // Take any transitions corresponding to current character @@ -40,6 +59,7 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) { } } } + currentStates = make([]*State, len(tempStates)) copy(currentStates, tempStates) tempStates = nil @@ -52,7 +72,7 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) { tempStates = append(tempStates, state.transitions[EPSILON]...) } } - copy(currentStates, tempStates) + currentStates = append(currentStates, tempStates...) tempStates = nil for _, state := range currentStates {