Major restructuring - added new type, changed return types for shuntingYard and thompson

I added a new function 'Compile' that calls shuntingYard and thompson. I also added
a new type 'Reg' that this function returns - it represents the starting state and contains
the number of capturing groups in the regex. I also rewrote shuntingYard and thompson
to return errors instead of panicking.
master
Aadhavan Srinivasan 2 weeks ago
parent ddbcb309b0
commit c8613c1ba2

@ -7,6 +7,29 @@ import (
"unicode" "unicode"
) )
// A Reg represents the result of compiling a regular expression. It contains
// the start state of the NFA representation of the regex, and the number of
// capturing groups in the regex.
type Reg struct {
// start is the start state of the compiled NFA.
start *State
// numGroups is the number of capturing groups in the regex.
numGroups int
}
// CONCAT is the rune that isOperator and priority treat as an operator
// alongside '+', '?', '*' and '|'.
// NOTE(review): presumably it marks implicit concatenation in the regex -
// confirm against shuntingYard.
const CONCAT rune = '~'

// isOperator reports whether c is one of the operator runes:
// '+', '?', '*', '|' or CONCAT.
func isOperator(c rune) bool {
	switch c {
	case '+', '?', '*', '|', CONCAT:
		return true
	default:
		return false
	}
}
// priority returns the priority of the given operator: its position in the
// ordered list '|', CONCAT, '+', '*', '?' (so '|' is 0 and '?' is 4).
// A rune that is not in the list yields -1.
func priority(op rune) int {
	return slices.Index([]rune{'|', CONCAT, '+', '*', '?'}, op)
}
/* /*
The Shunting-Yard algorithm is used to convert the given infix (regular) expression to postfix. The Shunting-Yard algorithm is used to convert the given infix (regular) expression to postfix.
The primary benefit of this is getting rid of parentheses. The primary benefit of this is getting rid of parentheses.
@ -416,7 +439,7 @@ func shuntingYard(re string) ([]postfixNode, error) {
// Thompson's algorithm. Constructs Finite-State Automaton from given string. // Thompson's algorithm. Constructs Finite-State Automaton from given string.
// Returns start state and number of groups in regex. // Returns start state and number of groups in regex.
func thompson(re []postfixNode) (*State, int) { func thompson(re []postfixNode) (Reg, error) {
nfa := make([]*State, 0) // Stack of states nfa := make([]*State, 0) // Stack of states
numGroups := 0 // Number of capturing groups numGroups := 0 // Number of capturing groups
for _, c := range re { for _, c := range re {
@ -465,10 +488,16 @@ func thompson(re []postfixNode) (*State, int) {
state.assert = NLB state.assert = NLB
} }
} }
tmpRe := shuntingYard(state.lookaroundRegex) tmpRe, err := shuntingYard(state.lookaroundRegex)
var numGroupsLookaround int if err != nil {
state.lookaroundNFA, numGroupsLookaround = thompson(tmpRe) return Reg{}, fmt.Errorf("Error parsing lookaround: %w", err)
state.lookaroundNumCaptureGroups = numGroupsLookaround }
reg, err := thompson(tmpRe)
if err != nil {
return Reg{}, fmt.Errorf("Error compiling lookaround: %w", err)
}
state.lookaroundNFA = reg.start
state.lookaroundNumCaptureGroups = reg.numGroups
} }
} }
@ -533,7 +562,7 @@ func thompson(re []postfixNode) (*State, int) {
} }
if c.startReps != 1 || c.endReps != 1 { // Must have a numeric specifier attached to it if c.startReps != 1 || c.endReps != 1 { // Must have a numeric specifier attached to it
if c.endReps != -1 && c.endReps < c.startReps { if c.endReps != -1 && c.endReps < c.startReps {
panic("ERROR: Numeric specifier - start greater than end.") return Reg{}, fmt.Errorf("Numeric specifier - start greater than end.")
} }
state := mustPop(&nfa) state := mustPop(&nfa)
var stateToAdd *State = nil var stateToAdd *State = nil
@ -564,11 +593,26 @@ func thompson(re []postfixNode) (*State, int) {
} }
} }
if len(nfa) != 1 { if len(nfa) != 1 {
panic("ERROR: Invalid Regex.") return Reg{}, fmt.Errorf("Invalid Regex.")
} }
verifyLastStates(nfa) verifyLastStates(nfa)
return nfa[0], numGroups return Reg{nfa[0], numGroups}, nil
}
// Compiles the given regular expression into a Reg type, suitable for use with the
// matching functions. The second return value is non-nil if a compilation error has
// occured. As such, the error value must be checked before using the Reg returned by this function.
func Compile(re string) (Reg, error) {
nodes, err := shuntingYard(re)
if err != nil {
return Reg{}, fmt.Errorf("Error parsing regex: %w", err)
}
reg, err := thompson(nodes)
if err != nil {
return Reg{}, fmt.Errorf("Error compiling regex: %w", err)
}
return reg, nil
} }

Loading…
Cancel
Save