Major restructuring - added new type, changed return types for shuntingYard and thompson
I added a new function 'Compile' that calls shuntingYard and thompson. I also added a new type 'Reg' that this function returns - it represents the starting state and contains the number of capturing groups in the regex. I also rewrote shuntingYard and thompson to return errors instead of panicking.
This commit is contained in:
60
compile.go
60
compile.go
@@ -7,6 +7,29 @@ import (
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// A Reg represents the result of compiling a regular expression. It contains
|
||||
// the startState of the NFA representation of the regex, and the number of capturing
|
||||
// groups in the regex.
|
||||
type Reg struct {
|
||||
start *State
|
||||
numGroups int
|
||||
}
|
||||
|
||||
// CONCAT is the rune that isOperator accepts as an operator and that priority
// ranks just above '|'.
// NOTE(review): presumably the explicit concatenation marker inserted while the
// regex is converted to postfix — confirm against shuntingYard.
const CONCAT rune = '~'
|
||||
|
||||
func isOperator(c rune) bool {
|
||||
if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/* priority returns the priority of the given operator */
|
||||
func priority(op rune) int {
|
||||
precedence := []rune{'|', CONCAT, '+', '*', '?'}
|
||||
return slices.Index(precedence, op)
|
||||
}
|
||||
|
||||
/*
|
||||
The Shunting-Yard algorithm is used to convert the given infix (regular) expression to postfix.
|
||||
The primary benefit of this is getting rid of parentheses.
|
||||
@@ -416,7 +439,7 @@ func shuntingYard(re string) ([]postfixNode, error) {
|
||||
|
||||
// Thompson's algorithm. Constructs Finite-State Automaton from given string.
|
||||
// Returns start state and number of groups in regex.
|
||||
func thompson(re []postfixNode) (*State, int) {
|
||||
func thompson(re []postfixNode) (Reg, error) {
|
||||
nfa := make([]*State, 0) // Stack of states
|
||||
numGroups := 0 // Number of capturing groups
|
||||
for _, c := range re {
|
||||
@@ -465,10 +488,16 @@ func thompson(re []postfixNode) (*State, int) {
|
||||
state.assert = NLB
|
||||
}
|
||||
}
|
||||
tmpRe := shuntingYard(state.lookaroundRegex)
|
||||
var numGroupsLookaround int
|
||||
state.lookaroundNFA, numGroupsLookaround = thompson(tmpRe)
|
||||
state.lookaroundNumCaptureGroups = numGroupsLookaround
|
||||
tmpRe, err := shuntingYard(state.lookaroundRegex)
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("Error parsing lookaround: %w", err)
|
||||
}
|
||||
reg, err := thompson(tmpRe)
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("Error compiling lookaround: %w", err)
|
||||
}
|
||||
state.lookaroundNFA = reg.start
|
||||
state.lookaroundNumCaptureGroups = reg.numGroups
|
||||
|
||||
}
|
||||
}
|
||||
@@ -533,7 +562,7 @@ func thompson(re []postfixNode) (*State, int) {
|
||||
}
|
||||
if c.startReps != 1 || c.endReps != 1 { // Must have a numeric specifier attached to it
|
||||
if c.endReps != -1 && c.endReps < c.startReps {
|
||||
panic("ERROR: Numeric specifier - start greater than end.")
|
||||
return Reg{}, fmt.Errorf("Numeric specifier - start greater than end.")
|
||||
}
|
||||
state := mustPop(&nfa)
|
||||
var stateToAdd *State = nil
|
||||
@@ -564,11 +593,26 @@ func thompson(re []postfixNode) (*State, int) {
|
||||
}
|
||||
}
|
||||
if len(nfa) != 1 {
|
||||
panic("ERROR: Invalid Regex.")
|
||||
return Reg{}, fmt.Errorf("Invalid Regex.")
|
||||
}
|
||||
|
||||
verifyLastStates(nfa)
|
||||
|
||||
return nfa[0], numGroups
|
||||
return Reg{nfa[0], numGroups}, nil
|
||||
|
||||
}
|
||||
|
||||
// Compiles the given regular expression into a Reg type, suitable for use with the
|
||||
// matching functions. The second return value is non-nil if a compilation error has
|
||||
// occured. As such, the error value must be checked before using the Reg returned by this function.
|
||||
func Compile(re string) (Reg, error) {
|
||||
nodes, err := shuntingYard(re)
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("Error parsing regex: %w", err)
|
||||
}
|
||||
reg, err := thompson(nodes)
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("Error compiling regex: %w", err)
|
||||
}
|
||||
return reg, nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user