Added 'flags' to the Compile function, instead of maintaining global state to check whether certain features were enabled
This commit is contained in:
40
compile.go
40
compile.go
@@ -7,6 +7,9 @@ import (
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// Holds a list of all characters that are _not_ matched by the dot metacharacter
|
||||
var notDotChars []rune
|
||||
|
||||
// A Reg represents the result of compiling a regular expression. It contains
|
||||
// the startState of the NFA representation of the regex, and the number of capturing
|
||||
// groups in the regex.
|
||||
@@ -17,6 +20,15 @@ type Reg struct {
|
||||
|
||||
const CONCAT rune = '~'
|
||||
|
||||
// Flags for shuntingYard - control its behavior
|
||||
type ReFlag int
|
||||
|
||||
const (
|
||||
RE_NO_FLAGS ReFlag = iota
|
||||
RE_CASE_INSENSITIVE
|
||||
RE_MULTILINE
|
||||
)
|
||||
|
||||
func isOperator(c rune) bool {
|
||||
if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT {
|
||||
return true
|
||||
@@ -35,9 +47,24 @@ The Shunting-Yard algorithm is used to convert the given infix (regular) expres
|
||||
The primary benefit of this is getting rid of parentheses.
|
||||
It also inserts explicit concatenation operators to make parsing easier in Thompson's algorithm.
|
||||
An error can be returned for a multitude of reasons - the reason is specified in the error string.
|
||||
The function also takes in 0 or more flags, which control the behavior of the parser.
|
||||
See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-with-the-shunting-yard-algorithm/
|
||||
*/
|
||||
func shuntingYard(re string) ([]postfixNode, error) {
|
||||
func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
// Check which flags are enabled
|
||||
|
||||
caseInsensitive := false
|
||||
// In Multiline mode, the newline character is considered a
|
||||
// 'dot' character, i.e. the dot metacharacter matches a newline as well.
|
||||
if slices.Contains(flags, RE_MULTILINE) {
|
||||
notDotChars = []rune{}
|
||||
} else {
|
||||
notDotChars = []rune{'\n'}
|
||||
}
|
||||
if slices.Contains(flags, RE_CASE_INSENSITIVE) {
|
||||
caseInsensitive = true
|
||||
}
|
||||
|
||||
re_postfix := make([]rune, 0)
|
||||
// Convert the string to a slice of runes to allow iteration through it
|
||||
re_runes_orig := []rune(re) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
|
||||
@@ -169,7 +196,7 @@ func shuntingYard(re string) ([]postfixNode, error) {
|
||||
if i >= len(re_runes) {
|
||||
return nil, fmt.Errorf("Unclosed lookaround.")
|
||||
}
|
||||
if re_runes[i] == '(' {
|
||||
if re_runes[i] == '(' || re_runes[i] == NONCAPLPAREN_CHAR {
|
||||
numOpenParens++
|
||||
}
|
||||
if re_runes[i] == ')' {
|
||||
@@ -213,7 +240,7 @@ func shuntingYard(re string) ([]postfixNode, error) {
|
||||
*/
|
||||
c := re_postfix[i]
|
||||
if isNormalChar(c) {
|
||||
if caseInsensitiveFlag != nil && *caseInsensitiveFlag {
|
||||
if caseInsensitive {
|
||||
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
|
||||
} else {
|
||||
outQueue = append(outQueue, newPostfixNode(c))
|
||||
@@ -249,7 +276,7 @@ func shuntingYard(re string) ([]postfixNode, error) {
|
||||
if i >= len(re_postfix) {
|
||||
return nil, fmt.Errorf("Unclosed lookaround.")
|
||||
}
|
||||
if re_postfix[i] == '(' {
|
||||
if re_postfix[i] == '(' || re_postfix[i] == NONCAPLPAREN_CHAR {
|
||||
numOpenParens++
|
||||
}
|
||||
if re_postfix[i] == ')' {
|
||||
@@ -605,8 +632,9 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
// Compiles the given regular expression into a Reg type, suitable for use with the
|
||||
// matching functions. The second return value is non-nil if a compilation error has
|
||||
// occurred. As such, the error value must be checked before using the Reg returned by this function.
|
||||
func Compile(re string) (Reg, error) {
|
||||
nodes, err := shuntingYard(re)
|
||||
// The second parameter is an optional list of flags, passed to the parsing function shuntingYard.
|
||||
func Compile(re string, flags ...ReFlag) (Reg, error) {
|
||||
nodes, err := shuntingYard(re, flags...)
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("Error parsing regex: %w", err)
|
||||
}
|
||||
|
21
main.go
21
main.go
@@ -10,10 +10,10 @@ import (
|
||||
"github.com/fatih/color"
|
||||
)
|
||||
|
||||
var notDotChars []rune
|
||||
var caseInsensitiveFlag *bool // Whether we are running in case-insensitive mode
|
||||
|
||||
func main() {
|
||||
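// Flags for the regex Compile function. NOTE(review): command-line flag values are only valid after flag.Parse(); the '-i' check below runs before flag.Parse(), so it always reads the default (false) - move it after the parse, next to the '-t' check.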
|
||||
flagsToCompile := make([]ReFlag, 0)
|
||||
|
||||
invertFlag := flag.Bool("v", false, "Invert match.")
|
||||
// This flag has two 'modes':
|
||||
// 1. Without '-v': Prints only matches. Prints a newline after every match.
|
||||
@@ -22,17 +22,18 @@ func main() {
|
||||
lineFlag := flag.Bool("l", false, "Only print lines with a match (or with no matches, if -v is enabled). Similar to grep's default.")
|
||||
multiLineFlag := flag.Bool("t", false, "Multi-line mode. Treats newline just like any character.")
|
||||
printMatchesFlag := flag.Bool("p", false, "Prints start and end index of each match. Can only be used with '-t' for multi-line mode.")
|
||||
caseInsensitiveFlag = flag.Bool("i", false, "Case-insensitive. Disregard the case of all characters.")
|
||||
caseInsensitiveFlag := flag.Bool("i", false, "Case-insensitive. Disregard the case of all characters.")
|
||||
if *caseInsensitiveFlag {
|
||||
flagsToCompile = append(flagsToCompile, RE_CASE_INSENSITIVE)
|
||||
}
|
||||
matchNum := flag.Int("m", 0, "Print the match with the given index. Eg. -m 3 prints the third match.")
|
||||
substituteText := flag.String("s", "", "Substitute the contents of each match with the given string. Overrides -o and -v")
|
||||
flag.Parse()
|
||||
|
||||
// In multi-line mode, 'dot' metacharacter also matches newline
|
||||
if !(*multiLineFlag) {
|
||||
notDotChars = []rune{'\n'}
|
||||
} else {
|
||||
notDotChars = []rune{}
|
||||
if *multiLineFlag {
|
||||
flagsToCompile = append(flagsToCompile, RE_MULTILINE)
|
||||
}
|
||||
|
||||
// -l and -o are mutually exclusive: -o overrides -l
|
||||
if *onlyFlag {
|
||||
*lineFlag = false
|
||||
@@ -74,7 +75,7 @@ func main() {
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
out := bufio.NewWriter(os.Stdout)
|
||||
|
||||
regComp, err := Compile(re)
|
||||
regComp, err := Compile(re, flagsToCompile...)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
|
Reference in New Issue
Block a user