@ -7,6 +7,9 @@ import (
// notDotChars holds every rune that the dot metacharacter must _not_ match.
// shuntingYard reassigns it on each call: it becomes empty when RE_MULTILINE
// is passed, and contains only '\n' otherwise.
// NOTE(review): this is package-level state overwritten by every call to
// shuntingYard - confirm that compilations using different flags can never
// interleave.
var notDotChars []rune
// A Reg represents the result of compiling a regular expression. It contains
// the startState of the NFA representation of the regex, and the number of capturing
// groups in the regex.
@ -17,6 +20,15 @@ type Reg struct {
// CONCAT is treated as an operator by isOperator, alongside '+', '?', '*' and '|'.
// Presumably it is the explicit concatenation operator that shuntingYard
// inserts into the expression - TODO confirm.
const CONCAT rune = '~'
// ReFlag is a flag that controls the behavior of the parser. Zero or more
// flags can be passed (as a variadic list) to Compile, which forwards them
// to shuntingYard.
type ReFlag int
const (
RE_NO_FLAGS ReFlag = iota
func isOperator(c rune) bool {
if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT {
return true
@ -35,9 +47,24 @@ The Shunting-Yard algorithm is used to convert the given infix (regeular) expres
The primary benefit of this is getting rid of parentheses.
It also inserts explicit concatenation operators to make parsing easier in Thompson's algorithm.
An error can be returned for a multitude of reasons - the reason is specified in the error string.
The function also takes in 0 or more flags, which control the behavior of the parser.
See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-with-the-shunting-yard-algorithm/
func shuntingYard(re string) ([]postfixNode, error) {
func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// Check which flags are enabled
caseInsensitive := false
// In Multiline mode, the newline character is treated like any other
// character, i.e., the dot metacharacter matches a newline as well.
if slices.Contains(flags, RE_MULTILINE) {
notDotChars = []rune{}
} else {
notDotChars = []rune{'\n'}
if slices.Contains(flags, RE_CASE_INSENSITIVE) {
caseInsensitive = true
re_postfix := make([]rune, 0)
// Convert the string to a slice of runes to allow iteration through it
re_runes_orig := []rune(re) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
@ -169,7 +196,7 @@ func shuntingYard(re string) ([]postfixNode, error) {
if i >= len(re_runes) {
return nil, fmt.Errorf("Unclosed lookaround.")
if re_runes[i] == '(' {
if re_runes[i] == '(' || re_runes[i] == NONCAPLPAREN_CHAR {
if re_runes[i] == ')' {
@ -213,7 +240,7 @@ func shuntingYard(re string) ([]postfixNode, error) {
c := re_postfix[i]
if isNormalChar(c) {
if caseInsensitiveFlag != nil && *caseInsensitiveFlag {
if caseInsensitive {
outQueue = append(outQueue, newPostfixNode(allCases(c)...))
} else {
outQueue = append(outQueue, newPostfixNode(c))
@ -249,7 +276,7 @@ func shuntingYard(re string) ([]postfixNode, error) {
if i >= len(re_postfix) {
return nil, fmt.Errorf("Unclosed lookaround.")
if re_postfix[i] == '(' {
if re_postfix[i] == '(' || re_postfix[i] == NONCAPLPAREN_CHAR {
if re_postfix[i] == ')' {
@ -605,8 +632,9 @@ func thompson(re []postfixNode) (Reg, error) {
// Compiles the given regular expression into a Reg type, suitable for use with the
// matching functions. The second return value is non-nil if a compilation error has
// occurred. As such, the error value must be checked before using the Reg returned by this function.
func Compile(re string) (Reg, error) {
nodes, err := shuntingYard(re)
// The second parameter is an optional list of flags, passed to the parsing function shuntingYard.
func Compile(re string, flags ...ReFlag) (Reg, error) {
nodes, err := shuntingYard(re, flags...)
if err != nil {
return Reg{}, fmt.Errorf("Error parsing regex: %w", err)