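Changed the CONCAT operator from a literal tilde ('~') to a private-use metacharacter (0xF0001), and renamed it to concatRune so that it is no longer exported; the other metacharacter values shift up by one to make room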

2025-01-30 10:34:03 -05:00
parent 93474c5159
commit e01ef48cbc
3 changed files with 14 additions and 14 deletions
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -18,7 +18,7 @@ type Reg struct {
 	numGroups int
 }
-const CONCAT rune = '~'
+const concatRune rune = 0xF0001
 // Flags for shuntingYard - control its behavior
 type ReFlag int
@@ -31,7 +31,7 @@ const (
 )
 func isOperator(c rune) bool {
-	if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT {
+	if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune {
 		return true
 	}
 	return false
@@ -39,7 +39,7 @@ func isOperator(c rune) bool {
 /* priority returns the priority of the given operator */
 func priority(op rune) int {
-	precedence := []rune{'|', CONCAT, '+', '*', '?'}
+	precedence := []rune{'|', concatRune, '+', '*', '?'}
 	return slices.Index(precedence, op)
 }
@@ -320,7 +320,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 		if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != NONCAPLPAREN_CHAR && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
 			if i < len(re_runes)-1 {
 				if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
-					re_postfix = append(re_postfix, CONCAT)
+					re_postfix = append(re_postfix, concatRune)
 				}
 			}
 		}
--- a/regex/misc.go
+++ b/regex/misc.go
@@ -8,16 +8,16 @@ import (
 var whitespaceChars = []rune{' ', '\t', '\n'}
 var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
 var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
-var LBRACKET rune = 0xF0001
+var LBRACKET rune = 0xF0002
-var RBRACKET rune = 0xF0002
+var RBRACKET rune = 0xF0003
-var ANY_CHAR rune = 0xF0003    // Represents any character - used for states where the allChars flag is on.
+var ANY_CHAR rune = 0xF0004    // Represents any character - used for states where the allChars flag is on.
-var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with this - it acts as a pseudio-parentheses
+var LPAREN_CHAR rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudo-parenthesis
-var RPAREN_CHAR rune = 0xF0005
+var RPAREN_CHAR rune = 0xF0006
-var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
+var NONCAPLPAREN_CHAR rune = 0xF0007 // Represents a non-capturing group's LPAREN
-var ESC_BACKSLASH rune = 0xF0007     // Represents an escaped backslash
+var ESC_BACKSLASH rune = 0xF0008     // Represents an escaped backslash
-var CHAR_RANGE rune = 0xF0008        // Represents a character range
+var CHAR_RANGE rune = 0xF0009        // Represents a character range
-var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', CONCAT, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
+var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
 // An interface for int and rune, which are identical
 type character interface {
--- a/regex/postfixNode.go
+++ b/regex/postfixNode.go
@@ -154,7 +154,7 @@ func newPostfixNode(contents ...rune) postfixNode {
 			to_return.nodetype = kleeneNode
 		case '|':
 			to_return.nodetype = pipeNode
-		case CONCAT:
+		case concatRune:
 			to_return.nodetype = concatenateNode
 		case '^', '$':
 			to_return.nodetype = assertionNode