Updated CONCAT to be a metacharacter instead of just a tilde, and renamed it to concatRune to avoid exporting it
This commit is contained in:
@@ -18,7 +18,7 @@ type Reg struct {
|
|||||||
numGroups int
|
numGroups int
|
||||||
}
|
}
|
||||||
|
|
||||||
const CONCAT rune = '~'
|
const concatRune rune = 0xF0001
|
||||||
|
|
||||||
// Flags for shuntingYard - control its behavior
|
// Flags for shuntingYard - control its behavior
|
||||||
type ReFlag int
|
type ReFlag int
|
||||||
@@ -31,7 +31,7 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func isOperator(c rune) bool {
|
func isOperator(c rune) bool {
|
||||||
if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT {
|
if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@@ -39,7 +39,7 @@ func isOperator(c rune) bool {
|
|||||||
|
|
||||||
/* priority returns the priority of the given operator */
|
/* priority returns the priority of the given operator */
|
||||||
func priority(op rune) int {
|
func priority(op rune) int {
|
||||||
precedence := []rune{'|', CONCAT, '+', '*', '?'}
|
precedence := []rune{'|', concatRune, '+', '*', '?'}
|
||||||
return slices.Index(precedence, op)
|
return slices.Index(precedence, op)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -320,7 +320,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != NONCAPLPAREN_CHAR && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
|
if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != NONCAPLPAREN_CHAR && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
|
||||||
if i < len(re_runes)-1 {
|
if i < len(re_runes)-1 {
|
||||||
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
|
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
|
||||||
re_postfix = append(re_postfix, CONCAT)
|
re_postfix = append(re_postfix, concatRune)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -8,16 +8,16 @@ import (
|
|||||||
var whitespaceChars = []rune{' ', '\t', '\n'}
|
var whitespaceChars = []rune{' ', '\t', '\n'}
|
||||||
var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
|
var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
|
||||||
var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
|
var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
|
||||||
var LBRACKET rune = 0xF0001
|
var LBRACKET rune = 0xF0002
|
||||||
var RBRACKET rune = 0xF0002
|
var RBRACKET rune = 0xF0003
|
||||||
var ANY_CHAR rune = 0xF0003 // Represents any character - used for states where the allChars flag is on.
|
var ANY_CHAR rune = 0xF0004 // Represents any character - used for states where the allChars flag is on.
|
||||||
var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with this - it acts as a pseudo-parenthesis
|
var LPAREN_CHAR rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudo-parenthesis
|
||||||
var RPAREN_CHAR rune = 0xF0005
|
var RPAREN_CHAR rune = 0xF0006
|
||||||
var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
|
var NONCAPLPAREN_CHAR rune = 0xF0007 // Represents a non-capturing group's LPAREN
|
||||||
var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash
|
var ESC_BACKSLASH rune = 0xF0008 // Represents an escaped backslash
|
||||||
var CHAR_RANGE rune = 0xF0008 // Represents a character range
|
var CHAR_RANGE rune = 0xF0009 // Represents a character range
|
||||||
|
|
||||||
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', CONCAT, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
|
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
|
||||||
|
|
||||||
// An interface for int and rune, which are identical
|
// An interface for int and rune, which are identical
|
||||||
type character interface {
|
type character interface {
|
||||||
|
@@ -154,7 +154,7 @@ func newPostfixNode(contents ...rune) postfixNode {
|
|||||||
to_return.nodetype = kleeneNode
|
to_return.nodetype = kleeneNode
|
||||||
case '|':
|
case '|':
|
||||||
to_return.nodetype = pipeNode
|
to_return.nodetype = pipeNode
|
||||||
case CONCAT:
|
case concatRune:
|
||||||
to_return.nodetype = concatenateNode
|
to_return.nodetype = concatenateNode
|
||||||
case '^', '$':
|
case '^', '$':
|
||||||
to_return.nodetype = assertionNode
|
to_return.nodetype = assertionNode
|
||||||
|
Reference in New Issue
Block a user