Updated CONCAT to be a metacharacter instead of just a tilde, and renamed it to avoid exporting

master
Aadhavan Srinivasan 3 days ago
parent 93474c5159
commit e01ef48cbc

@ -18,7 +18,7 @@ type Reg struct {
numGroups int
}
const CONCAT rune = '~'
const concatRune rune = 0xF0001
// Flags for shuntingYard - control its behavior
type ReFlag int
@ -31,7 +31,7 @@ const (
)
func isOperator(c rune) bool {
if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT {
if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune {
return true
}
return false
@ -39,7 +39,7 @@ func isOperator(c rune) bool {
/* priority returns the priority of the given operator */
func priority(op rune) int {
precedence := []rune{'|', CONCAT, '+', '*', '?'}
precedence := []rune{'|', concatRune, '+', '*', '?'}
return slices.Index(precedence, op)
}
@ -320,7 +320,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != NONCAPLPAREN_CHAR && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
if i < len(re_runes)-1 {
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
re_postfix = append(re_postfix, CONCAT)
re_postfix = append(re_postfix, concatRune)
}
}
}

@ -8,16 +8,16 @@ import (
var whitespaceChars = []rune{' ', '\t', '\n'}
var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
var LBRACKET rune = 0xF0001
var RBRACKET rune = 0xF0002
var ANY_CHAR rune = 0xF0003 // Represents any character - used for states where the allChars flag is on.
var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with this - it acts as a pseudio-parentheses
var RPAREN_CHAR rune = 0xF0005
var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN
var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash
var CHAR_RANGE rune = 0xF0008 // Represents a character range
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', CONCAT, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
var LBRACKET rune = 0xF0002
var RBRACKET rune = 0xF0003
var ANY_CHAR rune = 0xF0004 // Represents any character - used for states where the allChars flag is on.
var LPAREN_CHAR rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudio-parentheses
var RPAREN_CHAR rune = 0xF0006
var NONCAPLPAREN_CHAR rune = 0xF0007 // Represents a non-capturing group's LPAREN
var ESC_BACKSLASH rune = 0xF0008 // Represents an escaped backslash
var CHAR_RANGE rune = 0xF0009 // Represents a character range
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
// An interface for int and rune, which are identical
type character interface {

@ -154,7 +154,7 @@ func newPostfixNode(contents ...rune) postfixNode {
to_return.nodetype = kleeneNode
case '|':
to_return.nodetype = pipeNode
case CONCAT:
case concatRune:
to_return.nodetype = concatenateNode
case '^', '$':
to_return.nodetype = assertionNode

Loading…
Cancel
Save