diff --git a/regex/compile.go b/regex/compile.go index 3816242..86c552b 100644 --- a/regex/compile.go +++ b/regex/compile.go @@ -18,7 +18,7 @@ type Reg struct { numGroups int } -const CONCAT rune = '~' +const concatRune rune = 0xF0001 // Flags for shuntingYard - control its behavior type ReFlag int @@ -31,7 +31,7 @@ const ( ) func isOperator(c rune) bool { - if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT { + if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune { return true } return false @@ -39,7 +39,7 @@ func isOperator(c rune) bool { /* priority returns the priority of the given operator */ func priority(op rune) int { - precedence := []rune{'|', CONCAT, '+', '*', '?'} + precedence := []rune{'|', concatRune, '+', '*', '?'} return slices.Index(precedence, op) } @@ -320,7 +320,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != NONCAPLPAREN_CHAR && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped if i < len(re_runes)-1 { if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' { - re_postfix = append(re_postfix, CONCAT) + re_postfix = append(re_postfix, concatRune) } } } diff --git a/regex/misc.go b/regex/misc.go index 8f3c8a6..d9bc33c 100644 --- a/regex/misc.go +++ b/regex/misc.go @@ -8,16 +8,16 @@ import ( var whitespaceChars = []rune{' ', '\t', '\n'} var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_") -var LBRACKET rune = 0xF0001 -var RBRACKET rune = 0xF0002 -var ANY_CHAR rune = 0xF0003 // Represents any character - used for states where the allChars flag is on. -var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with this - it acts as a pseudio-parentheses -var RPAREN_CHAR rune = 0xF0005 -var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN -var ESC_BACKSLASH rune = 0xF0007 // Represents an escaped backslash -var CHAR_RANGE rune = 0xF0008 // Represents a character range - -var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', CONCAT, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR} +var LBRACKET rune = 0xF0002 +var RBRACKET rune = 0xF0003 +var ANY_CHAR rune = 0xF0004 // Represents any character - used for states where the allChars flag is on. +var LPAREN_CHAR rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudio-parentheses +var RPAREN_CHAR rune = 0xF0006 +var NONCAPLPAREN_CHAR rune = 0xF0007 // Represents a non-capturing group's LPAREN +var ESC_BACKSLASH rune = 0xF0008 // Represents an escaped backslash +var CHAR_RANGE rune = 0xF0009 // Represents a character range + +var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR} // An interface for int and rune, which are identical type character interface { diff --git a/regex/postfixNode.go b/regex/postfixNode.go index 70f3d79..42a2eab 100644 --- a/regex/postfixNode.go +++ b/regex/postfixNode.go @@ -154,7 +154,7 @@ func newPostfixNode(contents ...rune) postfixNode { to_return.nodetype = kleeneNode case '|': to_return.nodetype = pipeNode - case CONCAT: + case concatRune: to_return.nodetype = concatenateNode case '^', '$': to_return.nodetype = assertionNode