Updated CONCAT to be a metacharacter (0xF0001) instead of just a tilde, and renamed it to concatRune so that it is no longer exported
This commit is contained in:
		| @@ -18,7 +18,7 @@ type Reg struct { | |||||||
| 	numGroups int | 	numGroups int | ||||||
| } | } | ||||||
|  |  | ||||||
| const CONCAT rune = '~' | const concatRune rune = 0xF0001 | ||||||
|  |  | ||||||
| // Flags for shuntingYard - control its behavior | // Flags for shuntingYard - control its behavior | ||||||
| type ReFlag int | type ReFlag int | ||||||
| @@ -31,7 +31,7 @@ const ( | |||||||
| ) | ) | ||||||
|  |  | ||||||
| func isOperator(c rune) bool { | func isOperator(c rune) bool { | ||||||
| 	if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT { | 	if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune { | ||||||
| 		return true | 		return true | ||||||
| 	} | 	} | ||||||
| 	return false | 	return false | ||||||
| @@ -39,7 +39,7 @@ func isOperator(c rune) bool { | |||||||
|  |  | ||||||
| /* priority returns the priority of the given operator */ | /* priority returns the priority of the given operator */ | ||||||
| func priority(op rune) int { | func priority(op rune) int { | ||||||
| 	precedence := []rune{'|', CONCAT, '+', '*', '?'} | 	precedence := []rune{'|', concatRune, '+', '*', '?'} | ||||||
| 	return slices.Index(precedence, op) | 	return slices.Index(precedence, op) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -320,7 +320,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { | |||||||
| 		if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != NONCAPLPAREN_CHAR && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped | 		if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != NONCAPLPAREN_CHAR && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped | ||||||
| 			if i < len(re_runes)-1 { | 			if i < len(re_runes)-1 { | ||||||
| 				if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' { | 				if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' { | ||||||
| 					re_postfix = append(re_postfix, CONCAT) | 					re_postfix = append(re_postfix, concatRune) | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|   | |||||||
| @@ -8,16 +8,16 @@ import ( | |||||||
| var whitespaceChars = []rune{' ', '\t', '\n'} | var whitespaceChars = []rune{' ', '\t', '\n'} | ||||||
| var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} | var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} | ||||||
| var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_") | var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_") | ||||||
| var LBRACKET rune = 0xF0001 | var LBRACKET rune = 0xF0002 | ||||||
| var RBRACKET rune = 0xF0002 | var RBRACKET rune = 0xF0003 | ||||||
| var ANY_CHAR rune = 0xF0003    // Represents any character - used for states where the allChars flag is on. | var ANY_CHAR rune = 0xF0004    // Represents any character - used for states where the allChars flag is on. | ||||||
| var LPAREN_CHAR rune = 0xF0004 // Parentheses in regex are concatenated with this - it acts as a pseudo-parenthesis | var LPAREN_CHAR rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudo-parenthesis | ||||||
| var RPAREN_CHAR rune = 0xF0005 | var RPAREN_CHAR rune = 0xF0006 | ||||||
| var NONCAPLPAREN_CHAR rune = 0xF0006 // Represents a non-capturing group's LPAREN | var NONCAPLPAREN_CHAR rune = 0xF0007 // Represents a non-capturing group's LPAREN | ||||||
| var ESC_BACKSLASH rune = 0xF0007     // Represents an escaped backslash | var ESC_BACKSLASH rune = 0xF0008     // Represents an escaped backslash | ||||||
| var CHAR_RANGE rune = 0xF0008        // Represents a character range | var CHAR_RANGE rune = 0xF0009        // Represents a character range | ||||||
|  |  | ||||||
| var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', CONCAT, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR} | var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR} | ||||||
|  |  | ||||||
| // An interface for int and rune, which are identical | // An interface for int and rune, which are identical | ||||||
| type character interface { | type character interface { | ||||||
|   | |||||||
| @@ -154,7 +154,7 @@ func newPostfixNode(contents ...rune) postfixNode { | |||||||
| 			to_return.nodetype = kleeneNode | 			to_return.nodetype = kleeneNode | ||||||
| 		case '|': | 		case '|': | ||||||
| 			to_return.nodetype = pipeNode | 			to_return.nodetype = pipeNode | ||||||
| 		case CONCAT: | 		case concatRune: | ||||||
| 			to_return.nodetype = concatenateNode | 			to_return.nodetype = concatenateNode | ||||||
| 		case '^', '$': | 		case '^', '$': | ||||||
| 			to_return.nodetype = assertionNode | 			to_return.nodetype = assertionNode | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user