Compare commits
5 Commits
implementU
...
277cbc0fc5
Author | SHA1 | Date | |
---|---|---|---|
277cbc0fc5 | |||
3924502b72 | |||
36b009747b | |||
6cd0a10a8f | |||
69fb96c43d |
@@ -64,7 +64,7 @@ const (
|
||||
)
|
||||
|
||||
func isOperator(c rune) bool {
|
||||
if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune {
|
||||
if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune || c == lazyPlusRune || c == lazyKleeneRune || c == lazyQuestionRune {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -72,7 +72,7 @@ func isOperator(c rune) bool {
|
||||
|
||||
/* priority returns the priority of the given operator */
|
||||
func priority(op rune) int {
|
||||
precedence := []rune{'|', concatRune, '+', '*', '?'}
|
||||
precedence := []rune{'|', concatRune, '+', lazyPlusRune, '*', lazyKleeneRune, '?', lazyQuestionRune}
|
||||
return slices.Index(precedence, op)
|
||||
}
|
||||
|
||||
@@ -208,9 +208,6 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
// metacharacter. Later, in thompson(), these will be converted back. This avoids
|
||||
// confusion in detecting whether a character is escaped eg. detecting
|
||||
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
||||
//
|
||||
// 5. Check for non-greedy operators. These are not supported at the moment, so an error
|
||||
// must be thrown if the user attempts to use a non-greedy operator.
|
||||
for i := 0; i < len(re_runes_orig); i++ {
|
||||
c := re_runes_orig[i]
|
||||
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
|
||||
@@ -257,8 +254,16 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
|
||||
re_runes = append(re_runes, rbracketRune)
|
||||
continue
|
||||
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
|
||||
return nil, fmt.Errorf("non-greedy operators are not supported")
|
||||
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i > 0 && re_runes_orig[i-1] != '\\') && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
|
||||
switch c {
|
||||
case '+':
|
||||
re_runes = append(re_runes, lazyPlusRune)
|
||||
case '*':
|
||||
re_runes = append(re_runes, lazyKleeneRune)
|
||||
case '?':
|
||||
re_runes = append(re_runes, lazyQuestionRune)
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
re_runes = append(re_runes, c)
|
||||
}
|
||||
@@ -421,7 +426,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
}
|
||||
if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != nonCapLparenRune && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
|
||||
if i < len(re_runes)-1 {
|
||||
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
|
||||
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != lazyKleeneRune && re_runes[i+1] != '+' && re_runes[i+1] != lazyPlusRune && re_runes[i+1] != '?' && re_runes[i+1] != lazyQuestionRune && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
|
||||
re_postfix = append(re_postfix, concatRune)
|
||||
}
|
||||
}
|
||||
|
@@ -153,8 +153,9 @@ returns the 0-group.
|
||||
The following features from [regexp] are (currently) NOT supported:
|
||||
1. Named capturing groups
|
||||
2. Non-greedy operators
|
||||
3. Embedded flags (flags are instead passed as arguments to [Compile])
|
||||
4. Literal text with \Q ... \E
|
||||
3. Negated POSIX classes
|
||||
4. Embedded flags (flags are instead passed as arguments to [Compile])
|
||||
5. Literal text with \Q ... \E
|
||||
|
||||
The following features are not available in [regexp], but are supported in my engine:
|
||||
1. Lookarounds
|
||||
|
@@ -16,8 +16,11 @@ var rparenRune rune = 0xF0006
|
||||
var nonCapLparenRune rune = 0xF0007 // Represents a non-capturing group's LPAREN
|
||||
var escBackslashRune rune = 0xF0008 // Represents an escaped backslash
|
||||
var charRangeRune rune = 0xF0009 // Represents a character range
|
||||
var lazyKleeneRune rune = 0xF000A // Represents a lazy kleene star
|
||||
var lazyPlusRune rune = 0xF000B // Represents a lazy plus operator
|
||||
var lazyQuestionRune rune = 0xF000C // Represents a lazy question operator
|
||||
|
||||
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
|
||||
var specialChars = []rune{'?', lazyQuestionRune, '*', lazyKleeneRune, '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', lazyPlusRune, '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
|
||||
|
||||
// An interface for int and rune, which are identical
|
||||
type character interface {
|
||||
|
@@ -44,6 +44,7 @@ type postfixNode struct {
|
||||
lookaroundDir int // Lookbehind or lookahead
|
||||
nodeContents []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
|
||||
referencedGroup int // ONLY USED WHEN nodetype == backreferenceNode. Holds the group which this one refers to. After parsing is done, the expression will be rewritten eg. (a)\1 will become (a)(a). So the return value of ShuntingYard() shouldn't contain a backreferenceNode.
|
||||
isLazy bool // True for lazy (non-greedy) operators. Set when nodetype == plusNode, questionNode or kleeneNode.
|
||||
}
|
||||
|
||||
// Converts the given list of postfixNodes to one node of type CHARCLASS.
|
||||
@@ -162,10 +163,19 @@ func newPostfixNode(contents ...rune) postfixNode {
|
||||
switch contents[0] {
|
||||
case '+':
|
||||
to_return.nodetype = plusNode
|
||||
case lazyPlusRune:
|
||||
to_return.nodetype = plusNode
|
||||
to_return.isLazy = true
|
||||
case '?':
|
||||
to_return.nodetype = questionNode
|
||||
case lazyQuestionRune:
|
||||
to_return.nodetype = questionNode
|
||||
to_return.isLazy = true
|
||||
case '*':
|
||||
to_return.nodetype = kleeneNode
|
||||
case lazyKleeneRune:
|
||||
to_return.nodetype = kleeneNode
|
||||
to_return.isLazy = true
|
||||
case '|':
|
||||
to_return.nodetype = pipeNode
|
||||
case concatRune:
|
||||
|
Reference in New Issue
Block a user