Started implementing backreferences (octal values should now be prefaced with \0)
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
package regex
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type nodeType int
|
||||
|
||||
@@ -20,6 +22,7 @@ const (
|
||||
assertionNode
|
||||
lparenNode
|
||||
rparenNode
|
||||
backreferenceNode
|
||||
)
|
||||
|
||||
// Helper constants for lookarounds
|
||||
@@ -31,15 +34,16 @@ const lookbehind = -1
|
||||
var infinite_reps int = -1 // Represents infinite reps eg. the end range in {5,}
|
||||
// This represents a node in the postfix representation of the expression
|
||||
type postfixNode struct {
|
||||
nodetype nodeType
|
||||
contents []rune // Contents of the node
|
||||
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
|
||||
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
|
||||
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
|
||||
except []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
|
||||
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
|
||||
lookaroundDir int // Lookbehind or lookahead
|
||||
nodeContents []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
|
||||
nodetype nodeType
|
||||
contents []rune // Contents of the node
|
||||
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
|
||||
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
|
||||
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
|
||||
except []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
|
||||
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
|
||||
lookaroundDir int // Lookbehind or lookahead
|
||||
nodeContents []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
|
||||
referencedGroup int // ONLY USED WHEN nodetype == backreferenceNode. Holds the group which this one refers to. After parsing is done, the expression will be rewritten eg. (a)\1 will become (a)(a). So the return value of ShuntingYard() shouldn't contain a backreferenceNode.
|
||||
}
|
||||
|
||||
// Converts the given list of postfixNodes to one node of type CHARCLASS.
|
||||
@@ -208,3 +212,44 @@ func newPostfixCharNode(contents ...rune) postfixNode {
|
||||
toReturn.contents = append(toReturn.contents, contents...)
|
||||
return toReturn
|
||||
}
|
||||
|
||||
// newPostfixBackreferenceNode creates and returns a backreference node, referring to the given group
|
||||
func newPostfixBackreferenceNode(referred int) postfixNode {
|
||||
toReturn := postfixNode{}
|
||||
toReturn.startReps = 1
|
||||
toReturn.endReps = 1
|
||||
toReturn.nodetype = backreferenceNode
|
||||
toReturn.referencedGroup = referred
|
||||
return toReturn
|
||||
}
|
||||
|
||||
// rewriteBackreferences rewrites any backreferences in the given postfixNode slice, into their respective groups.
|
||||
// It stores the relation in a map, and returns it as the second return value.
|
||||
// It uses parenIndices to determine where a group starts and ends in nodes.
|
||||
// For example, \1(a) will be rewritten into (a)(a), and 1 -> 2 will be the hashmap value.
|
||||
// It returns an error if a backreference points to an invalid group.
|
||||
// func rewriteBackreferences(nodes []postfixNode, parenIndices []Group) ([]postfixNode, map[int]int, error) {
|
||||
// rtv := make([]postfixNode, 0)
|
||||
// referMap := make(map[int]int)
|
||||
// numGroups := 0
|
||||
// groupIncrement := 0 // If we have a backreference before the group its referring to, then the group its referring to will have its group number incremented.
|
||||
// for i, node := range nodes {
|
||||
// if node.nodetype == backreferenceNode {
|
||||
// if node.referencedGroup >= len(parenIndices) {
|
||||
// return nil, nil, fmt.Errorf("invalid backreference")
|
||||
// }
|
||||
// rtv = slices.Concat(rtv, nodes[parenIndices[node.referencedGroup].StartIdx:parenIndices[node.referencedGroup].EndIdx+1]) // Add all the nodes in the group to rtv
|
||||
// numGroups += 1
|
||||
// if i < parenIndices[node.referencedGroup].StartIdx {
|
||||
// groupIncrement += 1
|
||||
// }
|
||||
// referMap[numGroups] = node.referencedGroup + groupIncrement
|
||||
// } else {
|
||||
// rtv = append(rtv, node)
|
||||
// if node.nodetype == lparenNode {
|
||||
// numGroups += 1
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return rtv, referMap, nil
|
||||
// }
|
||||
|
Reference in New Issue
Block a user