Updated nodeType constants so that they aren't exported
This commit is contained in:
@@ -450,7 +450,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
}
|
||||
// 'regex' should now contain the lookaround regex, plus the characters at the start (which indicate pos/neg, ahead/behind)
|
||||
// Now we should filter that out.
|
||||
toAppend := postfixNode{nodetype: ASSERTION, startReps: 1, endReps: 1}
|
||||
toAppend := postfixNode{nodetype: assertionNode, startReps: 1, endReps: 1}
|
||||
if regex[0] == '<' { // Lookbehind
|
||||
toAppend.lookaroundDir = LOOKBEHIND
|
||||
regex = regex[1:]
|
||||
@@ -489,7 +489,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
topStack, _ = peek(opStack)
|
||||
}
|
||||
outQueueFinalElement, _ := peek(outQueue)
|
||||
if (c == '*' && outQueueFinalElement.nodetype == KLEENE) || (c == '+' && outQueueFinalElement.nodetype == PLUS) { // You cannot apply a quantifier to a quantifier in this way
|
||||
if (c == '*' && outQueueFinalElement.nodetype == kleeneNode) || (c == '+' && outQueueFinalElement.nodetype == plusNode) { // You cannot apply a quantifier to a quantifier in this way
|
||||
return nil, fmt.Errorf("illegal use of token '%c'", c)
|
||||
}
|
||||
opStack = append(opStack, c)
|
||||
@@ -751,7 +751,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
|
||||
idx := len(outQueue) - 1
|
||||
// Get the last added node
|
||||
if idx < 0 || outQueue[idx].nodetype == LPAREN {
|
||||
if idx < 0 || outQueue[idx].nodetype == lparenNode {
|
||||
return nil, fmt.Errorf("numeric specifier with no content")
|
||||
}
|
||||
outQueue[idx].startReps = startRangeNum
|
||||
@@ -814,7 +814,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
}
|
||||
|
||||
for _, c := range re {
|
||||
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
||||
if c.nodetype == characterNode || c.nodetype == assertionNode {
|
||||
state := State{}
|
||||
state.transitions = make(map[int][]*State)
|
||||
if c.allChars {
|
||||
@@ -865,7 +865,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
state.output = make([]*State, 0)
|
||||
state.output = append(state.output, &state)
|
||||
state.isEmpty = false
|
||||
if c.nodetype == ASSERTION {
|
||||
if c.nodetype == assertionNode {
|
||||
state.isEmpty = true // This is a little weird. A lookaround has the 'isEmpty' flag set, even though it _isn't_ empty (the contents are the regex). But, there's so much error-checking that relies on this flag that it's better to keep it this way.
|
||||
state.content = newContents(EPSILON) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of the string
|
||||
if c.lookaroundDir == 0 || c.lookaroundSign == 0 {
|
||||
@@ -917,7 +917,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
|
||||
nfa = append(nfa, &state)
|
||||
}
|
||||
if c.nodetype == LPAREN || c.nodetype == RPAREN {
|
||||
if c.nodetype == lparenNode || c.nodetype == rparenNode {
|
||||
s := &State{}
|
||||
s.assert = NONE
|
||||
s.content = newContents(EPSILON)
|
||||
@@ -926,7 +926,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
s.output = append(s.output, s)
|
||||
s.transitions = make(map[int][]*State)
|
||||
// LPAREN nodes are just added normally
|
||||
if c.nodetype == LPAREN {
|
||||
if c.nodetype == lparenNode {
|
||||
numGroups++
|
||||
s.groupBegin = true
|
||||
s.groupNum = numGroups
|
||||
@@ -940,7 +940,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
// If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN
|
||||
// and RPAREN nodes.
|
||||
// If neither node exists, that's a problem so I return an error.
|
||||
if c.nodetype == RPAREN {
|
||||
if c.nodetype == rparenNode {
|
||||
s.groupEnd = true
|
||||
middleNode, err1 := pop(&nfa)
|
||||
lparenNode, err2 := pop(&nfa)
|
||||
@@ -969,7 +969,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated
|
||||
if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
|
||||
// Map the list of nodes to a list of states, each state containing the contents of a specific node
|
||||
states := funcMap(c.nodeContents, func(node postfixNode) *State {
|
||||
s := newState()
|
||||
@@ -996,7 +996,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
}
|
||||
// Must be an operator if it isn't a character
|
||||
switch c.nodetype {
|
||||
case CONCATENATE:
|
||||
case concatenateNode:
|
||||
s2 := mustPop(&nfa)
|
||||
// Relax the requirements for concatenation a little bit - If
|
||||
// the second element is not found ie. the postfixNodes look
|
||||
@@ -1008,7 +1008,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
s1 = concatenate(s1, s2)
|
||||
nfa = append(nfa, s1)
|
||||
}
|
||||
case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
||||
case kleeneNode: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
||||
s1, err := pop(&nfa)
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("error applying kleene star")
|
||||
@@ -1018,7 +1018,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
return Reg{}, err
|
||||
}
|
||||
nfa = append(nfa, stateToAdd)
|
||||
case PLUS: // a+ is equivalent to aa*
|
||||
case plusNode: // a+ is equivalent to aa*
|
||||
s1 := mustPop(&nfa)
|
||||
s2, err := kleene(*s1)
|
||||
if err != nil {
|
||||
@@ -1026,14 +1026,14 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
}
|
||||
s1 = concatenate(s1, s2)
|
||||
nfa = append(nfa, s1)
|
||||
case QUESTION: // ab? is equivalent to a(b|)
|
||||
case questionNode: // ab? is equivalent to a(b|)
|
||||
s1, err := pop(&nfa)
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("error applying question operator")
|
||||
}
|
||||
s2 := question(s1)
|
||||
nfa = append(nfa, s2)
|
||||
case PIPE:
|
||||
case pipeNode:
|
||||
// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
|
||||
// it is replaced with an implicit 'matchZeroLength' state (this is the same thing that we add at the top if our
|
||||
// input has zero postfixNodes).
|
||||
|
@@ -10,16 +10,16 @@ var escapedChars []rune = []rune("wWdDbBnaftrvsS0")
|
||||
|
||||
// This is a list of the possible node types
|
||||
const (
|
||||
CHARACTER NodeType = iota
|
||||
CHARCLASS
|
||||
PIPE
|
||||
CONCATENATE
|
||||
KLEENE
|
||||
QUESTION
|
||||
PLUS
|
||||
ASSERTION
|
||||
LPAREN
|
||||
RPAREN
|
||||
characterNode NodeType = iota
|
||||
charclassNode
|
||||
pipeNode
|
||||
concatenateNode
|
||||
kleeneNode
|
||||
questionNode
|
||||
plusNode
|
||||
assertionNode
|
||||
lparenNode
|
||||
rparenNode
|
||||
)
|
||||
|
||||
// Helper constants for lookarounds
|
||||
@@ -49,11 +49,11 @@ type postfixNode struct {
|
||||
// it will not match.
|
||||
func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
|
||||
rtv := postfixNode{}
|
||||
rtv.nodetype = CHARCLASS
|
||||
rtv.nodetype = charclassNode
|
||||
rtv.startReps = 1
|
||||
rtv.endReps = 1
|
||||
if negated {
|
||||
rtv.nodetype = CHARACTER
|
||||
rtv.nodetype = characterNode
|
||||
rtv.contents = []rune{ANY_CHAR}
|
||||
rtv.allChars = true
|
||||
rtv.except = nodes
|
||||
@@ -70,55 +70,55 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
||||
toReturn.endReps = 1
|
||||
switch c {
|
||||
case 's': // Whitespace
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, whitespaceChars...)
|
||||
case 'S': // Non-whitespace
|
||||
toReturn = newPostfixDotNode()
|
||||
toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...))
|
||||
case 'd': // Digits
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, digitChars...)
|
||||
case 'D': // Non-digits
|
||||
toReturn = newPostfixDotNode()
|
||||
toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...))
|
||||
case 'w': // word character
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, wordChars...)
|
||||
case 'W': // Non-word character
|
||||
toReturn = newPostfixDotNode()
|
||||
toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...))
|
||||
case 'b', 'B':
|
||||
if c == 'b' && inCharClass {
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, rune(8))
|
||||
} else {
|
||||
toReturn.nodetype = ASSERTION
|
||||
toReturn.nodetype = assertionNode
|
||||
toReturn.contents = append(toReturn.contents, c)
|
||||
}
|
||||
case 'n': // Newline character
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, '\n')
|
||||
case '0': // NULL character
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, rune(0))
|
||||
case 'a': // Bell character
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, rune(7))
|
||||
case 'f': // Form feed character
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, rune(12))
|
||||
case 't': // Horizontal tab character
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, rune(9))
|
||||
case 'r': // Carriage return
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, rune(13))
|
||||
case 'v': // Vertical tab
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, rune(11))
|
||||
case '-': // Literal hyphen - only in character class
|
||||
if inCharClass {
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, '-')
|
||||
} else {
|
||||
return postfixNode{}, fmt.Errorf("invalid escape character")
|
||||
@@ -127,7 +127,7 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
||||
if isNormalChar(c) { // Normal characters cannot be escaped
|
||||
return postfixNode{}, fmt.Errorf("invalid escape character")
|
||||
}
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, c)
|
||||
}
|
||||
return toReturn, nil
|
||||
@@ -142,36 +142,36 @@ func newPostfixNode(contents ...rune) postfixNode {
|
||||
to_return.startReps = 1
|
||||
to_return.endReps = 1
|
||||
if len(contents) > 1 { // If the node has more than one element, it must be a character class - the type must be CHARACTER
|
||||
to_return.nodetype = CHARACTER
|
||||
to_return.nodetype = characterNode
|
||||
to_return.contents = contents
|
||||
} else { // Node has one element, could be anything
|
||||
switch contents[0] {
|
||||
case '+':
|
||||
to_return.nodetype = PLUS
|
||||
to_return.nodetype = plusNode
|
||||
case '?':
|
||||
to_return.nodetype = QUESTION
|
||||
to_return.nodetype = questionNode
|
||||
case '*':
|
||||
to_return.nodetype = KLEENE
|
||||
to_return.nodetype = kleeneNode
|
||||
case '|':
|
||||
to_return.nodetype = PIPE
|
||||
to_return.nodetype = pipeNode
|
||||
case CONCAT:
|
||||
to_return.nodetype = CONCATENATE
|
||||
to_return.nodetype = concatenateNode
|
||||
case '^', '$':
|
||||
to_return.nodetype = ASSERTION
|
||||
to_return.nodetype = assertionNode
|
||||
case '(':
|
||||
to_return.nodetype = LPAREN
|
||||
to_return.nodetype = lparenNode
|
||||
case ')':
|
||||
to_return.nodetype = RPAREN
|
||||
to_return.nodetype = rparenNode
|
||||
default:
|
||||
to_return.nodetype = CHARACTER
|
||||
to_return.nodetype = characterNode
|
||||
}
|
||||
to_return.contents = append(to_return.contents, contents...)
|
||||
|
||||
// Special cases for LPAREN and RPAREN - they have special characters defined for them
|
||||
if to_return.nodetype == LPAREN {
|
||||
if to_return.nodetype == lparenNode {
|
||||
to_return.contents = []rune{LPAREN_CHAR}
|
||||
}
|
||||
if to_return.nodetype == RPAREN {
|
||||
if to_return.nodetype == rparenNode {
|
||||
to_return.contents = []rune{RPAREN_CHAR}
|
||||
}
|
||||
}
|
||||
@@ -183,7 +183,7 @@ func newPostfixDotNode() postfixNode {
|
||||
toReturn := postfixNode{}
|
||||
toReturn.startReps = 1
|
||||
toReturn.endReps = 1
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.allChars = true
|
||||
toReturn.contents = []rune{ANY_CHAR}
|
||||
return toReturn
|
||||
@@ -194,7 +194,7 @@ func newPostfixCharNode(contents ...rune) postfixNode {
|
||||
toReturn := postfixNode{}
|
||||
toReturn.startReps = 1
|
||||
toReturn.endReps = 1
|
||||
toReturn.nodetype = CHARACTER
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, contents...)
|
||||
return toReturn
|
||||
}
|
||||
|
Reference in New Issue
Block a user