Updated nodeType constants so that they aren't exported

master
Aadhavan Srinivasan 3 days ago
parent ca8f8e1030
commit 73c6a442ce

@ -450,7 +450,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
// 'regex' should now contain the lookaround regex, plus the characters at the start (which indicate pos/neg, ahead/behind) // 'regex' should now contain the lookaround regex, plus the characters at the start (which indicate pos/neg, ahead/behind)
// Now we should filter that out. // Now we should filter that out.
toAppend := postfixNode{nodetype: ASSERTION, startReps: 1, endReps: 1} toAppend := postfixNode{nodetype: assertionNode, startReps: 1, endReps: 1}
if regex[0] == '<' { // Lookbehind if regex[0] == '<' { // Lookbehind
toAppend.lookaroundDir = LOOKBEHIND toAppend.lookaroundDir = LOOKBEHIND
regex = regex[1:] regex = regex[1:]
@ -489,7 +489,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
topStack, _ = peek(opStack) topStack, _ = peek(opStack)
} }
outQueueFinalElement, _ := peek(outQueue) outQueueFinalElement, _ := peek(outQueue)
if (c == '*' && outQueueFinalElement.nodetype == KLEENE) || (c == '+' && outQueueFinalElement.nodetype == PLUS) { // You cannot apply a quantifier to a quantifier in this way if (c == '*' && outQueueFinalElement.nodetype == kleeneNode) || (c == '+' && outQueueFinalElement.nodetype == plusNode) { // You cannot apply a quantifier to a quantifier in this way
return nil, fmt.Errorf("illegal use of token '%c'", c) return nil, fmt.Errorf("illegal use of token '%c'", c)
} }
opStack = append(opStack, c) opStack = append(opStack, c)
@ -751,7 +751,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
idx := len(outQueue) - 1 idx := len(outQueue) - 1
// Get the last added node // Get the last added node
if idx < 0 || outQueue[idx].nodetype == LPAREN { if idx < 0 || outQueue[idx].nodetype == lparenNode {
return nil, fmt.Errorf("numeric specifier with no content") return nil, fmt.Errorf("numeric specifier with no content")
} }
outQueue[idx].startReps = startRangeNum outQueue[idx].startReps = startRangeNum
@ -814,7 +814,7 @@ func thompson(re []postfixNode) (Reg, error) {
} }
for _, c := range re { for _, c := range re {
if c.nodetype == CHARACTER || c.nodetype == ASSERTION { if c.nodetype == characterNode || c.nodetype == assertionNode {
state := State{} state := State{}
state.transitions = make(map[int][]*State) state.transitions = make(map[int][]*State)
if c.allChars { if c.allChars {
@ -865,7 +865,7 @@ func thompson(re []postfixNode) (Reg, error) {
state.output = make([]*State, 0) state.output = make([]*State, 0)
state.output = append(state.output, &state) state.output = append(state.output, &state)
state.isEmpty = false state.isEmpty = false
if c.nodetype == ASSERTION { if c.nodetype == assertionNode {
state.isEmpty = true // This is a little weird. A lookaround has the 'isEmpty' flag set, even though it _isn't_ empty (the contents are the regex). But, there's so much error-checking that relies on this flag that it's better to keep it this way. state.isEmpty = true // This is a little weird. A lookaround has the 'isEmpty' flag set, even though it _isn't_ empty (the contents are the regex). But, there's so much error-checking that relies on this flag that it's better to keep it this way.
state.content = newContents(EPSILON) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of the string state.content = newContents(EPSILON) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of the string
if c.lookaroundDir == 0 || c.lookaroundSign == 0 { if c.lookaroundDir == 0 || c.lookaroundSign == 0 {
@ -917,7 +917,7 @@ func thompson(re []postfixNode) (Reg, error) {
nfa = append(nfa, &state) nfa = append(nfa, &state)
} }
if c.nodetype == LPAREN || c.nodetype == RPAREN { if c.nodetype == lparenNode || c.nodetype == rparenNode {
s := &State{} s := &State{}
s.assert = NONE s.assert = NONE
s.content = newContents(EPSILON) s.content = newContents(EPSILON)
@ -926,7 +926,7 @@ func thompson(re []postfixNode) (Reg, error) {
s.output = append(s.output, s) s.output = append(s.output, s)
s.transitions = make(map[int][]*State) s.transitions = make(map[int][]*State)
// LPAREN nodes are just added normally // LPAREN nodes are just added normally
if c.nodetype == LPAREN { if c.nodetype == lparenNode {
numGroups++ numGroups++
s.groupBegin = true s.groupBegin = true
s.groupNum = numGroups s.groupNum = numGroups
@ -940,7 +940,7 @@ func thompson(re []postfixNode) (Reg, error) {
// If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN // If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN
// and RPAREN nodes. // and RPAREN nodes.
// If neither node exists, that's a problem so I return an error. // If neither node exists, that's a problem so I return an error.
if c.nodetype == RPAREN { if c.nodetype == rparenNode {
s.groupEnd = true s.groupEnd = true
middleNode, err1 := pop(&nfa) middleNode, err1 := pop(&nfa)
lparenNode, err2 := pop(&nfa) lparenNode, err2 := pop(&nfa)
@ -969,7 +969,7 @@ func thompson(re []postfixNode) (Reg, error) {
} }
} }
} }
if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
// Map the list of nodes to a list of states, each state containing the contents of a specific node // Map the list of nodes to a list of states, each state containing the contents of a specific node
states := funcMap(c.nodeContents, func(node postfixNode) *State { states := funcMap(c.nodeContents, func(node postfixNode) *State {
s := newState() s := newState()
@ -996,7 +996,7 @@ func thompson(re []postfixNode) (Reg, error) {
} }
// Must be an operator if it isn't a character // Must be an operator if it isn't a character
switch c.nodetype { switch c.nodetype {
case CONCATENATE: case concatenateNode:
s2 := mustPop(&nfa) s2 := mustPop(&nfa)
// Relax the requirements for concatenation a little bit - If // Relax the requirements for concatenation a little bit - If
// the second element is not found ie. the postfixNodes look // the second element is not found ie. the postfixNodes look
@ -1008,7 +1008,7 @@ func thompson(re []postfixNode) (Reg, error) {
s1 = concatenate(s1, s2) s1 = concatenate(s1, s2)
nfa = append(nfa, s1) nfa = append(nfa, s1)
} }
case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state case kleeneNode: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
s1, err := pop(&nfa) s1, err := pop(&nfa)
if err != nil { if err != nil {
return Reg{}, fmt.Errorf("error applying kleene star") return Reg{}, fmt.Errorf("error applying kleene star")
@ -1018,7 +1018,7 @@ func thompson(re []postfixNode) (Reg, error) {
return Reg{}, err return Reg{}, err
} }
nfa = append(nfa, stateToAdd) nfa = append(nfa, stateToAdd)
case PLUS: // a+ is equivalent to aa* case plusNode: // a+ is equivalent to aa*
s1 := mustPop(&nfa) s1 := mustPop(&nfa)
s2, err := kleene(*s1) s2, err := kleene(*s1)
if err != nil { if err != nil {
@ -1026,14 +1026,14 @@ func thompson(re []postfixNode) (Reg, error) {
} }
s1 = concatenate(s1, s2) s1 = concatenate(s1, s2)
nfa = append(nfa, s1) nfa = append(nfa, s1)
case QUESTION: // ab? is equivalent to a(b|) case questionNode: // ab? is equivalent to a(b|)
s1, err := pop(&nfa) s1, err := pop(&nfa)
if err != nil { if err != nil {
return Reg{}, fmt.Errorf("error applying question operator") return Reg{}, fmt.Errorf("error applying question operator")
} }
s2 := question(s1) s2 := question(s1)
nfa = append(nfa, s2) nfa = append(nfa, s2)
case PIPE: case pipeNode:
// A pipe operator doesn't actually need either operand to be present. If an operand isn't present, // A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
// it is replaced with an implicit 'matchZeroLength' state (this is the same thing that we add at the top if our // it is replaced with an implicit 'matchZeroLength' state (this is the same thing that we add at the top if our
// input has zero postfixNodes). // input has zero postfixNodes).

@ -10,16 +10,16 @@ var escapedChars []rune = []rune("wWdDbBnaftrvsS0")
// This is a list of the possible node types // This is a list of the possible node types
const ( const (
CHARACTER NodeType = iota characterNode NodeType = iota
CHARCLASS charclassNode
PIPE pipeNode
CONCATENATE concatenateNode
KLEENE kleeneNode
QUESTION questionNode
PLUS plusNode
ASSERTION assertionNode
LPAREN lparenNode
RPAREN rparenNode
) )
// Helper constants for lookarounds // Helper constants for lookarounds
@ -49,11 +49,11 @@ type postfixNode struct {
// it will not match. // it will not match.
func newCharClassNode(nodes []postfixNode, negated bool) postfixNode { func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
rtv := postfixNode{} rtv := postfixNode{}
rtv.nodetype = CHARCLASS rtv.nodetype = charclassNode
rtv.startReps = 1 rtv.startReps = 1
rtv.endReps = 1 rtv.endReps = 1
if negated { if negated {
rtv.nodetype = CHARACTER rtv.nodetype = characterNode
rtv.contents = []rune{ANY_CHAR} rtv.contents = []rune{ANY_CHAR}
rtv.allChars = true rtv.allChars = true
rtv.except = nodes rtv.except = nodes
@ -70,55 +70,55 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
toReturn.endReps = 1 toReturn.endReps = 1
switch c { switch c {
case 's': // Whitespace case 's': // Whitespace
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, whitespaceChars...) toReturn.contents = append(toReturn.contents, whitespaceChars...)
case 'S': // Non-whitespace case 'S': // Non-whitespace
toReturn = newPostfixDotNode() toReturn = newPostfixDotNode()
toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...)) toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...))
case 'd': // Digits case 'd': // Digits
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, digitChars...) toReturn.contents = append(toReturn.contents, digitChars...)
case 'D': // Non-digits case 'D': // Non-digits
toReturn = newPostfixDotNode() toReturn = newPostfixDotNode()
toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...)) toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...))
case 'w': // word character case 'w': // word character
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, wordChars...) toReturn.contents = append(toReturn.contents, wordChars...)
case 'W': // Non-word character case 'W': // Non-word character
toReturn = newPostfixDotNode() toReturn = newPostfixDotNode()
toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...)) toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...))
case 'b', 'B': case 'b', 'B':
if c == 'b' && inCharClass { if c == 'b' && inCharClass {
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, rune(8)) toReturn.contents = append(toReturn.contents, rune(8))
} else { } else {
toReturn.nodetype = ASSERTION toReturn.nodetype = assertionNode
toReturn.contents = append(toReturn.contents, c) toReturn.contents = append(toReturn.contents, c)
} }
case 'n': // Newline character case 'n': // Newline character
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, '\n') toReturn.contents = append(toReturn.contents, '\n')
case '0': // NULL character case '0': // NULL character
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, rune(0)) toReturn.contents = append(toReturn.contents, rune(0))
case 'a': // Bell character case 'a': // Bell character
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, rune(7)) toReturn.contents = append(toReturn.contents, rune(7))
case 'f': // Form feed character case 'f': // Form feed character
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, rune(12)) toReturn.contents = append(toReturn.contents, rune(12))
case 't': // Horizontal tab character case 't': // Horizontal tab character
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, rune(9)) toReturn.contents = append(toReturn.contents, rune(9))
case 'r': // Carriage return case 'r': // Carriage return
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, rune(13)) toReturn.contents = append(toReturn.contents, rune(13))
case 'v': // Vertical tab case 'v': // Vertical tab
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, rune(11)) toReturn.contents = append(toReturn.contents, rune(11))
case '-': // Literal hyphen - only in character class case '-': // Literal hyphen - only in character class
if inCharClass { if inCharClass {
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, '-') toReturn.contents = append(toReturn.contents, '-')
} else { } else {
return postfixNode{}, fmt.Errorf("invalid escape character") return postfixNode{}, fmt.Errorf("invalid escape character")
@ -127,7 +127,7 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
if isNormalChar(c) { // Normal characters cannot be escaped if isNormalChar(c) { // Normal characters cannot be escaped
return postfixNode{}, fmt.Errorf("invalid escape character") return postfixNode{}, fmt.Errorf("invalid escape character")
} }
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, c) toReturn.contents = append(toReturn.contents, c)
} }
return toReturn, nil return toReturn, nil
@ -142,36 +142,36 @@ func newPostfixNode(contents ...rune) postfixNode {
to_return.startReps = 1 to_return.startReps = 1
to_return.endReps = 1 to_return.endReps = 1
if len(contents) > 1 { // If the node has more than one element, it must be a character class - the type must be CHARACTER if len(contents) > 1 { // If the node has more than one element, it must be a character class - the type must be CHARACTER
to_return.nodetype = CHARACTER to_return.nodetype = characterNode
to_return.contents = contents to_return.contents = contents
} else { // Node has one element, could be anything } else { // Node has one element, could be anything
switch contents[0] { switch contents[0] {
case '+': case '+':
to_return.nodetype = PLUS to_return.nodetype = plusNode
case '?': case '?':
to_return.nodetype = QUESTION to_return.nodetype = questionNode
case '*': case '*':
to_return.nodetype = KLEENE to_return.nodetype = kleeneNode
case '|': case '|':
to_return.nodetype = PIPE to_return.nodetype = pipeNode
case CONCAT: case CONCAT:
to_return.nodetype = CONCATENATE to_return.nodetype = concatenateNode
case '^', '$': case '^', '$':
to_return.nodetype = ASSERTION to_return.nodetype = assertionNode
case '(': case '(':
to_return.nodetype = LPAREN to_return.nodetype = lparenNode
case ')': case ')':
to_return.nodetype = RPAREN to_return.nodetype = rparenNode
default: default:
to_return.nodetype = CHARACTER to_return.nodetype = characterNode
} }
to_return.contents = append(to_return.contents, contents...) to_return.contents = append(to_return.contents, contents...)
// Special cases for LPAREN and RPAREN - they have special characters defined for them // Special cases for LPAREN and RPAREN - they have special characters defined for them
if to_return.nodetype == LPAREN { if to_return.nodetype == lparenNode {
to_return.contents = []rune{LPAREN_CHAR} to_return.contents = []rune{LPAREN_CHAR}
} }
if to_return.nodetype == RPAREN { if to_return.nodetype == rparenNode {
to_return.contents = []rune{RPAREN_CHAR} to_return.contents = []rune{RPAREN_CHAR}
} }
} }
@ -183,7 +183,7 @@ func newPostfixDotNode() postfixNode {
toReturn := postfixNode{} toReturn := postfixNode{}
toReturn.startReps = 1 toReturn.startReps = 1
toReturn.endReps = 1 toReturn.endReps = 1
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.allChars = true toReturn.allChars = true
toReturn.contents = []rune{ANY_CHAR} toReturn.contents = []rune{ANY_CHAR}
return toReturn return toReturn
@ -194,7 +194,7 @@ func newPostfixCharNode(contents ...rune) postfixNode {
toReturn := postfixNode{} toReturn := postfixNode{}
toReturn.startReps = 1 toReturn.startReps = 1
toReturn.endReps = 1 toReturn.endReps = 1
toReturn.nodetype = CHARACTER toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, contents...) toReturn.contents = append(toReturn.contents, contents...)
return toReturn return toReturn
} }

Loading…
Cancel
Save