diff --git a/regex/compile.go b/regex/compile.go index 82a35d9..bec956c 100644 --- a/regex/compile.go +++ b/regex/compile.go @@ -450,7 +450,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { } // 'regex' should now contain the lookaround regex, plus the characters at the start (which indicate pos/neg, ahead/behind) // Now we should filter that out. - toAppend := postfixNode{nodetype: ASSERTION, startReps: 1, endReps: 1} + toAppend := postfixNode{nodetype: assertionNode, startReps: 1, endReps: 1} if regex[0] == '<' { // Lookbehind toAppend.lookaroundDir = LOOKBEHIND regex = regex[1:] @@ -489,7 +489,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { topStack, _ = peek(opStack) } outQueueFinalElement, _ := peek(outQueue) - if (c == '*' && outQueueFinalElement.nodetype == KLEENE) || (c == '+' && outQueueFinalElement.nodetype == PLUS) { // You cannot apply a quantifier to a quantifier in this way + if (c == '*' && outQueueFinalElement.nodetype == kleeneNode) || (c == '+' && outQueueFinalElement.nodetype == plusNode) { // You cannot apply a quantifier to a quantifier in this way return nil, fmt.Errorf("illegal use of token '%c'", c) } opStack = append(opStack, c) @@ -751,7 +751,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { idx := len(outQueue) - 1 // Get the last added node - if idx < 0 || outQueue[idx].nodetype == LPAREN { + if idx < 0 || outQueue[idx].nodetype == lparenNode { return nil, fmt.Errorf("numeric specifier with no content") } outQueue[idx].startReps = startRangeNum @@ -814,7 +814,7 @@ func thompson(re []postfixNode) (Reg, error) { } for _, c := range re { - if c.nodetype == CHARACTER || c.nodetype == ASSERTION { + if c.nodetype == characterNode || c.nodetype == assertionNode { state := State{} state.transitions = make(map[int][]*State) if c.allChars { @@ -865,7 +865,7 @@ func thompson(re []postfixNode) (Reg, error) { state.output = make([]*State, 0) state.output = append(state.output, &state) state.isEmpty = false - if c.nodetype == ASSERTION { + if c.nodetype == assertionNode { state.isEmpty = true // This is a little weird. A lookaround has the 'isEmpty' flag set, even though it _isn't_ empty (the contents are the regex). But, there's so much error-checking that relies on this flag that it's better to keep it this way. state.content = newContents(EPSILON) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of string if c.lookaroundDir == 0 || c.lookaroundSign == 0 { @@ -917,7 +917,7 @@ func thompson(re []postfixNode) (Reg, error) { nfa = append(nfa, &state) } - if c.nodetype == LPAREN || c.nodetype == RPAREN { + if c.nodetype == lparenNode || c.nodetype == rparenNode { s := &State{} s.assert = NONE s.content = newContents(EPSILON) @@ -926,7 +926,7 @@ func thompson(re []postfixNode) (Reg, error) { s.output = append(s.output, s) s.transitions = make(map[int][]*State) // LPAREN nodes are just added normally - if c.nodetype == LPAREN { + if c.nodetype == lparenNode { numGroups++ s.groupBegin = true s.groupNum = numGroups @@ -940,7 +940,7 @@ func thompson(re []postfixNode) (Reg, error) { // If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN // and RPAREN nodes. // If neither node exists, that's a problem so I return an error. - if c.nodetype == RPAREN { + if c.nodetype == rparenNode { s.groupEnd = true middleNode, err1 := pop(&nfa) lparenNode, err2 := pop(&nfa) @@ -969,7 +969,7 @@ func thompson(re []postfixNode) (Reg, error) { } } } - if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated + if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated // Map the list of nodes to a list of states, each state containing the contents of a specific node states := funcMap(c.nodeContents, func(node postfixNode) *State { s := newState() @@ -996,7 +996,7 @@ func thompson(re []postfixNode) (Reg, error) { } // Must be an operator if it isn't a character switch c.nodetype { - case CONCATENATE: + case concatenateNode: s2 := mustPop(&nfa) // Relax the requirements for concatenation a little bit - If // the second element is not found ie. the postfixNodes look @@ -1008,7 +1008,7 @@ func thompson(re []postfixNode) (Reg, error) { s1 = concatenate(s1, s2) nfa = append(nfa, s1) } - case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state + case kleeneNode: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state s1, err := pop(&nfa) if err != nil { return Reg{}, fmt.Errorf("error applying kleene star") @@ -1018,7 +1018,7 @@ func thompson(re []postfixNode) (Reg, error) { return Reg{}, err } nfa = append(nfa, stateToAdd) - case PLUS: // a+ is equivalent to aa* + case plusNode: // a+ is equivalent to aa* s1 := mustPop(&nfa) s2, err := kleene(*s1) if err != nil { @@ -1026,14 +1026,14 @@ func thompson(re []postfixNode) (Reg, error) { } s1 = concatenate(s1, s2) nfa = append(nfa, s1) - case QUESTION: // ab? is equivalent to a(b|) + case questionNode: // ab? is equivalent to a(b|) s1, err := pop(&nfa) if err != nil { return Reg{}, fmt.Errorf("error applying question operator") } s2 := question(s1) nfa = append(nfa, s2) - case PIPE: + case pipeNode: // A pipe operator doesn't actually need either operand to be present. If an operand isn't present, // it is replaced with an implicit 'matchZeroLength' state (this is the same thing that we add at the top if our // input has zero postfixNodes). diff --git a/regex/postfixNode.go b/regex/postfixNode.go index 8847094..c8b8b39 100644 --- a/regex/postfixNode.go +++ b/regex/postfixNode.go @@ -10,16 +10,16 @@ var escapedChars []rune = []rune("wWdDbBnaftrvsS0") // This is a list of the possible node types const ( - CHARACTER NodeType = iota - CHARCLASS - PIPE - CONCATENATE - KLEENE - QUESTION - PLUS - ASSERTION - LPAREN - RPAREN + characterNode NodeType = iota + charclassNode + pipeNode + concatenateNode + kleeneNode + questionNode + plusNode + assertionNode + lparenNode + rparenNode ) // Helper constants for lookarounds @@ -49,11 +49,11 @@ type postfixNode struct { // it will not match. func newCharClassNode(nodes []postfixNode, negated bool) postfixNode { rtv := postfixNode{} - rtv.nodetype = CHARCLASS + rtv.nodetype = charclassNode rtv.startReps = 1 rtv.endReps = 1 if negated { - rtv.nodetype = CHARACTER + rtv.nodetype = characterNode rtv.contents = []rune{ANY_CHAR} rtv.allChars = true rtv.except = nodes @@ -70,55 +70,55 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) { toReturn.endReps = 1 switch c { case 's': // Whitespace - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, whitespaceChars...) case 'S': // Non-whitespace toReturn = newPostfixDotNode() toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...)) case 'd': // Digits - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, digitChars...) case 'D': // Non-digits toReturn = newPostfixDotNode() toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...)) case 'w': // word character - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, wordChars...) case 'W': // Non-word character toReturn = newPostfixDotNode() toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...)) case 'b', 'B': if c == 'b' && inCharClass { - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, rune(8)) } else { - toReturn.nodetype = ASSERTION + toReturn.nodetype = assertionNode toReturn.contents = append(toReturn.contents, c) } case 'n': // Newline character - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, '\n') case '0': // NULL character - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, rune(0)) case 'a': // Bell character - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, rune(7)) case 'f': // Form feed character - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, rune(12)) case 't': // Horizontal tab character - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, rune(9)) case 'r': // Carriage return - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, rune(13)) case 'v': // Vertical tab - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, rune(11)) case '-': // Literal hyphen - only in character class if inCharClass { - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, '-') } else { return postfixNode{}, fmt.Errorf("invalid escape character") @@ -127,7 +127,7 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) { if isNormalChar(c) { // Normal characters cannot be escaped return postfixNode{}, fmt.Errorf("invalid escape character") } - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, c) } return toReturn, nil @@ -142,36 +142,36 @@ func newPostfixNode(contents ...rune) postfixNode { to_return.startReps = 1 to_return.endReps = 1 if len(contents) > 1 { // If the node has more than element, it must be a character class - the type must be CHARACTER - to_return.nodetype = CHARACTER + to_return.nodetype = characterNode to_return.contents = contents } else { // Node has one element, could be anything switch contents[0] { case '+': - to_return.nodetype = PLUS + to_return.nodetype = plusNode case '?': - to_return.nodetype = QUESTION + to_return.nodetype = questionNode case '*': - to_return.nodetype = KLEENE + to_return.nodetype = kleeneNode case '|': - to_return.nodetype = PIPE + to_return.nodetype = pipeNode case CONCAT: - to_return.nodetype = CONCATENATE + to_return.nodetype = concatenateNode case '^', '$': - to_return.nodetype = ASSERTION + to_return.nodetype = assertionNode case '(': - to_return.nodetype = LPAREN + to_return.nodetype = lparenNode case ')': - to_return.nodetype = RPAREN + to_return.nodetype = rparenNode default: - to_return.nodetype = CHARACTER + to_return.nodetype = characterNode } to_return.contents = append(to_return.contents, contents...) // Special cases for LPAREN and RPAREN - they have special characters defined for them - if to_return.nodetype == LPAREN { + if to_return.nodetype == lparenNode { to_return.contents = []rune{LPAREN_CHAR} } - if to_return.nodetype == RPAREN { + if to_return.nodetype == rparenNode { to_return.contents = []rune{RPAREN_CHAR} } } @@ -183,7 +183,7 @@ func newPostfixDotNode() postfixNode { toReturn := postfixNode{} toReturn.startReps = 1 toReturn.endReps = 1 - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.allChars = true toReturn.contents = []rune{ANY_CHAR} return toReturn @@ -194,7 +194,7 @@ func newPostfixCharNode(contents ...rune) postfixNode { toReturn := postfixNode{} toReturn.startReps = 1 toReturn.endReps = 1 - toReturn.nodetype = CHARACTER + toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, contents...) return toReturn }