Updated nodeType constants so that they aren't exported
This commit is contained in:
@@ -450,7 +450,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
// 'regex' should now contain the lookaround regex, plus the characters at the start (which indicate pos/neg, ahead/behind)
|
// 'regex' should now contain the lookaround regex, plus the characters at the start (which indicate pos/neg, ahead/behind)
|
||||||
// Now we should filter that out.
|
// Now we should filter that out.
|
||||||
toAppend := postfixNode{nodetype: ASSERTION, startReps: 1, endReps: 1}
|
toAppend := postfixNode{nodetype: assertionNode, startReps: 1, endReps: 1}
|
||||||
if regex[0] == '<' { // Lookbehind
|
if regex[0] == '<' { // Lookbehind
|
||||||
toAppend.lookaroundDir = LOOKBEHIND
|
toAppend.lookaroundDir = LOOKBEHIND
|
||||||
regex = regex[1:]
|
regex = regex[1:]
|
||||||
@@ -489,7 +489,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
topStack, _ = peek(opStack)
|
topStack, _ = peek(opStack)
|
||||||
}
|
}
|
||||||
outQueueFinalElement, _ := peek(outQueue)
|
outQueueFinalElement, _ := peek(outQueue)
|
||||||
if (c == '*' && outQueueFinalElement.nodetype == KLEENE) || (c == '+' && outQueueFinalElement.nodetype == PLUS) { // You cannot apply a quantifier to a quantifier in this way
|
if (c == '*' && outQueueFinalElement.nodetype == kleeneNode) || (c == '+' && outQueueFinalElement.nodetype == plusNode) { // You cannot apply a quantifier to a quantifier in this way
|
||||||
return nil, fmt.Errorf("illegal use of token '%c'", c)
|
return nil, fmt.Errorf("illegal use of token '%c'", c)
|
||||||
}
|
}
|
||||||
opStack = append(opStack, c)
|
opStack = append(opStack, c)
|
||||||
@@ -751,7 +751,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
|
|
||||||
idx := len(outQueue) - 1
|
idx := len(outQueue) - 1
|
||||||
// Get the last added node
|
// Get the last added node
|
||||||
if idx < 0 || outQueue[idx].nodetype == LPAREN {
|
if idx < 0 || outQueue[idx].nodetype == lparenNode {
|
||||||
return nil, fmt.Errorf("numeric specifier with no content")
|
return nil, fmt.Errorf("numeric specifier with no content")
|
||||||
}
|
}
|
||||||
outQueue[idx].startReps = startRangeNum
|
outQueue[idx].startReps = startRangeNum
|
||||||
@@ -814,7 +814,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range re {
|
for _, c := range re {
|
||||||
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
if c.nodetype == characterNode || c.nodetype == assertionNode {
|
||||||
state := State{}
|
state := State{}
|
||||||
state.transitions = make(map[int][]*State)
|
state.transitions = make(map[int][]*State)
|
||||||
if c.allChars {
|
if c.allChars {
|
||||||
@@ -865,7 +865,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
state.output = make([]*State, 0)
|
state.output = make([]*State, 0)
|
||||||
state.output = append(state.output, &state)
|
state.output = append(state.output, &state)
|
||||||
state.isEmpty = false
|
state.isEmpty = false
|
||||||
if c.nodetype == ASSERTION {
|
if c.nodetype == assertionNode {
|
||||||
state.isEmpty = true // This is a little weird. A lookaround has the 'isEmpty' flag set, even though it _isn't_ empty (the contents are the regex). But, there's so much error-checking that relies on this flag that it's better to keep it this way.
|
state.isEmpty = true // This is a little weird. A lookaround has the 'isEmpty' flag set, even though it _isn't_ empty (the contents are the regex). But, there's so much error-checking that relies on this flag that it's better to keep it this way.
|
||||||
state.content = newContents(EPSILON) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of the string
|
state.content = newContents(EPSILON) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of the string
|
||||||
if c.lookaroundDir == 0 || c.lookaroundSign == 0 {
|
if c.lookaroundDir == 0 || c.lookaroundSign == 0 {
|
||||||
@@ -917,7 +917,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
|
|
||||||
nfa = append(nfa, &state)
|
nfa = append(nfa, &state)
|
||||||
}
|
}
|
||||||
if c.nodetype == LPAREN || c.nodetype == RPAREN {
|
if c.nodetype == lparenNode || c.nodetype == rparenNode {
|
||||||
s := &State{}
|
s := &State{}
|
||||||
s.assert = NONE
|
s.assert = NONE
|
||||||
s.content = newContents(EPSILON)
|
s.content = newContents(EPSILON)
|
||||||
@@ -926,7 +926,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
s.output = append(s.output, s)
|
s.output = append(s.output, s)
|
||||||
s.transitions = make(map[int][]*State)
|
s.transitions = make(map[int][]*State)
|
||||||
// LPAREN nodes are just added normally
|
// LPAREN nodes are just added normally
|
||||||
if c.nodetype == LPAREN {
|
if c.nodetype == lparenNode {
|
||||||
numGroups++
|
numGroups++
|
||||||
s.groupBegin = true
|
s.groupBegin = true
|
||||||
s.groupNum = numGroups
|
s.groupNum = numGroups
|
||||||
@@ -940,7 +940,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
// If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN
|
// If the middle node doesn't exist (ie. something like '()' ), that's fine, I just connect the LPAREN
|
||||||
// and RPAREN nodes.
|
// and RPAREN nodes.
|
||||||
// If neither node exists, that's a problem so I return an error.
|
// If neither node exists, that's a problem so I return an error.
|
||||||
if c.nodetype == RPAREN {
|
if c.nodetype == rparenNode {
|
||||||
s.groupEnd = true
|
s.groupEnd = true
|
||||||
middleNode, err1 := pop(&nfa)
|
middleNode, err1 := pop(&nfa)
|
||||||
lparenNode, err2 := pop(&nfa)
|
lparenNode, err2 := pop(&nfa)
|
||||||
@@ -969,7 +969,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if c.nodetype == CHARCLASS { // A Character class consists of all the nodes in it, alternated
|
if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
|
||||||
// Map the list of nodes to a list of states, each state containing the contents of a specific node
|
// Map the list of nodes to a list of states, each state containing the contents of a specific node
|
||||||
states := funcMap(c.nodeContents, func(node postfixNode) *State {
|
states := funcMap(c.nodeContents, func(node postfixNode) *State {
|
||||||
s := newState()
|
s := newState()
|
||||||
@@ -996,7 +996,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
}
|
}
|
||||||
// Must be an operator if it isn't a character
|
// Must be an operator if it isn't a character
|
||||||
switch c.nodetype {
|
switch c.nodetype {
|
||||||
case CONCATENATE:
|
case concatenateNode:
|
||||||
s2 := mustPop(&nfa)
|
s2 := mustPop(&nfa)
|
||||||
// Relax the requirements for concatenation a little bit - If
|
// Relax the requirements for concatenation a little bit - If
|
||||||
// the second element is not found ie. the postfixNodes look
|
// the second element is not found ie. the postfixNodes look
|
||||||
@@ -1008,7 +1008,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
s1 = concatenate(s1, s2)
|
s1 = concatenate(s1, s2)
|
||||||
nfa = append(nfa, s1)
|
nfa = append(nfa, s1)
|
||||||
}
|
}
|
||||||
case KLEENE: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
case kleeneNode: // Create a 0-state, concat the popped state after it, concat the 0-state after the popped state
|
||||||
s1, err := pop(&nfa)
|
s1, err := pop(&nfa)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Reg{}, fmt.Errorf("error applying kleene star")
|
return Reg{}, fmt.Errorf("error applying kleene star")
|
||||||
@@ -1018,7 +1018,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
return Reg{}, err
|
return Reg{}, err
|
||||||
}
|
}
|
||||||
nfa = append(nfa, stateToAdd)
|
nfa = append(nfa, stateToAdd)
|
||||||
case PLUS: // a+ is equivalent to aa*
|
case plusNode: // a+ is equivalent to aa*
|
||||||
s1 := mustPop(&nfa)
|
s1 := mustPop(&nfa)
|
||||||
s2, err := kleene(*s1)
|
s2, err := kleene(*s1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -1026,14 +1026,14 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
}
|
}
|
||||||
s1 = concatenate(s1, s2)
|
s1 = concatenate(s1, s2)
|
||||||
nfa = append(nfa, s1)
|
nfa = append(nfa, s1)
|
||||||
case QUESTION: // ab? is equivalent to a(b|)
|
case questionNode: // ab? is equivalent to a(b|)
|
||||||
s1, err := pop(&nfa)
|
s1, err := pop(&nfa)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Reg{}, fmt.Errorf("error applying question operator")
|
return Reg{}, fmt.Errorf("error applying question operator")
|
||||||
}
|
}
|
||||||
s2 := question(s1)
|
s2 := question(s1)
|
||||||
nfa = append(nfa, s2)
|
nfa = append(nfa, s2)
|
||||||
case PIPE:
|
case pipeNode:
|
||||||
// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
|
// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
|
||||||
// it is replaced with an implicit 'matchZeroLength' state (this is the same thing that we add at the top if our
|
// it is replaced with an implicit 'matchZeroLength' state (this is the same thing that we add at the top if our
|
||||||
// input has zero postfixNodes).
|
// input has zero postfixNodes).
|
||||||
|
@@ -10,16 +10,16 @@ var escapedChars []rune = []rune("wWdDbBnaftrvsS0")
|
|||||||
|
|
||||||
// This is a list of the possible node types
|
// This is a list of the possible node types
|
||||||
const (
|
const (
|
||||||
CHARACTER NodeType = iota
|
characterNode NodeType = iota
|
||||||
CHARCLASS
|
charclassNode
|
||||||
PIPE
|
pipeNode
|
||||||
CONCATENATE
|
concatenateNode
|
||||||
KLEENE
|
kleeneNode
|
||||||
QUESTION
|
questionNode
|
||||||
PLUS
|
plusNode
|
||||||
ASSERTION
|
assertionNode
|
||||||
LPAREN
|
lparenNode
|
||||||
RPAREN
|
rparenNode
|
||||||
)
|
)
|
||||||
|
|
||||||
// Helper constants for lookarounds
|
// Helper constants for lookarounds
|
||||||
@@ -49,11 +49,11 @@ type postfixNode struct {
|
|||||||
// it will not match.
|
// it will not match.
|
||||||
func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
|
func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
|
||||||
rtv := postfixNode{}
|
rtv := postfixNode{}
|
||||||
rtv.nodetype = CHARCLASS
|
rtv.nodetype = charclassNode
|
||||||
rtv.startReps = 1
|
rtv.startReps = 1
|
||||||
rtv.endReps = 1
|
rtv.endReps = 1
|
||||||
if negated {
|
if negated {
|
||||||
rtv.nodetype = CHARACTER
|
rtv.nodetype = characterNode
|
||||||
rtv.contents = []rune{ANY_CHAR}
|
rtv.contents = []rune{ANY_CHAR}
|
||||||
rtv.allChars = true
|
rtv.allChars = true
|
||||||
rtv.except = nodes
|
rtv.except = nodes
|
||||||
@@ -70,55 +70,55 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
|||||||
toReturn.endReps = 1
|
toReturn.endReps = 1
|
||||||
switch c {
|
switch c {
|
||||||
case 's': // Whitespace
|
case 's': // Whitespace
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, whitespaceChars...)
|
toReturn.contents = append(toReturn.contents, whitespaceChars...)
|
||||||
case 'S': // Non-whitespace
|
case 'S': // Non-whitespace
|
||||||
toReturn = newPostfixDotNode()
|
toReturn = newPostfixDotNode()
|
||||||
toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...))
|
toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...))
|
||||||
case 'd': // Digits
|
case 'd': // Digits
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, digitChars...)
|
toReturn.contents = append(toReturn.contents, digitChars...)
|
||||||
case 'D': // Non-digits
|
case 'D': // Non-digits
|
||||||
toReturn = newPostfixDotNode()
|
toReturn = newPostfixDotNode()
|
||||||
toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...))
|
toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...))
|
||||||
case 'w': // word character
|
case 'w': // word character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, wordChars...)
|
toReturn.contents = append(toReturn.contents, wordChars...)
|
||||||
case 'W': // Non-word character
|
case 'W': // Non-word character
|
||||||
toReturn = newPostfixDotNode()
|
toReturn = newPostfixDotNode()
|
||||||
toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...))
|
toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...))
|
||||||
case 'b', 'B':
|
case 'b', 'B':
|
||||||
if c == 'b' && inCharClass {
|
if c == 'b' && inCharClass {
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, rune(8))
|
toReturn.contents = append(toReturn.contents, rune(8))
|
||||||
} else {
|
} else {
|
||||||
toReturn.nodetype = ASSERTION
|
toReturn.nodetype = assertionNode
|
||||||
toReturn.contents = append(toReturn.contents, c)
|
toReturn.contents = append(toReturn.contents, c)
|
||||||
}
|
}
|
||||||
case 'n': // Newline character
|
case 'n': // Newline character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, '\n')
|
toReturn.contents = append(toReturn.contents, '\n')
|
||||||
case '0': // NULL character
|
case '0': // NULL character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, rune(0))
|
toReturn.contents = append(toReturn.contents, rune(0))
|
||||||
case 'a': // Bell character
|
case 'a': // Bell character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, rune(7))
|
toReturn.contents = append(toReturn.contents, rune(7))
|
||||||
case 'f': // Form feed character
|
case 'f': // Form feed character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, rune(12))
|
toReturn.contents = append(toReturn.contents, rune(12))
|
||||||
case 't': // Horizontal tab character
|
case 't': // Horizontal tab character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, rune(9))
|
toReturn.contents = append(toReturn.contents, rune(9))
|
||||||
case 'r': // Carriage return
|
case 'r': // Carriage return
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, rune(13))
|
toReturn.contents = append(toReturn.contents, rune(13))
|
||||||
case 'v': // Vertical tab
|
case 'v': // Vertical tab
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, rune(11))
|
toReturn.contents = append(toReturn.contents, rune(11))
|
||||||
case '-': // Literal hyphen - only in character class
|
case '-': // Literal hyphen - only in character class
|
||||||
if inCharClass {
|
if inCharClass {
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, '-')
|
toReturn.contents = append(toReturn.contents, '-')
|
||||||
} else {
|
} else {
|
||||||
return postfixNode{}, fmt.Errorf("invalid escape character")
|
return postfixNode{}, fmt.Errorf("invalid escape character")
|
||||||
@@ -127,7 +127,7 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
|||||||
if isNormalChar(c) { // Normal characters cannot be escaped
|
if isNormalChar(c) { // Normal characters cannot be escaped
|
||||||
return postfixNode{}, fmt.Errorf("invalid escape character")
|
return postfixNode{}, fmt.Errorf("invalid escape character")
|
||||||
}
|
}
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, c)
|
toReturn.contents = append(toReturn.contents, c)
|
||||||
}
|
}
|
||||||
return toReturn, nil
|
return toReturn, nil
|
||||||
@@ -142,36 +142,36 @@ func newPostfixNode(contents ...rune) postfixNode {
|
|||||||
to_return.startReps = 1
|
to_return.startReps = 1
|
||||||
to_return.endReps = 1
|
to_return.endReps = 1
|
||||||
if len(contents) > 1 { // If the node has more than one element, it must be a character class - the type must be CHARACTER
|
if len(contents) > 1 { // If the node has more than one element, it must be a character class - the type must be CHARACTER
|
||||||
to_return.nodetype = CHARACTER
|
to_return.nodetype = characterNode
|
||||||
to_return.contents = contents
|
to_return.contents = contents
|
||||||
} else { // Node has one element, could be anything
|
} else { // Node has one element, could be anything
|
||||||
switch contents[0] {
|
switch contents[0] {
|
||||||
case '+':
|
case '+':
|
||||||
to_return.nodetype = PLUS
|
to_return.nodetype = plusNode
|
||||||
case '?':
|
case '?':
|
||||||
to_return.nodetype = QUESTION
|
to_return.nodetype = questionNode
|
||||||
case '*':
|
case '*':
|
||||||
to_return.nodetype = KLEENE
|
to_return.nodetype = kleeneNode
|
||||||
case '|':
|
case '|':
|
||||||
to_return.nodetype = PIPE
|
to_return.nodetype = pipeNode
|
||||||
case CONCAT:
|
case CONCAT:
|
||||||
to_return.nodetype = CONCATENATE
|
to_return.nodetype = concatenateNode
|
||||||
case '^', '$':
|
case '^', '$':
|
||||||
to_return.nodetype = ASSERTION
|
to_return.nodetype = assertionNode
|
||||||
case '(':
|
case '(':
|
||||||
to_return.nodetype = LPAREN
|
to_return.nodetype = lparenNode
|
||||||
case ')':
|
case ')':
|
||||||
to_return.nodetype = RPAREN
|
to_return.nodetype = rparenNode
|
||||||
default:
|
default:
|
||||||
to_return.nodetype = CHARACTER
|
to_return.nodetype = characterNode
|
||||||
}
|
}
|
||||||
to_return.contents = append(to_return.contents, contents...)
|
to_return.contents = append(to_return.contents, contents...)
|
||||||
|
|
||||||
// Special cases for LPAREN and RPAREN - they have special characters defined for them
|
// Special cases for LPAREN and RPAREN - they have special characters defined for them
|
||||||
if to_return.nodetype == LPAREN {
|
if to_return.nodetype == lparenNode {
|
||||||
to_return.contents = []rune{LPAREN_CHAR}
|
to_return.contents = []rune{LPAREN_CHAR}
|
||||||
}
|
}
|
||||||
if to_return.nodetype == RPAREN {
|
if to_return.nodetype == rparenNode {
|
||||||
to_return.contents = []rune{RPAREN_CHAR}
|
to_return.contents = []rune{RPAREN_CHAR}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -183,7 +183,7 @@ func newPostfixDotNode() postfixNode {
|
|||||||
toReturn := postfixNode{}
|
toReturn := postfixNode{}
|
||||||
toReturn.startReps = 1
|
toReturn.startReps = 1
|
||||||
toReturn.endReps = 1
|
toReturn.endReps = 1
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.allChars = true
|
toReturn.allChars = true
|
||||||
toReturn.contents = []rune{ANY_CHAR}
|
toReturn.contents = []rune{ANY_CHAR}
|
||||||
return toReturn
|
return toReturn
|
||||||
@@ -194,7 +194,7 @@ func newPostfixCharNode(contents ...rune) postfixNode {
|
|||||||
toReturn := postfixNode{}
|
toReturn := postfixNode{}
|
||||||
toReturn.startReps = 1
|
toReturn.startReps = 1
|
||||||
toReturn.endReps = 1
|
toReturn.endReps = 1
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, contents...)
|
toReturn.contents = append(toReturn.contents, contents...)
|
||||||
return toReturn
|
return toReturn
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user