Added a new node type 'CHARCLASS', which represents a character class containing other postfixNodes. The 'except' field now holds a list of postfixNodes rather than runes.
This commit is contained in:
@@ -2,9 +2,14 @@ package main
|
|||||||
|
|
||||||
type NodeType int
|
type NodeType int
|
||||||
|
|
||||||
|
// This is a slice containing all escapable characters that have special meaning.
|
||||||
|
// Eg. \b is word boundary, \w is word character etc.
|
||||||
|
var escapedChars []rune = []rune("wWdDbBnaftrvsS0")
|
||||||
|
|
||||||
// This is a list of the possible node types
|
// This is a list of the possible node types
|
||||||
const (
|
const (
|
||||||
CHARACTER NodeType = iota
|
CHARACTER NodeType = iota
|
||||||
|
CHARCLASS
|
||||||
PIPE
|
PIPE
|
||||||
CONCATENATE
|
CONCATENATE
|
||||||
KLEENE
|
KLEENE
|
||||||
@@ -25,13 +30,35 @@ var INFINITE_REPS int = -1 // Represents infinite reps eg. the end range in {5,}
|
|||||||
// This represents a node in the postfix representation of the expression
|
// This represents a node in the postfix representation of the expression
|
||||||
type postfixNode struct {
|
type postfixNode struct {
|
||||||
nodetype NodeType
|
nodetype NodeType
|
||||||
contents []rune // Contents of the node
|
contents []rune // Contents of the node
|
||||||
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
|
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
|
||||||
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
|
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
|
||||||
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
|
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
|
||||||
except []rune // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
|
except []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
|
||||||
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
|
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
|
||||||
lookaroundDir int // Lookbehind or lookahead
|
lookaroundDir int // Lookbehind or lookahead
|
||||||
|
nodeContents []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts the given list of postfixNodes to one node of type CHARCLASS.
|
||||||
|
// Used to convert eg. 'a', 'b' and 'c' to '[abc]'.
|
||||||
|
// If the character class is negated, it returns a postfixNode of type CHARACTER.
|
||||||
|
// This node will behave like the dot metacharacter, but it has a longer list of nodes that
|
||||||
|
// it will not match.
|
||||||
|
func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
|
||||||
|
rtv := postfixNode{}
|
||||||
|
rtv.nodetype = CHARCLASS
|
||||||
|
rtv.startReps = 1
|
||||||
|
rtv.endReps = 1
|
||||||
|
if negated {
|
||||||
|
rtv.nodetype = CHARACTER
|
||||||
|
rtv.contents = []rune{ANY_CHAR}
|
||||||
|
rtv.allChars = true
|
||||||
|
rtv.except = nodes
|
||||||
|
} else {
|
||||||
|
rtv.nodeContents = nodes
|
||||||
|
}
|
||||||
|
return rtv
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates a new escaped node - the given character is assumed to have been preceded by a backslash
|
// Creates a new escaped node - the given character is assumed to have been preceded by a backslash
|
||||||
@@ -45,25 +72,43 @@ func newEscapedNode(c rune) postfixNode {
|
|||||||
toReturn.contents = append(toReturn.contents, whitespaceChars...)
|
toReturn.contents = append(toReturn.contents, whitespaceChars...)
|
||||||
case 'S': // Non-whitespace
|
case 'S': // Non-whitespace
|
||||||
toReturn = newPostfixDotNode()
|
toReturn = newPostfixDotNode()
|
||||||
toReturn.except = append([]rune{}, whitespaceChars...)
|
toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...))
|
||||||
case 'd': // Digits
|
case 'd': // Digits
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = CHARACTER
|
||||||
toReturn.contents = append(toReturn.contents, digitChars...)
|
toReturn.contents = append(toReturn.contents, digitChars...)
|
||||||
case 'D': // Non-digits
|
case 'D': // Non-digits
|
||||||
toReturn = newPostfixDotNode()
|
toReturn = newPostfixDotNode()
|
||||||
toReturn.except = append([]rune{}, digitChars...)
|
toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...))
|
||||||
case 'w': // word character
|
case 'w': // word character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = CHARACTER
|
||||||
toReturn.contents = append(toReturn.contents, wordChars...)
|
toReturn.contents = append(toReturn.contents, wordChars...)
|
||||||
case 'W': // Non-word character
|
case 'W': // Non-word character
|
||||||
toReturn = newPostfixDotNode()
|
toReturn = newPostfixDotNode()
|
||||||
toReturn.except = append([]rune{}, wordChars...)
|
toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...))
|
||||||
case 'b', 'B':
|
case 'b', 'B':
|
||||||
toReturn.nodetype = ASSERTION
|
toReturn.nodetype = ASSERTION
|
||||||
toReturn.contents = append(toReturn.contents, c)
|
toReturn.contents = append(toReturn.contents, c)
|
||||||
case 'n': // Newline character
|
case 'n': // Newline character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = CHARACTER
|
||||||
toReturn.contents = append(toReturn.contents, '\n')
|
toReturn.contents = append(toReturn.contents, '\n')
|
||||||
|
case '0': // NULL character
|
||||||
|
toReturn.nodetype = CHARACTER
|
||||||
|
toReturn.contents = append(toReturn.contents, rune(0))
|
||||||
|
case 'a': // Bell character
|
||||||
|
toReturn.nodetype = CHARACTER
|
||||||
|
toReturn.contents = append(toReturn.contents, rune(7))
|
||||||
|
case 'f': // Form feed character
|
||||||
|
toReturn.nodetype = CHARACTER
|
||||||
|
toReturn.contents = append(toReturn.contents, rune(12))
|
||||||
|
case 't': // Horizontal tab character
|
||||||
|
toReturn.nodetype = CHARACTER
|
||||||
|
toReturn.contents = append(toReturn.contents, rune(9))
|
||||||
|
case 'r': // Carriage return
|
||||||
|
toReturn.nodetype = CHARACTER
|
||||||
|
toReturn.contents = append(toReturn.contents, rune(13))
|
||||||
|
case 'v': // Vertical tab
|
||||||
|
toReturn.nodetype = CHARACTER
|
||||||
|
toReturn.contents = append(toReturn.contents, rune(11))
|
||||||
default: // None of the above - append it as a regular character
|
default: // None of the above - append it as a regular character
|
||||||
toReturn.nodetype = CHARACTER
|
toReturn.nodetype = CHARACTER
|
||||||
toReturn.contents = append(toReturn.contents, c)
|
toReturn.contents = append(toReturn.contents, c)
|
||||||
|
Reference in New Issue
Block a user