Added a new node type 'CHARCLASS', which represents a character class containing other postfixNodes. The 'except' field now holds a list of postfixNodes rather than a list of runes.

3 days ago · 7056026e10
parent b81a2f8452
commit 7056026e10
1 changed files with 55 additions and 10 deletions
--- a/postfixNode.go
+++ b/postfixNode.go
@ -2,9 +2,14 @@ package main
 // NodeType identifies the kind of a postfixNode; the possible values are the
 // constants declared below.
 type NodeType int
 // escapedChars is a slice containing all escapable characters that have special meaning
 // when preceded by a backslash. Eg. \b is word boundary, \w is word character etc.
 var escapedChars []rune = []rune("wWdDbBnaftrvsS0")
 // This is a list of the possible node types
 const (
 	CHARACTER NodeType = iota
 	CHARCLASS
 	PIPE
 	CONCATENATE
 	KLEENE
@ -25,13 +30,35 @@ var INFINITE_REPS int = -1 // Represents infinite reps eg. the end range in {5,}
 // postfixNode represents a node in the postfix representation of the expression.
 // Which fields are meaningful depends on nodetype: nodeContents is only used for
 // CHARCLASS nodes, and lookaroundSign is only used for ASSERTION nodes (see the
 // per-field comments below).
 type postfixNode struct {
 	nodetype       NodeType
-	contents       []rune // Contents of the node
+	contents       []rune        // Contents of the node
-	startReps      int    // Minimum number of times the node should be repeated - used with numeric specifiers
+	startReps      int           // Minimum number of times the node should be repeated - used with numeric specifiers
-	endReps        int    // Maximum number of times the node should be repeated - used with numeric specifiers
+	endReps        int           // Maximum number of times the node should be repeated - used with numeric specifiers
-	allChars       bool   // Whether or not the current node represents all characters (eg. dot metacharacter)
+	allChars       bool          // Whether or not the current node represents all characters (eg. dot metacharacter)
-	except         []rune // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
+	except         []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
-	lookaroundSign int    // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
+	lookaroundSign int           // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
-	lookaroundDir  int    // Lookbehind or lookahead
+	lookaroundDir  int           // Lookbehind or lookahead
 	nodeContents   []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
 }
 // newCharClassNode converts the given list of postfixNodes into a single node.
 // Used to convert eg. 'a', 'b' and 'c' to '[abc]'.
 //
 // If negated is false, the returned node has type CHARCLASS and the given nodes
 // are stored in nodeContents.
 // If negated is true, the returned node has type CHARACTER instead: contents is
 // set to ANY_CHAR, allChars is true, and the given nodes are stored in except.
 // This node will behave like the dot metacharacter, but it has a longer list of
 // things that it will not match.
 //
 // In both cases startReps and endReps are set to 1. The nodes slice is stored
 // as-is (it is not copied).
 func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
 	rtv := postfixNode{}
 	// Default to a CHARCLASS node; the negated branch below overrides the type.
 	rtv.nodetype = CHARCLASS
 	rtv.startReps = 1
 	rtv.endReps = 1
 	if negated {
 		// Negated class: an all-characters CHARACTER node, with the given
 		// nodes recorded as the exceptions.
 		rtv.nodetype = CHARACTER
 		rtv.contents = []rune{ANY_CHAR}
 		rtv.allChars = true
 		rtv.except = nodes
 	} else {
 		rtv.nodeContents = nodes
 	}
 	return rtv
 }
 // Creates a new escaped node - the given character is assumed to have been preceded by a backslash
@ -45,25 +72,43 @@ func newEscapedNode(c rune) postfixNode {
 		toReturn.contents = append(toReturn.contents, whitespaceChars...)
 	case 'S': // Non-whitespace
 		toReturn = newPostfixDotNode()
-		toReturn.except = append([]rune{}, whitespaceChars...)
+		toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...))
 	case 'd': // Digits
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, digitChars...)
 	case 'D': // Non-digits
 		toReturn = newPostfixDotNode()
-		toReturn.except = append([]rune{}, digitChars...)
+		toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...))
 	case 'w': // word character
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, wordChars...)
 	case 'W': // Non-word character
 		toReturn = newPostfixDotNode()
-		toReturn.except = append([]rune{}, wordChars...)
+		toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...))
 	case 'b', 'B':
 		toReturn.nodetype = ASSERTION
 		toReturn.contents = append(toReturn.contents, c)
 	case 'n': // Newline character
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, '\n')
 	case '0': // NULL character
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, rune(0))
 	case 'a': // Bell character
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, rune(7))
 	case 'f': // Form feed character
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, rune(12))
 	case 't': // Horizontal tab character
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, rune(9))
 	case 'r': // Carriage return
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, rune(13))
 	case 'v': // Vertical tab
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, rune(11))
 	default: // None of the above - append it as a regular character
 		toReturn.nodetype = CHARACTER
 		toReturn.contents = append(toReturn.contents, c)