package main type NodeType int // This is a list of the possible node types const ( CHARACTER NodeType = iota PIPE CONCATENATE KLEENE QUESTION PLUS ASSERTION LPAREN RPAREN ) // Helper constants for lookarounds const POSITIVE = 1 const NEGATIVE = -1 const LOOKAHEAD = 1 const LOOKBEHIND = -1 var INFINITE_REPS int = -1 // Represents infinite reps eg. the end range in {5,} // This represents a node in the postfix representation of the expression type postfixNode struct { nodetype NodeType contents []rune // Contents of the node startReps int // Minimum number of times the node should be repeated - used with numeric specifiers endReps int // Maximum number of times the node should be repeated - used with numeric specifiers allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter) except []rune // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here. lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround. lookaroundDir int // Lookbehind or lookahead } // Creates a new escaped node - the given character is assumed to have been preceded by a backslash func newEscapedNode(c rune) postfixNode { toReturn := postfixNode{} toReturn.startReps = 1 toReturn.endReps = 1 switch c { case 's': // Whitespace toReturn.nodetype = CHARACTER toReturn.contents = append(toReturn.contents, whitespaceChars...) case 'S': // Non-whitespace toReturn = newPostfixDotNode() toReturn.except = append([]rune{}, whitespaceChars...) case 'd': // Digits toReturn.nodetype = CHARACTER toReturn.contents = append(toReturn.contents, digitChars...) case 'D': // Non-digits toReturn = newPostfixDotNode() toReturn.except = append([]rune{}, digitChars...) case 'w': // word character toReturn.nodetype = CHARACTER toReturn.contents = append(toReturn.contents, wordChars...) case 'W': // Non-word character toReturn = newPostfixDotNode() toReturn.except = append([]rune{}, wordChars...) case 'b', 'B': toReturn.nodetype = ASSERTION toReturn.contents = append(toReturn.contents, c) case 'n': // Newline character toReturn.nodetype = CHARACTER toReturn.contents = append(toReturn.contents, '\n') default: // None of the above - append it as a regular character toReturn.nodetype = CHARACTER toReturn.contents = append(toReturn.contents, c) } return toReturn } // Creates and returns a postfixNode based on the given contents func newPostfixNode(contents ...rune) postfixNode { if len(contents) < 1 { panic("Empty node.") } to_return := postfixNode{} to_return.startReps = 1 to_return.endReps = 1 if len(contents) > 1 { // If the node has more than element, it must be a character class - the type must be CHARACTER to_return.nodetype = CHARACTER to_return.contents = contents } else { // Node has one element, could be anything switch contents[0] { case '+': to_return.nodetype = PLUS case '?': to_return.nodetype = QUESTION case '*': to_return.nodetype = KLEENE case '|': to_return.nodetype = PIPE case CONCAT: to_return.nodetype = CONCATENATE case '^', '$': to_return.nodetype = ASSERTION case '(': to_return.nodetype = LPAREN case ')': to_return.nodetype = RPAREN default: to_return.nodetype = CHARACTER } to_return.contents = append(to_return.contents, contents...) // Special cases for LPAREN and RPAREN - they have special characters defined for them if to_return.nodetype == LPAREN { to_return.contents = []rune{LPAREN_CHAR} } if to_return.nodetype == RPAREN { to_return.contents = []rune{RPAREN_CHAR} } } return to_return } // Creates and returns a postfixNode representing the 'dot' metacharacter. func newPostfixDotNode() postfixNode { toReturn := postfixNode{} toReturn.startReps = 1 toReturn.endReps = 1 toReturn.nodetype = CHARACTER toReturn.allChars = true toReturn.contents = []rune{ANY_CHAR} return toReturn } // Creates a character node, regardless of the contents func newPostfixCharNode(contents ...rune) postfixNode { toReturn := postfixNode{} toReturn.startReps = 1 toReturn.endReps = 1 toReturn.nodetype = CHARACTER toReturn.contents = append(toReturn.contents, contents...) return toReturn }