Added a new node type 'CHARCLASS', which represents a character class containing other postfixNodes. The 'except' field now contains a list of postfixNodes rather than runes.

master
Aadhavan Srinivasan 2 days ago
parent b81a2f8452
commit 7056026e10

@ -2,9 +2,14 @@ package main
type NodeType int
// escapedChars lists every character that takes on a special meaning when it is
// preceded by a backslash. Eg. \b is a word boundary, \w is a word character etc.
// The element type is inferred from the []rune conversion, so it is not repeated.
var escapedChars = []rune("wWdDbBnaftrvsS0")
// This is a list of the possible node types
const (
CHARACTER NodeType = iota
CHARCLASS
PIPE
CONCATENATE
KLEENE
@ -25,13 +30,35 @@ var INFINITE_REPS int = -1 // Represents infinite reps eg. the end range in {5,}
// postfixNode represents a single node in the postfix representation of the expression.
//
// NOTE(review): the fields 'contents' through 'lookaroundDir' are listed twice below, and
// 'except' appears with two different element types ([]rune and []postfixNode). This looks
// like the old and new sides of a diff shown together - confirm which set is current.
type postfixNode struct {
nodetype NodeType // The kind of node - a NodeType value such as CHARACTER or CHARCLASS
contents []rune // Contents of the node
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
except []rune // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
lookaroundDir int // Lookbehind or lookahead
contents []rune // Contents of the node
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
except []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
lookaroundDir int // Lookbehind or lookahead
nodeContents []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
}
// newCharClassNode bundles the given postfixNodes into a single node, eg. turning
// 'a', 'b' and 'c' into '[abc]'. The returned node always repeats exactly once
// (startReps and endReps are both 1).
//
// When negated is false, the result has type CHARCLASS and keeps the nodes in nodeContents.
// When negated is true, the result has type CHARACTER instead: it is set up like the dot
// metacharacter (contents holds ANY_CHAR and allChars is true), with the given nodes
// stored in 'except' as the things it must not match.
//
// The nodes slice is stored as-is; it is not copied.
func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
	if !negated {
		// Plain character class: just wrap the nodes.
		return postfixNode{
			nodetype:     CHARCLASS,
			startReps:    1,
			endReps:      1,
			nodeContents: nodes,
		}
	}

	// Negated class: match everything, minus the listed nodes.
	return postfixNode{
		nodetype:  CHARACTER,
		startReps: 1,
		endReps:   1,
		contents:  []rune{ANY_CHAR},
		allChars:  true,
		except:    nodes,
	}
}
// Creates a new escaped node - the given character is assumed to have been preceded by a backslash
@ -45,25 +72,43 @@ func newEscapedNode(c rune) postfixNode {
toReturn.contents = append(toReturn.contents, whitespaceChars...)
case 'S': // Non-whitespace
toReturn = newPostfixDotNode()
toReturn.except = append([]rune{}, whitespaceChars...)
toReturn.except = append([]postfixNode{}, newPostfixNode(whitespaceChars...))
case 'd': // Digits
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, digitChars...)
case 'D': // Non-digits
toReturn = newPostfixDotNode()
toReturn.except = append([]rune{}, digitChars...)
toReturn.except = append([]postfixNode{}, newPostfixNode(digitChars...))
case 'w': // word character
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, wordChars...)
case 'W': // Non-word character
toReturn = newPostfixDotNode()
toReturn.except = append([]rune{}, wordChars...)
toReturn.except = append([]postfixNode{}, newPostfixNode(wordChars...))
case 'b', 'B':
toReturn.nodetype = ASSERTION
toReturn.contents = append(toReturn.contents, c)
case 'n': // Newline character
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, '\n')
case '0': // NULL character
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, rune(0))
case 'a': // Bell character
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, rune(7))
case 'f': // Form feed character
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, rune(12))
case 't': // Horizontal tab character
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, rune(9))
case 'r': // Carriage return
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, rune(13))
case 'v': // Vertical tab
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, rune(11))
default: // None of the above - append it as a regular character
toReturn.nodetype = CHARACTER
toReturn.contents = append(toReturn.contents, c)

Loading…
Cancel
Save