|
|
|
package main
|
|
|
|
|
|
|
|
import "slices"
|
|
|
|
|
|
|
|
// NodeType identifies what kind of node a postfixNode is.
type NodeType int

// The possible node types. The values are assigned sequentially starting
// from zero, so the order of this list must not change.
const (
	// CHARACTER is the type given to character classes, to the character
	// escapes (\s, \S, \d, \D, \w, \W) and to any rune that is not one of
	// the operators or assertions below.
	CHARACTER NodeType = iota
	// PIPE is the type given to '|'.
	PIPE
	// CONCATENATE is the type given to the CONCAT rune.
	CONCATENATE
	// KLEENE is the type given to '*'.
	KLEENE
	// QUESTION is the type given to '?'.
	QUESTION
	// PLUS is the type given to '+'.
	PLUS
	// ASSERTION is the type given to '^', '$' and the escapes \b and \B.
	ASSERTION
)
|
|
|
|
|
|
|
|
// INFINITE_REPS is the sentinel repetition count that stands for
// "infinitely many", e.g. the end of the range in {5,}.
var INFINITE_REPS = -1
|
|
|
|
// This represents a node in the postfix representation of the expression
|
|
|
|
type postfixNode struct {
|
|
|
|
nodetype NodeType
|
|
|
|
contents []rune // Contents of the node - the length of this would only be >1 if the node represents a character class
|
|
|
|
startReps int // How many times the node should be repeated - used with numeric specifiers
|
|
|
|
endReps int
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates a new escaped node - the given character is assumed to have been preceded by a backslash
|
|
|
|
func newEscapedNode(c rune) postfixNode {
|
|
|
|
toReturn := postfixNode{}
|
|
|
|
toReturn.startReps = 1
|
|
|
|
toReturn.endReps = 1
|
|
|
|
switch c {
|
|
|
|
case 's': // Whitespace
|
|
|
|
toReturn.nodetype = CHARACTER
|
|
|
|
toReturn.contents = append(toReturn.contents, whitespaceChars...)
|
|
|
|
case 'S': // Non-whitespace - I am doing this in a fancy way, generating all dot characters, then removing whitespace characters from it
|
|
|
|
toReturn.nodetype = CHARACTER
|
|
|
|
toReturn.contents = append(toReturn.contents, slices.DeleteFunc(dotChars(), func(r rune) bool {
|
|
|
|
return slices.Contains(whitespaceChars, r)
|
|
|
|
})...)
|
|
|
|
case 'd': // Digits
|
|
|
|
toReturn.nodetype = CHARACTER
|
|
|
|
toReturn.contents = append(toReturn.contents, digitChars...)
|
|
|
|
case 'D': // Non-digits - same fancy way as 'S'
|
|
|
|
toReturn.nodetype = CHARACTER
|
|
|
|
toReturn.contents = append(toReturn.contents, slices.DeleteFunc(dotChars(), func(r rune) bool {
|
|
|
|
return slices.Contains(digitChars, r)
|
|
|
|
})...)
|
|
|
|
case 'w': // word character
|
|
|
|
toReturn.nodetype = CHARACTER
|
|
|
|
toReturn.contents = append(toReturn.contents, wordChars...)
|
|
|
|
case 'W': // Non-word character - same fancy way as 'S' and 'D'
|
|
|
|
toReturn.nodetype = CHARACTER
|
|
|
|
toReturn.contents = append(toReturn.contents, slices.DeleteFunc(dotChars(), func(r rune) bool {
|
|
|
|
return slices.Contains(wordChars, r)
|
|
|
|
})...)
|
|
|
|
case 'b', 'B':
|
|
|
|
toReturn.nodetype = ASSERTION
|
|
|
|
toReturn.contents = append(toReturn.contents, c)
|
|
|
|
default: // None of the above - append it as a regular character
|
|
|
|
toReturn.nodetype = CHARACTER
|
|
|
|
toReturn.contents = append(toReturn.contents, c)
|
|
|
|
}
|
|
|
|
return toReturn
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates and returns a postfixNode based on the given contents
|
|
|
|
func newPostfixNode(contents ...rune) postfixNode {
|
|
|
|
if len(contents) < 1 {
|
|
|
|
panic("Empty node.")
|
|
|
|
}
|
|
|
|
to_return := postfixNode{}
|
|
|
|
to_return.startReps = 1
|
|
|
|
to_return.endReps = 1
|
|
|
|
if len(contents) > 1 { // If the node has more than element, it must be a character class - the type must be CHARACTER
|
|
|
|
to_return.nodetype = CHARACTER
|
|
|
|
to_return.contents = contents
|
|
|
|
} else { // Node has one element, could be anything
|
|
|
|
switch contents[0] {
|
|
|
|
case '+':
|
|
|
|
to_return.nodetype = PLUS
|
|
|
|
case '?':
|
|
|
|
to_return.nodetype = QUESTION
|
|
|
|
case '*':
|
|
|
|
to_return.nodetype = KLEENE
|
|
|
|
case '|':
|
|
|
|
to_return.nodetype = PIPE
|
|
|
|
case CONCAT:
|
|
|
|
to_return.nodetype = CONCATENATE
|
|
|
|
case '^', '$':
|
|
|
|
to_return.nodetype = ASSERTION
|
|
|
|
default:
|
|
|
|
to_return.nodetype = CHARACTER
|
|
|
|
}
|
|
|
|
to_return.contents = append(to_return.contents, contents...)
|
|
|
|
}
|
|
|
|
return to_return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates a character node, regardless of the contents
|
|
|
|
func newPostfixCharNode(contents ...rune) postfixNode {
|
|
|
|
toReturn := postfixNode{}
|
|
|
|
toReturn.nodetype = CHARACTER
|
|
|
|
toReturn.contents = append(toReturn.contents, contents...)
|
|
|
|
return toReturn
|
|
|
|
}
|