regex/postfixNode.go

132 lines
4.1 KiB
Go

package main
import (
"slices"
)
// NodeType identifies what kind of token a postfixNode represents.
type NodeType int

// This is a list of the possible node types. The values are assigned
// sequentially by iota, starting at 0 for CHARACTER.
const (
	CHARACTER NodeType = iota // A literal character, or a set of characters such as a character class
	PIPE // The alternation operator '|'
	CONCATENATE // The concatenation operator (the CONCAT rune)
	KLEENE // The Kleene star '*'
	QUESTION // The '?' operator
	PLUS // The '+' operator
	ASSERTION // An assertion: '^', '$', or the escapes \b and \B
	LPAREN // An opening parenthesis '('
	RPAREN // A closing parenthesis ')'
)
// INFINITE_REPS is the sentinel repetition count (-1) that stands for an
// unbounded number of repetitions, e.g. the missing end of the range in {5,}.
var INFINITE_REPS int = -1
// postfixNode represents a single node in the postfix representation of the
// expression: either an operator, an assertion, a parenthesis, or a set of
// characters to match.
type postfixNode struct {
	nodetype NodeType // Kind of node - one of the NodeType constants
	contents []rune // Contents of the node - the length of this would only be >1 if the node represents a character class
	startReps int // Minimum number of times the node should be repeated - used with numeric specifiers. The constructors visible here initialise it to 1
	endReps int // Maximum number of times the node should be repeated - used with numeric specifiers. The constructors visible here initialise it to 1; presumably INFINITE_REPS marks an open-ended range - TODO confirm against the parser
	isDot bool // Whether or not the current node represents a 'dot' metacharacter (set by newPostfixDotNode)
}
// newEscapedNode builds the node for an escape sequence. The given rune c is
// the character that followed the backslash.
//
// Recognised escapes:
//   - s, d, w: CHARACTER node holding the whitespace, digit or word characters.
//   - S, D, W: CHARACTER node holding every rune returned by dotChars() except
//     the whitespace, digit or word characters respectively.
//   - b, B: ASSERTION node whose contents is the escape letter itself.
//   - n: CHARACTER node holding a newline.
//
// Any other rune yields a CHARACTER node containing that rune unchanged. The
// returned node always has startReps and endReps set to 1.
func newEscapedNode(c rune) postfixNode {
	// allExcept yields the dot characters with every member of set removed.
	// dotChars() is called afresh each time, so the filtering never touches a
	// slice produced by an earlier call.
	allExcept := func(set []rune) []rune {
		return slices.DeleteFunc(dotChars(), func(r rune) bool {
			return slices.Contains(set, r)
		})
	}

	kind := CHARACTER
	var members []rune
	switch c {
	case 's': // Whitespace
		members = whitespaceChars
	case 'S': // Non-whitespace
		members = allExcept(whitespaceChars)
	case 'd': // Digits
		members = digitChars
	case 'D': // Non-digits
		members = allExcept(digitChars)
	case 'w': // Word characters
		members = wordChars
	case 'W': // Non-word characters
		members = allExcept(wordChars)
	case 'b', 'B': // Assertions keep the escape letter as their contents
		kind = ASSERTION
		members = []rune{c}
	case 'n': // Newline character
		members = []rune{'\n'}
	default: // None of the above - treat it as a regular character
		members = []rune{c}
	}

	return postfixNode{
		nodetype: kind,
		// Copy into a fresh slice so the node never shares storage with the
		// package-level character tables.
		contents:  append([]rune(nil), members...),
		startReps: 1,
		endReps:   1,
	}
}
// newPostfixNode creates and returns a postfixNode based on the given contents.
//
// When more than one rune is supplied the node must be a character class, so
// its type is CHARACTER. When exactly one rune is supplied, the type is derived
// from that rune: '+', '?', '*', '|' and CONCAT map to their operator types,
// '^' and '$' to ASSERTION, '(' and ')' to LPAREN and RPAREN, and anything else
// to CHARACTER.
//
// The runes are always copied into the node, so the result never aliases a
// slice the caller passed with `contents...`. The returned node has startReps
// and endReps set to 1.
//
// It panics if called with no runes.
func newPostfixNode(contents ...rune) postfixNode {
	if len(contents) < 1 {
		panic("Empty node.")
	}

	node := postfixNode{
		nodetype:  CHARACTER,
		startReps: 1,
		endReps:   1,
	}
	// Copy rather than store `contents` directly: a caller that spreads an
	// existing slice hands over its backing array, and later writes to that
	// slice would otherwise silently change this node.
	node.contents = append(node.contents, contents...)

	if len(contents) > 1 {
		// More than one element: a character class, which is always CHARACTER.
		return node
	}

	// Exactly one element: it could be an operator, assertion or parenthesis.
	switch contents[0] {
	case '+':
		node.nodetype = PLUS
	case '?':
		node.nodetype = QUESTION
	case '*':
		node.nodetype = KLEENE
	case '|':
		node.nodetype = PIPE
	case CONCAT:
		node.nodetype = CONCATENATE
	case '^', '$':
		node.nodetype = ASSERTION
	case '(':
		node.nodetype = LPAREN
	case ')':
		node.nodetype = RPAREN
	default:
		node.nodetype = CHARACTER
	}
	return node
}
// newPostfixDotNode builds the node for the 'dot' metacharacter: a CHARACTER
// node flagged with isDot, whose only content is the ANY_CHAR rune and which
// is repeated exactly once.
func newPostfixDotNode() postfixNode {
	return postfixNode{
		nodetype:  CHARACTER,
		contents:  []rune{ANY_CHAR},
		startReps: 1,
		endReps:   1,
		isDot:     true,
	}
}
// newPostfixCharNode builds a CHARACTER node from the given runes without
// inspecting them, so runes such as '+' or '(' are not turned into operator
// or parenthesis nodes. The runes are copied into the node, and the node is
// repeated exactly once.
func newPostfixCharNode(contents ...rune) postfixNode {
	var runes []rune
	runes = append(runes, contents...)

	return postfixNode{
		nodetype:  CHARACTER,
		contents:  runes,
		startReps: 1,
		endReps:   1,
	}
}