package main
import (
"slices"
)
// NodeType identifies what kind of token a postfixNode holds.
type NodeType int
// The possible node types. Values are assigned sequentially by iota, starting at 0 for CHARACTER.
const (
CHARACTER NodeType = iota // One or more runes to match; also used for character classes, the dot and escape classes such as \s, \d and \w
PIPE // The '|' operator
CONCATENATE // The concatenation operator (the CONCAT rune)
KLEENE // The '*' operator
QUESTION // The '?' operator
PLUS // The '+' operator
ASSERTION // '^' and '$', and the escapes \b and \B
LPAREN // '('
RPAREN // ')'
)
// INFINITE_REPS is the sentinel repetition count meaning "no upper bound".
var INFINITE_REPS int = - 1 // Represents infinite reps, e.g. the omitted end of the range in {5,}
// postfixNode is a single node in the postfix representation of the expression.
// The constructors in this file all initialise startReps and endReps to 1.
type postfixNode struct {
nodetype NodeType // Kind of node: character, operator, assertion or parenthesis
contents [ ] rune // Contents of the node - the length is only >1 if the node represents a character class
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
allChars bool // Whether the node represents all characters (e.g. the dot metacharacter)
except [ ] rune // For inverted character classes: every unicode character matches _except_ the ones listed here. allChars is true in that case.
}
// newEscapedNode builds the node for an escape sequence. c is the character
// that followed the backslash in the pattern.
//
//   - 's', 'd' and 'w' give a CHARACTER node holding a copy of whitespaceChars,
//     digitChars or wordChars respectively.
//   - 'S', 'D' and 'W' give a CHARACTER node holding everything returned by
//     dotChars() except the runes of the corresponding set.
//   - 'b' and 'B' give an ASSERTION node whose contents is c itself.
//   - 'n' gives a CHARACTER node containing a newline.
//   - any other rune is kept as a literal CHARACTER.
//
// startReps and endReps are always 1.
func newEscapedNode(c rune) postfixNode {
	// complement returns the runes of dotChars() that are not in excluded.
	complement := func(excluded []rune) []rune {
		return slices.DeleteFunc(dotChars(), func(r rune) bool {
			return slices.Contains(excluded, r)
		})
	}

	// Every escape except \b and \B yields a CHARACTER node, so start from that.
	node := postfixNode{nodetype: CHARACTER, startReps: 1, endReps: 1}

	var runes []rune
	switch c {
	case 's': // Whitespace
		runes = whitespaceChars
	case 'S': // Non-whitespace
		runes = complement(whitespaceChars)
	case 'd': // Digits
		runes = digitChars
	case 'D': // Non-digits
		runes = complement(digitChars)
	case 'w': // Word characters
		runes = wordChars
	case 'W': // Non-word characters
		runes = complement(wordChars)
	case 'b', 'B': // Assertions keep the escape letter as their contents
		node.nodetype = ASSERTION
		runes = []rune{c}
	case 'n': // Newline
		runes = []rune{'\n'}
	default: // Not a recognised escape - keep the character as-is
		runes = []rune{c}
	}

	// Appending to the node's nil slice copies the runes, so the node never
	// shares storage with the package-level character sets.
	node.contents = append(node.contents, runes...)
	return node
}
// newPostfixNode creates a postfixNode from the given runes.
//
// A single rune is classified by its value: the operators '+', '?', '*', '|'
// and CONCAT, the assertions '^' and '$', and the parentheses '(' and ')' each
// map to their own NodeType; any other rune is a CHARACTER. Two or more runes
// always form a CHARACTER node (a character class) and are not classified
// individually.
//
// The node stores its own copy of contents, so a slice passed as
// newPostfixNode(s...) can be reused or modified by the caller afterwards
// without affecting the node. startReps and endReps are set to 1.
//
// It panics if contents is empty.
func newPostfixNode(contents ...rune) postfixNode {
	if len(contents) < 1 {
		panic("Empty node.")
	}

	node := postfixNode{
		nodetype:  CHARACTER,
		startReps: 1,
		endReps:   1,
		// Copy rather than keep the variadic slice: when the caller spreads an
		// existing slice, Go passes it through unchanged and the node would
		// otherwise share its backing array.
		contents: append([]rune(nil), contents...),
	}

	// More than one element means a character class, which is always CHARACTER.
	if len(contents) > 1 {
		return node
	}

	// Exactly one element: it may be an operator, an assertion or a parenthesis.
	switch contents[0] {
	case '+':
		node.nodetype = PLUS
	case '?':
		node.nodetype = QUESTION
	case '*':
		node.nodetype = KLEENE
	case '|':
		node.nodetype = PIPE
	case CONCAT:
		node.nodetype = CONCATENATE
	case '^', '$':
		node.nodetype = ASSERTION
	case '(':
		node.nodetype = LPAREN
	case ')':
		node.nodetype = RPAREN
	}
	return node
}
// newPostfixDotNode returns the node for the 'dot' metacharacter: a CHARACTER
// node with allChars set, whose contents is the single rune ANY_CHAR and whose
// startReps and endReps are both 1.
func newPostfixDotNode() postfixNode {
	return postfixNode{
		nodetype:  CHARACTER,
		contents:  []rune{ANY_CHAR},
		startReps: 1,
		endReps:   1,
		allChars:  true,
	}
}
// newPostfixCharNode returns a CHARACTER node holding a copy of contents.
// Unlike newPostfixNode it never inspects the runes, so operator characters
// such as '+' or '(' are kept as plain characters. startReps and endReps are
// both 1. With no arguments the node's contents is nil.
func newPostfixCharNode(contents ...rune) postfixNode {
	return postfixNode{
		nodetype:  CHARACTER,
		contents:  append([]rune(nil), contents...),
		startReps: 1,
		endReps:   1,
	}
}