package main
// NodeType identifies what kind of element a postfixNode represents. It is
// stored in the nodetype field of postfixNode.
type NodeType int
// The possible node types. Values are assigned sequentially by iota, so
// CHARACTER is 0 (the zero value of NodeType) and the order of this list
// determines every other value.
const (
CHARACTER NodeType = iota // A literal character or a set of characters; also used for the dot node and the escaped character classes
PIPE // Assigned by newPostfixNode to a '|' node
CONCATENATE // Assigned by newPostfixNode to a node holding the CONCAT rune
KLEENE // Assigned by newPostfixNode to a '*' node
QUESTION // Assigned by newPostfixNode to a '?' node
PLUS // Assigned by newPostfixNode to a '+' node
ASSERTION // Assigned to '^' and '$' nodes, and to the escaped 'b' / 'B' nodes
LPAREN // Assigned by newPostfixNode to a '(' node
RPAREN // Assigned by newPostfixNode to a ')' node
)
// Helper constants for lookarounds. All four are untyped integer constants.
// POSITIVE/NEGATIVE describe the sign of a lookaround and LOOKAHEAD/LOOKBEHIND
// its direction (presumably the values stored in postfixNode.lookaroundSign
// and postfixNode.lookaroundDir - confirm against callers).
const (
	POSITIVE = 1
	NEGATIVE = -1

	LOOKAHEAD  = 1
	LOOKBEHIND = -1
)
// INFINITE_REPS is the sentinel value for an unbounded number of repetitions,
// e.g. the missing upper bound in {5,}.
var INFINITE_REPS = -1
// postfixNode is a single node in the postfix representation of the
// expression. The constructors newEscapedNode, newPostfixNode,
// newPostfixDotNode and newPostfixCharNode all initialise startReps and
// endReps to 1.
type postfixNode struct {
nodetype NodeType // What kind of node this is (one of the NodeType constants)
contents [ ] rune // Contents of the node: the rune(s) it was built from, or a placeholder rune for parentheses and the dot node
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers (presumably INFINITE_REPS when unbounded - confirm against callers)
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
except [ ] rune // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround (presumably POSITIVE or NEGATIVE - confirm against callers).
lookaroundDir int // Lookbehind or lookahead (presumably LOOKBEHIND or LOOKAHEAD - confirm against callers).
}
// newEscapedNode builds the node for an escape sequence; c is the character
// that followed the backslash.
//
//   - 's', 'd', 'w' yield CHARACTER nodes holding a copy of whitespaceChars,
//     digitChars and wordChars respectively.
//   - 'S', 'D', 'W' yield dot nodes (see newPostfixDotNode) whose except list
//     is a copy of the corresponding character set.
//   - 'b', 'B' yield ASSERTION nodes whose contents is c itself.
//   - 'n' yields a CHARACTER node holding a newline.
//   - Any other character yields a CHARACTER node holding c.
//
// Every returned node has startReps and endReps set to 1.
func newEscapedNode(c rune) postfixNode {
	// charNode builds a CHARACTER node around a private copy of chars.
	charNode := func(chars ...rune) postfixNode {
		node := postfixNode{startReps: 1, endReps: 1}
		node.nodetype = CHARACTER
		node.contents = append(node.contents, chars...)
		return node
	}
	// allExcept builds a match-everything node that excludes the given set.
	allExcept := func(excluded []rune) postfixNode {
		node := newPostfixDotNode()
		node.except = append([]rune{}, excluded...)
		return node
	}

	switch c {
	case 's': // Whitespace
		return charNode(whitespaceChars...)
	case 'S': // Non-whitespace
		return allExcept(whitespaceChars)
	case 'd': // Digits
		return charNode(digitChars...)
	case 'D': // Non-digits
		return allExcept(digitChars)
	case 'w': // Word character
		return charNode(wordChars...)
	case 'W': // Non-word character
		return allExcept(wordChars)
	case 'b', 'B': // The node remembers which of the two it was via its contents
		node := charNode(c)
		node.nodetype = ASSERTION
		return node
	case 'n': // Newline character
		return charNode('\n')
	default: // None of the above - treat it as a regular character
		return charNode(c)
	}
}
// newPostfixNode creates a postfixNode from the given runes, with startReps
// and endReps set to 1.
//
// When more than one rune is given, the node is a character class: its type is
// CHARACTER and its contents is a copy of the runes.
//
// When exactly one rune is given, the node type is chosen from that rune:
// '+' -> PLUS, '?' -> QUESTION, '*' -> KLEENE, '|' -> PIPE, CONCAT ->
// CONCATENATE, '^' and '$' -> ASSERTION, '(' -> LPAREN, ')' -> RPAREN, and
// anything else -> CHARACTER. The contents is that rune, except for LPAREN and
// RPAREN nodes, whose contents are LPAREN_CHAR and RPAREN_CHAR respectively.
//
// It panics if called with no runes.
func newPostfixNode(contents ...rune) postfixNode {
	if len(contents) < 1 {
		panic("Empty node.")
	}

	// Always copy the runes. When the function is invoked as
	// newPostfixNode(s...), contents shares s's backing array, so storing it
	// directly would let later writes to s silently change the node.
	toReturn := postfixNode{
		startReps: 1,
		endReps:   1,
		contents:  append([]rune(nil), contents...),
	}

	// More than one element can only be a character class.
	if len(contents) > 1 {
		toReturn.nodetype = CHARACTER
		return toReturn
	}

	// Exactly one element: it could be an operator, an assertion, a
	// parenthesis or a plain character.
	switch contents[0] {
	case '+':
		toReturn.nodetype = PLUS
	case '?':
		toReturn.nodetype = QUESTION
	case '*':
		toReturn.nodetype = KLEENE
	case '|':
		toReturn.nodetype = PIPE
	case CONCAT:
		toReturn.nodetype = CONCATENATE
	case '^', '$':
		toReturn.nodetype = ASSERTION
	case '(':
		// Parentheses are stored using their dedicated placeholder runes
		// rather than the literal character.
		toReturn.nodetype = LPAREN
		toReturn.contents = []rune{LPAREN_CHAR}
	case ')':
		toReturn.nodetype = RPAREN
		toReturn.contents = []rune{RPAREN_CHAR}
	default:
		toReturn.nodetype = CHARACTER
	}
	return toReturn
}
// newPostfixDotNode returns the node for the 'dot' metacharacter: a CHARACTER
// node with allChars set, ANY_CHAR as its only content, and startReps and
// endReps both 1. The except list is left empty.
func newPostfixDotNode() postfixNode {
	return postfixNode{
		nodetype:  CHARACTER,
		contents:  []rune{ANY_CHAR},
		startReps: 1,
		endReps:   1,
		allChars:  true,
	}
}
// newPostfixCharNode returns a CHARACTER node holding a copy of the given
// runes, with startReps and endReps both 1. Unlike newPostfixNode, the runes
// are never interpreted: a single '*' or '(' still produces a CHARACTER node,
// and calling it with no runes does not panic (contents is simply left nil).
func newPostfixCharNode(contents ...rune) postfixNode {
	node := postfixNode{
		nodetype:  CHARACTER,
		startReps: 1,
		endReps:   1,
	}
	node.contents = append(node.contents, contents...)
	return node
}