@ -10,16 +10,16 @@ var escapedChars []rune = []rune("wWdDbBnaftrvsS0")
// Node types that a postfixNode can carry in its nodetype field.
//
// The camelCase names are the canonical constants and are numbered 0..9 by
// iota. They live in their own const block on purpose: iota is the index of
// the ConstSpec inside its declaration, so appending them to a block that
// already holds ten other constants would number them 10..19 and make them
// disagree with the ALL_CAPS spellings below.
const (
	characterNode   NodeType = iota // one or more literal runes (also used for "any character" nodes)
	charclassNode                   // non-negated character class built by newCharClassNode
	pipeNode                        // '|'
	concatenateNode                 // the CONCAT marker rune
	kleeneNode                      // '*'
	questionNode                    // '?'
	plusNode                        // '+'
	assertionNode                   // '^', '$', and the \b / \B escapes
	lparenNode                      // '('
	rparenNode                      // ')'
)

// ALL_CAPS spellings of the node types, kept so that code which still uses
// them keeps compiling. Each one is defined as its camelCase counterpart, so
// the two spellings always compare equal; the numeric values (0..9) are the
// same ones these names have always had. Prefer the camelCase names in new
// code.
const (
	CHARACTER   = characterNode
	CHARCLASS   = charclassNode
	PIPE        = pipeNode
	CONCATENATE = concatenateNode
	KLEENE      = kleeneNode
	QUESTION    = questionNode
	PLUS        = plusNode
	ASSERTION   = assertionNode
	LPAREN      = lparenNode
	RPAREN      = rparenNode
)
// Helper constants for lookarounds
@ -49,11 +49,11 @@ type postfixNode struct {
// it will not match.
func newCharClassNode ( nodes [ ] postfixNode , negated bool ) postfixNode {
rtv := postfixNode { }
rtv . nodetype = CHARCLASS
rtv . nodetype = charclassNode
rtv . startReps = 1
rtv . endReps = 1
if negated {
rtv . nodetype = CHARACTER
rtv . nodetype = characterNode
rtv . contents = [ ] rune { ANY_CHAR }
rtv . allChars = true
rtv . except = nodes
@ -70,55 +70,55 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
toReturn . endReps = 1
switch c {
case 's' : // Whitespace
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , whitespaceChars ... )
case 'S' : // Non-whitespace
toReturn = newPostfixDotNode ( )
toReturn . except = append ( [ ] postfixNode { } , newPostfixNode ( whitespaceChars ... ) )
case 'd' : // Digits
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , digitChars ... )
case 'D' : // Non-digits
toReturn = newPostfixDotNode ( )
toReturn . except = append ( [ ] postfixNode { } , newPostfixNode ( digitChars ... ) )
case 'w' : // word character
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , wordChars ... )
case 'W' : // Non-word character
toReturn = newPostfixDotNode ( )
toReturn . except = append ( [ ] postfixNode { } , newPostfixNode ( wordChars ... ) )
case 'b' , 'B' :
if c == 'b' && inCharClass {
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , rune ( 8 ) )
} else {
toReturn . nodetype = ASSERTION
toReturn . nodetype = assertionNode
toReturn . contents = append ( toReturn . contents , c )
}
case 'n' : // Newline character
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , '\n' )
case '0' : // NULL character
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , rune ( 0 ) )
case 'a' : // Bell character
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , rune ( 7 ) )
case 'f' : // Form feed character
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , rune ( 12 ) )
case 't' : // Horizontal tab character
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , rune ( 9 ) )
case 'r' : // Carriage return
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , rune ( 13 ) )
case 'v' : // Vertical tab
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , rune ( 11 ) )
case '-' : // Literal hyphen - only in character class
if inCharClass {
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , '-' )
} else {
return postfixNode { } , fmt . Errorf ( "invalid escape character" )
@ -127,7 +127,7 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
if isNormalChar ( c ) { // Normal characters cannot be escaped
return postfixNode { } , fmt . Errorf ( "invalid escape character" )
}
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , c )
}
return toReturn , nil
@ -142,36 +142,36 @@ func newPostfixNode(contents ...rune) postfixNode {
to_return . startReps = 1
to_return . endReps = 1
if len ( contents ) > 1 { // If the node has more than element, it must be a character class - the type must be CHARACTER
to_return . nodetype = CHARACTER
to_return . nodetype = characterNode
to_return . contents = contents
} else { // Node has one element, could be anything
switch contents [ 0 ] {
case '+' :
to_return . nodetype = PLUS
to_return . nodetype = plusNode
case '?' :
to_return . nodetype = QUESTION
to_return . nodetype = questionNode
case '*' :
to_return . nodetype = KLEENE
to_return . nodetype = kleeneNode
case '|' :
to_return . nodetype = PIPE
to_return . nodetype = pipeNode
case CONCAT :
to_return . nodetype = CONCATENATE
to_return . nodetype = concatenateNode
case '^' , '$' :
to_return . nodetype = ASSERTION
to_return . nodetype = assertionNode
case '(' :
to_return . nodetype = LPAREN
to_return . nodetype = lparenNode
case ')' :
to_return . nodetype = RPAREN
to_return . nodetype = rparenNode
default :
to_return . nodetype = CHARACTER
to_return . nodetype = characterNode
}
to_return . contents = append ( to_return . contents , contents ... )
// Special cases for LPAREN and RPAREN - they have special characters defined for them
if to_return . nodetype == LPAREN {
if to_return . nodetype == lparenNode {
to_return . contents = [ ] rune { LPAREN_CHAR }
}
if to_return . nodetype == RPAREN {
if to_return . nodetype == rparenNode {
to_return . contents = [ ] rune { RPAREN_CHAR }
}
}
@ -183,7 +183,7 @@ func newPostfixDotNode() postfixNode {
toReturn := postfixNode { }
toReturn . startReps = 1
toReturn . endReps = 1
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . allChars = true
toReturn . contents = [ ] rune { ANY_CHAR }
return toReturn
@ -194,7 +194,7 @@ func newPostfixCharNode(contents ...rune) postfixNode {
toReturn := postfixNode { }
toReturn . startReps = 1
toReturn . endReps = 1
toReturn . nodetype = CHARACTER
toReturn . nodetype = characterNode
toReturn . contents = append ( toReturn . contents , contents ... )
return toReturn
}