@ -815,10 +815,10 @@ func thompson(re []postfixNode) (Reg, error) {
for _ , c := range re {
for _ , c := range re {
if c . nodetype == characterNode || c . nodetype == assertionNode {
if c . nodetype == characterNode || c . nodetype == assertionNode {
state := State { }
state ToAdd := State { }
state . transitions = make ( map [ int ] [ ] * State )
state ToAdd . transitions = make ( map [ int ] [ ] * State )
if c . allChars {
if c . allChars {
state . allChars = true
state ToAdd . allChars = true
if len ( c . except ) != 0 {
if len ( c . except ) != 0 {
// For each node that I am 'excepting' (eg. in an inverted character class):
// For each node that I am 'excepting' (eg. in an inverted character class):
// - If the node itself has exceptions, then the exceptions cancel out.
// - If the node itself has exceptions, then the exceptions cancel out.
@ -827,7 +827,7 @@ func thompson(re []postfixNode) (Reg, error) {
// - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list.
// - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list.
for _ , node := range c . except {
for _ , node := range c . except {
if node . allChars {
if node . allChars {
state . allChars = false
state ToAdd . allChars = false
// For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all,
// For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all,
// and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match
// and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match
// those.
// those.
@ -840,7 +840,7 @@ func thompson(re []postfixNode) (Reg, error) {
}
}
return nodeContents
return nodeContents
} ) ... )
} ) ... )
state . content = rune2Contents ( nodeExceptChars )
state ToAdd . content = rune2Contents ( nodeExceptChars )
} else {
} else {
charsToAdd := node . contents
charsToAdd := node . contents
if caseInsensitive {
if caseInsensitive {
@ -848,7 +848,7 @@ func thompson(re []postfixNode) (Reg, error) {
return allCases ( r , caseInsensitive )
return allCases ( r , caseInsensitive )
} ) ... )
} ) ... )
}
}
state . except = append ( state . except , charsToAdd ... )
state ToAdd . except = append ( state ToAdd . except , charsToAdd ... )
}
}
}
}
}
}
@ -861,43 +861,43 @@ func thompson(re []postfixNode) (Reg, error) {
return allCases ( r , caseInsensitive )
return allCases ( r , caseInsensitive )
} ) ... )
} ) ... )
}
}
state . content = stateContents ( append ( [ ] int ( state . content ) , [ ] int ( rune2Contents ( runesToAdd ) ) ... ) )
state ToAdd . content = stateContents ( append ( [ ] int ( state ToAdd . content ) , [ ] int ( rune2Contents ( runesToAdd ) ) ... ) )
state . output = make ( [ ] * State , 0 )
state ToAdd . output = make ( [ ] * State , 0 )
state . output = append ( state . output , & state )
state ToAdd . output = append ( state ToAdd . output , & state ToAdd )
state . isEmpty = false
state ToAdd . isEmpty = false
if c . nodetype == assertionNode {
if c . nodetype == assertionNode {
state . isEmpty = true // This is a little weird. A lookaround has the 'isEmpty' flag set, even though it _isn't_ empty (the contents are the regex). But, there's so much error-checking that relies on this flag that it's better to keep it this way.
state ToAdd . isEmpty = true // This is a little weird. A lookaround has the 'isEmpty' flag set, even though it _isn't_ empty (the contents are the regex). But, there's so much error-checking that relies on this flag that it's better to keep it this way.
state . content = newContents ( EPSILON ) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of string
state ToAdd . content = newContents ( EPSILON ) // Ideally, an assertion shouldn't have any content, since it doesn't say anything about the content of string
if c . lookaroundDir == 0 || c . lookaroundSign == 0 {
if c . lookaroundDir == 0 || c . lookaroundSign == 0 {
switch c . contents [ 0 ] {
switch c . contents [ 0 ] {
case '^' :
case '^' :
state . assert = sosAssert
state ToAdd . assert = sosAssert
case '$' :
case '$' :
state . assert = eosAssert
state ToAdd . assert = eosAssert
case 'b' :
case 'b' :
state . assert = wboundAssert
state ToAdd . assert = wboundAssert
case 'B' :
case 'B' :
state . assert = nonwboundAssert
state ToAdd . assert = nonwboundAssert
}
}
} else { // Lookaround
} else { // Lookaround
state . lookaroundRegex = string ( c . contents )
state ToAdd . lookaroundRegex = string ( c . contents )
if c . lookaroundDir == lookahead {
if c . lookaroundDir == lookahead {
if c . lookaroundSign == positive {
if c . lookaroundSign == positive {
state . assert = plaAssert
state ToAdd . assert = plaAssert
}
}
if c . lookaroundSign == negative {
if c . lookaroundSign == negative {
state . assert = nlaAssert
state ToAdd . assert = nlaAssert
}
}
}
}
if c . lookaroundDir == lookbehind {
if c . lookaroundDir == lookbehind {
if c . lookaroundSign == positive {
if c . lookaroundSign == positive {
state . assert = plbAssert
state ToAdd . assert = plbAssert
}
}
if c . lookaroundSign == negative {
if c . lookaroundSign == negative {
state . assert = nlbAssert
state ToAdd . assert = nlbAssert
}
}
}
}
tmpRe , err := shuntingYard ( state . lookaroundRegex )
tmpRe , err := shuntingYard ( state ToAdd . lookaroundRegex )
if err != nil {
if err != nil {
return Reg { } , fmt . Errorf ( "error parsing lookaround: %w" , err )
return Reg { } , fmt . Errorf ( "error parsing lookaround: %w" , err )
}
}
@ -905,17 +905,17 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
if err != nil {
return Reg { } , fmt . Errorf ( "error compiling lookaround: %w" , err )
return Reg { } , fmt . Errorf ( "error compiling lookaround: %w" , err )
}
}
state . lookaroundNFA = reg . start
state ToAdd . lookaroundNFA = reg . start
state . lookaroundNumCaptureGroups = reg . numGroups
state ToAdd . lookaroundNumCaptureGroups = reg . numGroups
}
}
}
}
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
replaceByValue ( [ ] int ( state . content ) , int ( ESC_BACKSLASH ) , '\\' )
replaceByValue ( [ ] int ( state ToAdd . content ) , int ( ESC_BACKSLASH ) , '\\' )
replaceByValue ( state . except , ESC_BACKSLASH , '\\' )
replaceByValue ( state ToAdd . except , ESC_BACKSLASH , '\\' )
nfa = append ( nfa , & state )
nfa = append ( nfa , & state ToAdd )
}
}
if c . nodetype == lparenNode || c . nodetype == rparenNode {
if c . nodetype == lparenNode || c . nodetype == rparenNode {
s := & State { }
s := & State { }