@ -2,7 +2,6 @@ package main
import (
import (
"fmt"
"fmt"
"math"
"slices"
"slices"
"strconv"
"strconv"
"unicode"
"unicode"
@ -145,8 +144,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i < len ( re_runes ) - 1 && re_runes [ i + 1 ] == ']' { // Nothing inside brackets - panic.
if i < len ( re_runes ) - 1 && re_runes [ i + 1 ] == ']' { // Nothing inside brackets - panic.
return nil , fmt . Errorf ( "Empty character class." )
return nil , fmt . Errorf ( "Empty character class." )
}
}
for re_runes [ i ] != ']' {
for re_runes [ i ] != ']' || i == 0 || re_runes [ i - 1 ] == '\\' {
i ++ // Skip all characters inside brackets
i ++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// TODO: Check for escaped characters
// TODO: Check for escaped characters
// Check ahead for character range
// Check ahead for character range
@ -303,29 +302,34 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if err != nil {
if err != nil {
return nil , fmt . Errorf ( "Error parsing hex characters in expression." )
return nil , fmt . Errorf ( "Error parsing hex characters in expression." )
}
}
i += 2
i ++ // Loop increment will take care of going forward
outQueue = append ( outQueue , newPostfixCharNode ( rune ( hexVal ) ) )
outQueue = append ( outQueue , newPostfixCharNode ( rune ( hexVal ) ) )
} else {
} else {
return nil , fmt . Errorf ( "Not enough hex characters found in expression." )
return nil , fmt . Errorf ( "Not enough hex characters found in expression." )
}
}
} else if isOctal ( re_postfix [ i ] ) { // Octal value
} else if isOctal ( re_postfix [ i ] ) { // Octal value
var octVal int
var octVal int64
n , err := fmt . Sscanf ( string ( re_postfix [ i : ] ) , "%d" , & octVal )
var octValStr string
if n < 1 || err != nil {
numDigitsParsed := 0
for ( i + numDigitsParsed ) < len ( re_postfix ) && isOctal ( re_postfix [ i + numDigitsParsed ] ) && numDigitsParsed <= 3 {
octValStr += string ( re_postfix [ i + numDigitsParsed ] )
numDigitsParsed ++
}
octVal , err := strconv . ParseInt ( octValStr , 8 , 32 )
if err != nil {
return nil , fmt . Errorf ( "Error parsing octal value in expression." )
return nil , fmt . Errorf ( "Error parsing octal value in expression." )
}
}
if octVal > 777 {
if octVal > 0 777 {
return nil , fmt . Errorf ( "Invalid octal value in expression." )
return nil , fmt . Errorf ( "Invalid octal value in expression." )
}
}
i += int ( math . Ceil ( math . Log10 ( float64 ( octVal ) ) ) ) // Shift forward by the number of digits that were parsed
i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
i -- // Move back one character, because the loop increment will move us back to the next character automatically
outQueue = append ( outQueue , newPostfixCharNode ( rune ( octVal ) ) )
octValBase10 , err := strconv . ParseInt ( strconv . Itoa ( octVal ) , 8 , 0 )
} else {
escapedNode , err := newEscapedNode ( re_postfix [ i ] , false )
if err != nil {
if err != nil {
return nil , fmt . Errorf ( " Error parsing octal value in expression.")
return nil , fmt . Errorf ( " Invalid escape character in expression.")
}
}
outQueue = append ( outQueue , newPostfixCharNode ( rune ( octValBase10 ) ) )
outQueue = append ( outQueue , escapedNode )
} else {
outQueue = append ( outQueue , newEscapedNode ( re_postfix [ i ] ) )
}
}
continue // Escaped character will automatically be skipped when loop variable increments
continue // Escaped character will automatically be skipped when loop variable increments
}
}
@ -446,19 +450,30 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
} else {
return nil , fmt . Errorf ( "Not enough hex characters found in character class." )
return nil , fmt . Errorf ( "Not enough hex characters found in character class." )
}
}
} else if unicode . IsDigit ( re_postfix [ i ] ) { // Octal value
} else if isOctal ( re_postfix [ i ] ) { // Octal value
var octVal int
var octVal int64
n , err := fmt . Sscanf ( string ( re_postfix [ i : ] ) , "%d" , & octVal )
var octValStr string
if n < 1 || err != nil {
numDigitsParsed := 0
for ( i + numDigitsParsed ) < len ( re_postfix ) - 1 && isOctal ( re_postfix [ i + numDigitsParsed ] ) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
octValStr += string ( re_postfix [ i + numDigitsParsed ] )
numDigitsParsed ++
}
octVal , err := strconv . ParseInt ( octValStr , 8 , 32 )
if err != nil {
return nil , fmt . Errorf ( "Error parsing octal value in character class." )
return nil , fmt . Errorf ( "Error parsing octal value in character class." )
}
}
if octVal > 0777 {
if octVal > 0777 {
return nil , fmt . Errorf ( "Invalid octal value in character class." )
return nil , fmt . Errorf ( "Invalid octal value in character class." )
}
}
i += int ( math . Ceil ( math . Log10 ( float64 ( octVal ) ) / math . Log10 ( 8 ) ) ) // Shift forward by the number of digits that were parsed
i += numDigitsParsed // Shift forward by the number of characters parsed
chars = append ( chars , newPostfixCharNode ( rune ( octVal ) ) )
chars = append ( chars , newPostfixCharNode ( rune ( octVal ) ) )
} else {
} else {
chars = append ( chars , newEscapedNode ( re_postfix [ i ] ) )
escapedNode , err := newEscapedNode ( re_postfix [ i ] , true )
if err != nil {
return nil , fmt . Errorf ( "Invalid escape character in character class." )
}
chars = append ( chars , escapedNode )
i ++
}
}
} else {
} else {
chars = append ( chars , newPostfixCharNode ( re_postfix [ i ] ) )
chars = append ( chars , newPostfixCharNode ( re_postfix [ i ] ) )
@ -591,6 +606,7 @@ func thompson(re []postfixNode) (Reg, error) {
// - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list.
// - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list.
for _ , node := range c . except {
for _ , node := range c . except {
if node . allChars {
if node . allChars {
state . allChars = false
// For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all,
// For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all,
// and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match
// and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match
// those.
// those.