Bug fixes, changed the way I parse octal values

master
Aadhavan Srinivasan 2 days ago
parent 9115858261
commit 099612ae7f

@ -2,7 +2,6 @@ package main
import (
"fmt"
"math"
"slices"
"strconv"
"unicode"
@ -145,8 +144,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
return nil, fmt.Errorf("Empty character class.")
}
for re_runes[i] != ']' {
i++ // Skip all characters inside brackets
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// TODO: Check for escaped characters
// Check ahead for character range
@ -303,29 +302,34 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if err != nil {
return nil, fmt.Errorf("Error parsing hex characters in expression.")
}
i += 2
i++ // Loop increment will take care of going forward
outQueue = append(outQueue, newPostfixCharNode(rune(hexVal)))
} else {
return nil, fmt.Errorf("Not enough hex characters found in expression.")
}
} else if isOctal(re_postfix[i]) { // Octal value
var octVal int
n, err := fmt.Sscanf(string(re_postfix[i:]), "%d", &octVal)
if n < 1 || err != nil {
var octVal int64
var octValStr string
numDigitsParsed := 0
for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 {
octValStr += string(re_postfix[i+numDigitsParsed])
numDigitsParsed++
}
octVal, err := strconv.ParseInt(octValStr, 8, 32)
if err != nil {
return nil, fmt.Errorf("Error parsing octal value in expression.")
}
if octVal > 777 {
if octVal > 0777 {
return nil, fmt.Errorf("Invalid octal value in expression.")
}
i += int(math.Ceil(math.Log10(float64(octVal)))) // Shift forward by the number of digits that were parsed
i-- // Move back one character, because the loop increment will move us back to the next character automatically
octValBase10, err := strconv.ParseInt(strconv.Itoa(octVal), 8, 0)
i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
outQueue = append(outQueue, newPostfixCharNode(rune(octVal)))
} else {
escapedNode, err := newEscapedNode(re_postfix[i], false)
if err != nil {
return nil, fmt.Errorf("Error parsing octal value in expression.")
return nil, fmt.Errorf("Invalid escape character in expression.")
}
outQueue = append(outQueue, newPostfixCharNode(rune(octValBase10)))
} else {
outQueue = append(outQueue, newEscapedNode(re_postfix[i]))
outQueue = append(outQueue, escapedNode)
}
continue // Escaped character will automatically be skipped when loop variable increments
}
@ -446,19 +450,30 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
return nil, fmt.Errorf("Not enough hex characters found in character class.")
}
} else if unicode.IsDigit(re_postfix[i]) { // Octal value
var octVal int
n, err := fmt.Sscanf(string(re_postfix[i:]), "%d", &octVal)
if n < 1 || err != nil {
} else if isOctal(re_postfix[i]) { // Octal value
var octVal int64
var octValStr string
numDigitsParsed := 0
for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
octValStr += string(re_postfix[i+numDigitsParsed])
numDigitsParsed++
}
octVal, err := strconv.ParseInt(octValStr, 8, 32)
if err != nil {
return nil, fmt.Errorf("Error parsing octal value in character class.")
}
if octVal > 0777 {
return nil, fmt.Errorf("Invalid octal value in character class.")
}
i += int(math.Ceil(math.Log10(float64(octVal)) / math.Log10(8))) // Shift forward by the number of digits that were parsed
i += numDigitsParsed // Shift forward by the number of characters parsed
chars = append(chars, newPostfixCharNode(rune(octVal)))
} else {
chars = append(chars, newEscapedNode(re_postfix[i]))
escapedNode, err := newEscapedNode(re_postfix[i], true)
if err != nil {
return nil, fmt.Errorf("Invalid escape character in character class.")
}
chars = append(chars, escapedNode)
i++
}
} else {
chars = append(chars, newPostfixCharNode(re_postfix[i]))
@ -591,6 +606,7 @@ func thompson(re []postfixNode) (Reg, error) {
// - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list.
for _, node := range c.except {
if node.allChars {
state.allChars = false
// For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all,
// and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match
// those.

Loading…
Cancel
Save