Bug fixes, changed the way I parse octal values

master
Aadhavan Srinivasan 2 days ago
parent 9115858261
commit 099612ae7f

@ -2,7 +2,6 @@ package main
import ( import (
"fmt" "fmt"
"math"
"slices" "slices"
"strconv" "strconv"
"unicode" "unicode"
@ -145,8 +144,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic. if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
return nil, fmt.Errorf("Empty character class.") return nil, fmt.Errorf("Empty character class.")
} }
for re_runes[i] != ']' { for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
i++ // Skip all characters inside brackets i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// TODO: Check for escaped characters // TODO: Check for escaped characters
// Check ahead for character range // Check ahead for character range
@ -303,29 +302,34 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if err != nil { if err != nil {
return nil, fmt.Errorf("Error parsing hex characters in expression.") return nil, fmt.Errorf("Error parsing hex characters in expression.")
} }
i += 2 i++ // Loop increment will take care of going forward
outQueue = append(outQueue, newPostfixCharNode(rune(hexVal))) outQueue = append(outQueue, newPostfixCharNode(rune(hexVal)))
} else { } else {
return nil, fmt.Errorf("Not enough hex characters found in expression.") return nil, fmt.Errorf("Not enough hex characters found in expression.")
} }
} else if isOctal(re_postfix[i]) { // Octal value } else if isOctal(re_postfix[i]) { // Octal value
var octVal int var octVal int64
n, err := fmt.Sscanf(string(re_postfix[i:]), "%d", &octVal) var octValStr string
if n < 1 || err != nil { numDigitsParsed := 0
for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 {
octValStr += string(re_postfix[i+numDigitsParsed])
numDigitsParsed++
}
octVal, err := strconv.ParseInt(octValStr, 8, 32)
if err != nil {
return nil, fmt.Errorf("Error parsing octal value in expression.") return nil, fmt.Errorf("Error parsing octal value in expression.")
} }
if octVal > 777 { if octVal > 0777 {
return nil, fmt.Errorf("Invalid octal value in expression.") return nil, fmt.Errorf("Invalid octal value in expression.")
} }
i += int(math.Ceil(math.Log10(float64(octVal)))) // Shift forward by the number of digits that were parsed i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
i-- // Move back one character, because the loop increment will move us back to the next character automatically outQueue = append(outQueue, newPostfixCharNode(rune(octVal)))
octValBase10, err := strconv.ParseInt(strconv.Itoa(octVal), 8, 0) } else {
escapedNode, err := newEscapedNode(re_postfix[i], false)
if err != nil { if err != nil {
return nil, fmt.Errorf("Error parsing octal value in expression.") return nil, fmt.Errorf("Invalid escape character in expression.")
} }
outQueue = append(outQueue, newPostfixCharNode(rune(octValBase10))) outQueue = append(outQueue, escapedNode)
} else {
outQueue = append(outQueue, newEscapedNode(re_postfix[i]))
} }
continue // Escaped character will automatically be skipped when loop variable increments continue // Escaped character will automatically be skipped when loop variable increments
} }
@ -446,19 +450,30 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else { } else {
return nil, fmt.Errorf("Not enough hex characters found in character class.") return nil, fmt.Errorf("Not enough hex characters found in character class.")
} }
} else if unicode.IsDigit(re_postfix[i]) { // Octal value } else if isOctal(re_postfix[i]) { // Octal value
var octVal int var octVal int64
n, err := fmt.Sscanf(string(re_postfix[i:]), "%d", &octVal) var octValStr string
if n < 1 || err != nil { numDigitsParsed := 0
for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
octValStr += string(re_postfix[i+numDigitsParsed])
numDigitsParsed++
}
octVal, err := strconv.ParseInt(octValStr, 8, 32)
if err != nil {
return nil, fmt.Errorf("Error parsing octal value in character class.") return nil, fmt.Errorf("Error parsing octal value in character class.")
} }
if octVal > 0777 { if octVal > 0777 {
return nil, fmt.Errorf("Invalid octal value in character class.") return nil, fmt.Errorf("Invalid octal value in character class.")
} }
i += int(math.Ceil(math.Log10(float64(octVal)) / math.Log10(8))) // Shift forward by the number of digits that were parsed i += numDigitsParsed // Shift forward by the number of characters parsed
chars = append(chars, newPostfixCharNode(rune(octVal))) chars = append(chars, newPostfixCharNode(rune(octVal)))
} else { } else {
chars = append(chars, newEscapedNode(re_postfix[i])) escapedNode, err := newEscapedNode(re_postfix[i], true)
if err != nil {
return nil, fmt.Errorf("Invalid escape character in character class.")
}
chars = append(chars, escapedNode)
i++
} }
} else { } else {
chars = append(chars, newPostfixCharNode(re_postfix[i])) chars = append(chars, newPostfixCharNode(re_postfix[i]))
@ -591,6 +606,7 @@ func thompson(re []postfixNode) (Reg, error) {
// - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list. // - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list.
for _, node := range c.except { for _, node := range c.except {
if node.allChars { if node.allChars {
state.allChars = false
// For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all, // For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all,
// and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match // and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match
// those. // those.

Loading…
Cancel
Save