Bug fixes, changed the way I parse octal values
This commit is contained in:
60
compile.go
60
compile.go
@@ -2,7 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
|
||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"unicode"
|
"unicode"
|
||||||
@@ -145,8 +144,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
|
if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic.
|
||||||
return nil, fmt.Errorf("Empty character class.")
|
return nil, fmt.Errorf("Empty character class.")
|
||||||
}
|
}
|
||||||
for re_runes[i] != ']' {
|
for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' {
|
||||||
i++ // Skip all characters inside brackets
|
i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
|
||||||
// TODO: Check for escaped characters
|
// TODO: Check for escaped characters
|
||||||
|
|
||||||
// Check ahead for character range
|
// Check ahead for character range
|
||||||
@@ -303,29 +302,34 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("Error parsing hex characters in expression.")
|
return nil, fmt.Errorf("Error parsing hex characters in expression.")
|
||||||
}
|
}
|
||||||
i += 2
|
i++ // Loop increment will take care of going forward
|
||||||
outQueue = append(outQueue, newPostfixCharNode(rune(hexVal)))
|
outQueue = append(outQueue, newPostfixCharNode(rune(hexVal)))
|
||||||
} else {
|
} else {
|
||||||
return nil, fmt.Errorf("Not enough hex characters found in expression.")
|
return nil, fmt.Errorf("Not enough hex characters found in expression.")
|
||||||
}
|
}
|
||||||
} else if isOctal(re_postfix[i]) { // Octal value
|
} else if isOctal(re_postfix[i]) { // Octal value
|
||||||
var octVal int
|
var octVal int64
|
||||||
n, err := fmt.Sscanf(string(re_postfix[i:]), "%d", &octVal)
|
var octValStr string
|
||||||
if n < 1 || err != nil {
|
numDigitsParsed := 0
|
||||||
return nil, fmt.Errorf("Error parsing octal value in expression.")
|
for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 {
|
||||||
|
octValStr += string(re_postfix[i+numDigitsParsed])
|
||||||
|
numDigitsParsed++
|
||||||
}
|
}
|
||||||
if octVal > 777 {
|
octVal, err := strconv.ParseInt(octValStr, 8, 32)
|
||||||
return nil, fmt.Errorf("Invalid octal value in expression.")
|
|
||||||
}
|
|
||||||
i += int(math.Ceil(math.Log10(float64(octVal)))) // Shift forward by the number of digits that were parsed
|
|
||||||
i-- // Move back one character, because the loop increment will move us back to the next character automatically
|
|
||||||
octValBase10, err := strconv.ParseInt(strconv.Itoa(octVal), 8, 0)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("Error parsing octal value in expression.")
|
return nil, fmt.Errorf("Error parsing octal value in expression.")
|
||||||
}
|
}
|
||||||
outQueue = append(outQueue, newPostfixCharNode(rune(octValBase10)))
|
if octVal > 0777 {
|
||||||
|
return nil, fmt.Errorf("Invalid octal value in expression.")
|
||||||
|
}
|
||||||
|
i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
|
||||||
|
outQueue = append(outQueue, newPostfixCharNode(rune(octVal)))
|
||||||
} else {
|
} else {
|
||||||
outQueue = append(outQueue, newEscapedNode(re_postfix[i]))
|
escapedNode, err := newEscapedNode(re_postfix[i], false)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("Invalid escape character in expression.")
|
||||||
|
}
|
||||||
|
outQueue = append(outQueue, escapedNode)
|
||||||
}
|
}
|
||||||
continue // Escaped character will automatically be skipped when loop variable increments
|
continue // Escaped character will automatically be skipped when loop variable increments
|
||||||
}
|
}
|
||||||
@@ -446,19 +450,30 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
} else {
|
} else {
|
||||||
return nil, fmt.Errorf("Not enough hex characters found in character class.")
|
return nil, fmt.Errorf("Not enough hex characters found in character class.")
|
||||||
}
|
}
|
||||||
} else if unicode.IsDigit(re_postfix[i]) { // Octal value
|
} else if isOctal(re_postfix[i]) { // Octal value
|
||||||
var octVal int
|
var octVal int64
|
||||||
n, err := fmt.Sscanf(string(re_postfix[i:]), "%d", &octVal)
|
var octValStr string
|
||||||
if n < 1 || err != nil {
|
numDigitsParsed := 0
|
||||||
|
for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
|
||||||
|
octValStr += string(re_postfix[i+numDigitsParsed])
|
||||||
|
numDigitsParsed++
|
||||||
|
}
|
||||||
|
octVal, err := strconv.ParseInt(octValStr, 8, 32)
|
||||||
|
if err != nil {
|
||||||
return nil, fmt.Errorf("Error parsing octal value in character class.")
|
return nil, fmt.Errorf("Error parsing octal value in character class.")
|
||||||
}
|
}
|
||||||
if octVal > 0777 {
|
if octVal > 0777 {
|
||||||
return nil, fmt.Errorf("Invalid octal value in character class.")
|
return nil, fmt.Errorf("Invalid octal value in character class.")
|
||||||
}
|
}
|
||||||
i += int(math.Ceil(math.Log10(float64(octVal)) / math.Log10(8))) // Shift forward by the number of digits that were parsed
|
i += numDigitsParsed // Shift forward by the number of characters parsed
|
||||||
chars = append(chars, newPostfixCharNode(rune(octVal)))
|
chars = append(chars, newPostfixCharNode(rune(octVal)))
|
||||||
} else {
|
} else {
|
||||||
chars = append(chars, newEscapedNode(re_postfix[i]))
|
escapedNode, err := newEscapedNode(re_postfix[i], true)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("Invalid escape character in character class.")
|
||||||
|
}
|
||||||
|
chars = append(chars, escapedNode)
|
||||||
|
i++
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
chars = append(chars, newPostfixCharNode(re_postfix[i]))
|
||||||
@@ -591,6 +606,7 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
// - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list.
|
// - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list.
|
||||||
for _, node := range c.except {
|
for _, node := range c.except {
|
||||||
if node.allChars {
|
if node.allChars {
|
||||||
|
state.allChars = false
|
||||||
// For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all,
|
// For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all,
|
||||||
// and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match
|
// and add them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match
|
||||||
// those.
|
// those.
|
||||||
|
Reference in New Issue
Block a user