From 099612ae7fb3975b81c137e520a80c4b7afb6d07 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Mon, 20 Jan 2025 18:04:05 -0500 Subject: [PATCH] Bug fixes, changed the way I parse octal values --- compile.go | 58 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/compile.go b/compile.go index 91724e1..c74cad0 100644 --- a/compile.go +++ b/compile.go @@ -2,7 +2,6 @@ package main import ( "fmt" - "math" "slices" "strconv" "unicode" @@ -145,8 +144,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { if i < len(re_runes)-1 && re_runes[i+1] == ']' { // Nothing inside brackets - panic. return nil, fmt.Errorf("Empty character class.") } - for re_runes[i] != ']' { - i++ // Skip all characters inside brackets + for re_runes[i] != ']' || i == 0 || re_runes[i-1] == '\\' { + i++ // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash) // TODO: Check for escaped characters // Check ahead for character range @@ -303,29 +302,34 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { if err != nil { return nil, fmt.Errorf("Error parsing hex characters in expression.") } - i += 2 + i++ // Loop increment will take care of going forward outQueue = append(outQueue, newPostfixCharNode(rune(hexVal))) } else { return nil, fmt.Errorf("Not enough hex characters found in expression.") } } else if isOctal(re_postfix[i]) { // Octal value - var octVal int - n, err := fmt.Sscanf(string(re_postfix[i:]), "%d", &octVal) - if n < 1 || err != nil { + var octVal int64 + var octValStr string + numDigitsParsed := 0 + for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { + octValStr += string(re_postfix[i+numDigitsParsed]) + numDigitsParsed++ + } + octVal, err := strconv.ParseInt(octValStr, 8, 32) + if err != nil { return nil, fmt.Errorf("Error parsing octal value in expression.") } - if octVal > 777 { + if octVal > 0777 { return nil, fmt.Errorf("Invalid octal value in expression.") } - i += int(math.Ceil(math.Log10(float64(octVal)))) // Shift forward by the number of digits that were parsed - i-- // Move back one character, because the loop increment will move us back to the next character automatically - octValBase10, err := strconv.ParseInt(strconv.Itoa(octVal), 8, 0) + i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically + outQueue = append(outQueue, newPostfixCharNode(rune(octVal))) + } else { + escapedNode, err := newEscapedNode(re_postfix[i], false) if err != nil { - return nil, fmt.Errorf("Error parsing octal value in expression.") + return nil, fmt.Errorf("Invalid escape character in expression.") } - outQueue = append(outQueue, newPostfixCharNode(rune(octValBase10))) - } else { - outQueue = append(outQueue, newEscapedNode(re_postfix[i])) + outQueue = append(outQueue, escapedNode) } continue // Escaped character will automatically be skipped when loop variable increments } @@ -446,19 +450,30 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { } else { return nil, fmt.Errorf("Not enough hex characters found in character class.") } - } else if unicode.IsDigit(re_postfix[i]) { // Octal value - var octVal int - n, err := fmt.Sscanf(string(re_postfix[i:]), "%d", &octVal) - if n < 1 || err != nil { + } else if isOctal(re_postfix[i]) { // Octal value + var octVal int64 + var octValStr string + numDigitsParsed := 0 + for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else) + octValStr += string(re_postfix[i+numDigitsParsed]) + numDigitsParsed++ + } + octVal, err := strconv.ParseInt(octValStr, 8, 32) + if err != nil { return nil, fmt.Errorf("Error parsing octal value in character class.") } if octVal > 0777 { return nil, fmt.Errorf("Invalid octal value in character class.") } - i += int(math.Ceil(math.Log10(float64(octVal)) / math.Log10(8))) // Shift forward by the number of digits that were parsed + i += numDigitsParsed // Shift forward by the number of characters parsed chars = append(chars, newPostfixCharNode(rune(octVal))) } else { - chars = append(chars, newEscapedNode(re_postfix[i])) + escapedNode, err := newEscapedNode(re_postfix[i], true) + if err != nil { + return nil, fmt.Errorf("Invalid escape character in character class.") + } + chars = append(chars, escapedNode) + i++ } } else { chars = append(chars, newPostfixCharNode(re_postfix[i])) @@ -591,6 +606,7 @@ func thompson(re []postfixNode) (Reg, error) { // - If the node doesn't have exceptions (allChars == false) then the contents of the node are added to the except list. for _, node := range c.except { if node.allChars { + state.allChars = false // For each postfixNode in node.except, extract the contents of the postfixNode. Concatenate them all, // and them to the state's _content_. As mentioned above, if the exception has exceptions, then we can match // those.