Started implementing backreferences (octal values must now be prefixed with \0, so that any other escaped number can denote a backreference)
This commit is contained in:
@@ -313,13 +313,20 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
} else {
|
||||
return nil, fmt.Errorf("invalid hex value in expression")
|
||||
}
|
||||
} else if isOctal(re_runes[i]) {
|
||||
} else if re_runes[i] == '0' { // Start of octal value
|
||||
numDigits := 1
|
||||
for i+numDigits < len(re_runes) && numDigits < 3 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 3)
|
||||
for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0)
|
||||
numDigits++
|
||||
}
|
||||
re_postfix = append(re_postfix, re_runes[i:i+numDigits]...)
|
||||
i += (numDigits - 1) // I have to move back a step, so that I can add a concatenation operator if necessary, and so that the increment at the bottom of the loop works as intended
|
||||
} else if unicode.IsDigit(re_runes[i]) { // Any other number - backreference
|
||||
numDigits := 1
|
||||
for i+numDigits < len(re_runes) && unicode.IsDigit(re_runes[i+numDigits]) { // Skip while we see a digit
|
||||
numDigits++
|
||||
}
|
||||
re_postfix = append(re_postfix, re_runes[i:i+numDigits]...)
|
||||
i += (numDigits - 1) // Move back a step to add concatenation operator
|
||||
} else {
|
||||
re_postfix = append(re_postfix, re_runes[i])
|
||||
}
|
||||
@@ -364,7 +371,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
outQueue := make([]postfixNode, 0) // Output queue
|
||||
|
||||
// Actual algorithm
|
||||
numOpenParens := 0 // Number of open parentheses
|
||||
numOpenParens := 0 // Number of open parentheses
|
||||
parenIndices := make([]Group, 0) // I really shouldn't be using Group here, because that's strictly for matching purposes, but its a convenient way to store the indices of the opening and closing parens.
|
||||
parenIndices = append(parenIndices, Group{0, 0}) // I append a weird value here, because the 0-th group doesn't have any parens. This way, the 1st group will be at index 1, 2nd at 2 ...
|
||||
for i := 0; i < len(re_postfix); i++ {
|
||||
/* Two cases:
|
||||
1. Current character is alphanumeric - send to output queue
|
||||
@@ -420,11 +429,11 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
} else {
|
||||
return nil, fmt.Errorf("not enough hex characters found in expression")
|
||||
}
|
||||
} else if isOctal(re_postfix[i]) { // Octal value
|
||||
} else if re_postfix[i] == '0' { // Octal value
|
||||
var octVal int64
|
||||
var octValStr string
|
||||
numDigitsParsed := 0
|
||||
for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 {
|
||||
for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 4 {
|
||||
octValStr += string(re_postfix[i+numDigitsParsed])
|
||||
numDigitsParsed++
|
||||
}
|
||||
@@ -437,6 +446,20 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
}
|
||||
i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
|
||||
outQueue = append(outQueue, newPostfixCharNode(rune(octVal)))
|
||||
} else if unicode.IsDigit(re_postfix[i]) { // Backreference
|
||||
var num int64
|
||||
var numStr string
|
||||
numDigitsParsed := 0
|
||||
for (i+numDigitsParsed) < len(re_postfix) && unicode.IsDigit(re_postfix[i+numDigitsParsed]) {
|
||||
numStr += string(re_postfix[i+numDigitsParsed])
|
||||
numDigitsParsed++
|
||||
}
|
||||
num, err := strconv.ParseInt(numStr, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing backreference in expresion")
|
||||
}
|
||||
i += numDigitsParsed - 1
|
||||
outQueue = append(outQueue, newPostfixBackreferenceNode(int(num)))
|
||||
} else {
|
||||
escapedNode, err := newEscapedNode(re_postfix[i], false)
|
||||
if err != nil {
|
||||
@@ -588,11 +611,11 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
} else {
|
||||
return nil, fmt.Errorf("not enough hex characters found in character class")
|
||||
}
|
||||
} else if isOctal(re_postfix[i]) { // Octal value
|
||||
} else if re_postfix[i] == '0' { // Octal value
|
||||
var octVal int64
|
||||
var octValStr string
|
||||
numDigitsParsed := 0
|
||||
for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
|
||||
for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 4 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
|
||||
octValStr += string(re_postfix[i+numDigitsParsed])
|
||||
numDigitsParsed++
|
||||
}
|
||||
@@ -796,6 +819,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
outQueue = append(outQueue, newPostfixNode(c))
|
||||
}
|
||||
numOpenParens++
|
||||
parenIndices = append(parenIndices, Group{StartIdx: len(outQueue) - 1}) // Push the index of the lparen into parenIndices
|
||||
}
|
||||
if c == ')' {
|
||||
// Keep popping from opStack until we encounter an opening parenthesis or a NONCAPLPAREN_CHAR. Throw an error if we reach the end of the stack.
|
||||
@@ -812,6 +836,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
if val == '(' { // Whatever was inside the parentheses was a _capturing_ group, so we append the closing parentheses as well
|
||||
outQueue = append(outQueue, newPostfixNode(')')) // Add closing parentheses
|
||||
}
|
||||
parenIndices[numOpenParens].EndIdx = len(outQueue) - 1
|
||||
numOpenParens--
|
||||
}
|
||||
}
|
||||
@@ -826,6 +851,11 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
return nil, fmt.Errorf("imbalanced parantheses")
|
||||
}
|
||||
|
||||
// outQueue, _, err := rewriteBackreferences(outQueue, parenIndices)
|
||||
// if err != nil {
|
||||
// return nil, err
|
||||
// }
|
||||
|
||||
return outQueue, nil
|
||||
}
|
||||
|
||||
@@ -1037,6 +1067,21 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
})
|
||||
nfa = append(nfa, toAdd)
|
||||
}
|
||||
if c.nodetype == backreferenceNode {
|
||||
if c.referencedGroup > numGroups {
|
||||
return Reg{}, fmt.Errorf("invalid backreference")
|
||||
}
|
||||
stateToAdd := &nfaState{}
|
||||
stateToAdd.assert = noneAssert
|
||||
stateToAdd.content = newContents(epsilon)
|
||||
stateToAdd.isEmpty = true
|
||||
stateToAdd.isBackreference = true
|
||||
stateToAdd.output = make([]*nfaState, 0)
|
||||
stateToAdd.output = append(stateToAdd.output, stateToAdd)
|
||||
stateToAdd.referredGroup = c.referencedGroup
|
||||
stateToAdd.threadBackref = 0
|
||||
nfa = append(nfa, stateToAdd)
|
||||
}
|
||||
// Must be an operator if it isn't a character
|
||||
switch c.nodetype {
|
||||
case concatenateNode:
|
||||
|
||||
Reference in New Issue
Block a user