|
|
|
@@ -59,7 +59,7 @@ func priority(op rune) int {
|
|
|
|
|
func getPOSIXClass(str []rune) (bool, string) {
|
|
|
|
|
i := 0
|
|
|
|
|
rtv := ""
|
|
|
|
|
for i < len(str) && (str[i] != ':' && str[i] != RBRACKET) {
|
|
|
|
|
for i < len(str) && (str[i] != ':' && str[i] != rbracketRune) {
|
|
|
|
|
rtv += string(str[i])
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
@@ -69,7 +69,7 @@ func getPOSIXClass(str []rune) (bool, string) {
|
|
|
|
|
if str[i] != ':' { // The POSIX class must end with a colon and a closing bracket. It cannot end with a closing bracket first.
|
|
|
|
|
return false, ""
|
|
|
|
|
}
|
|
|
|
|
if str[i+1] != RBRACKET {
|
|
|
|
|
if str[i+1] != rbracketRune {
|
|
|
|
|
return false, ""
|
|
|
|
|
}
|
|
|
|
|
return true, rtv
|
|
|
|
@@ -174,13 +174,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
re_runes = append(re_runes, NONCAPLPAREN_CHAR)
|
|
|
|
|
i += 2
|
|
|
|
|
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
|
|
|
|
|
re_runes = append(re_runes, ESC_BACKSLASH)
|
|
|
|
|
re_runes = append(re_runes, escBackslashRune)
|
|
|
|
|
i++
|
|
|
|
|
} else if c == '[' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
|
|
|
|
|
re_runes = append(re_runes, LBRACKET)
|
|
|
|
|
re_runes = append(re_runes, lbracketRune)
|
|
|
|
|
continue
|
|
|
|
|
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
|
|
|
|
|
re_runes = append(re_runes, RBRACKET)
|
|
|
|
|
re_runes = append(re_runes, rbracketRune)
|
|
|
|
|
continue
|
|
|
|
|
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
|
|
|
|
|
return nil, fmt.Errorf("non-greedy operators are not supported")
|
|
|
|
@@ -203,7 +203,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
i := 0
|
|
|
|
|
for i < len(re_runes) {
|
|
|
|
|
re_postfix = append(re_postfix, re_runes[i])
|
|
|
|
|
if re_runes[i] == LBRACKET && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped.
|
|
|
|
|
if re_runes[i] == lbracketRune && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped.
|
|
|
|
|
toAppend := make([]rune, 0) // Holds all the runes in the current character class
|
|
|
|
|
|
|
|
|
|
i++ // Skip past LBRACKET, because it was already added
|
|
|
|
@@ -211,13 +211,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
return nil, fmt.Errorf("opening bracket without closing bracket")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for re_runes[i] != RBRACKET || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
|
|
|
|
|
for re_runes[i] != rbracketRune || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
|
|
|
|
|
// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
|
|
|
|
|
if i >= len(re_runes) {
|
|
|
|
|
return nil, fmt.Errorf("opening bracket without closing bracket")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if re_runes[i] == LBRACKET && re_runes[i+1] == ':' { // POSIX character class
|
|
|
|
|
if re_runes[i] == lbracketRune && re_runes[i+1] == ':' { // POSIX character class
|
|
|
|
|
toAppend = append(toAppend, re_runes[i])
|
|
|
|
|
i++
|
|
|
|
|
toAppend = append(toAppend, re_runes[i])
|
|
|
|
@@ -232,14 +232,14 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
toAppend = append(toAppend, re_runes[i])
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != RBRACKET) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
|
|
|
|
|
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != rbracketRune) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
|
|
|
|
|
re_runes[i] = CHAR_RANGE
|
|
|
|
|
}
|
|
|
|
|
toAppend = append(toAppend, re_runes[i])
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
// Add in the RBRACKET
|
|
|
|
|
toAppend = append(toAppend, RBRACKET)
|
|
|
|
|
toAppend = append(toAppend, rbracketRune)
|
|
|
|
|
re_postfix = append(re_postfix, toAppend...)
|
|
|
|
|
}
|
|
|
|
|
if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
|
|
|
|
@@ -357,7 +357,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
// To deal with this, I make the following assertion:
|
|
|
|
|
// If at any point I see an RBRACKET 'in the wild' (not in a character class), then it must be
|
|
|
|
|
// a regular character, with no special significance.
|
|
|
|
|
if c == RBRACKET {
|
|
|
|
|
if c == rbracketRune {
|
|
|
|
|
outQueue = append(outQueue, newPostfixCharNode(']'))
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
@@ -496,7 +496,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if c == LBRACKET { // Used for character classes
|
|
|
|
|
if c == lbracketRune { // Used for character classes
|
|
|
|
|
firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
|
|
|
|
|
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
|
|
|
|
|
i++ // Step forward so we can look at the character class
|
|
|
|
@@ -521,7 +521,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
}
|
|
|
|
|
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
|
|
|
|
|
for i < len(re_postfix) {
|
|
|
|
|
if firstCharAdded && re_postfix[i] == RBRACKET {
|
|
|
|
|
if firstCharAdded && re_postfix[i] == rbracketRune {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if re_postfix[i] == CHAR_RANGE {
|
|
|
|
@@ -581,7 +581,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
if re_postfix[i] == LBRACKET && i < len(re_postfix)-8 { // Could be the start of a POSIX class - the smallest POSIX class by word-length [[:word:]] takes 8 more characters
|
|
|
|
|
if re_postfix[i] == lbracketRune && i < len(re_postfix)-8 { // Could be the start of a POSIX class - the smallest POSIX class by word-length [[:word:]] takes 8 more characters
|
|
|
|
|
temp_i := i
|
|
|
|
|
temp_i++
|
|
|
|
|
if re_postfix[temp_i] == ':' {
|
|
|
|
@@ -643,9 +643,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|
|
|
|
// will prevent it from running, as the outer if-statement will have evaluated to true.
|
|
|
|
|
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket.
|
|
|
|
|
switch re_postfix[i] {
|
|
|
|
|
case LBRACKET:
|
|
|
|
|
case lbracketRune:
|
|
|
|
|
chars = append(chars, newPostfixCharNode('['))
|
|
|
|
|
case RBRACKET:
|
|
|
|
|
case rbracketRune:
|
|
|
|
|
chars = append(chars, newPostfixCharNode(']'))
|
|
|
|
|
default:
|
|
|
|
|
return nil, fmt.Errorf("error parsing high-range unicode value in character class")
|
|
|
|
@@ -912,8 +912,8 @@ func thompson(re []postfixNode) (Reg, error) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
|
|
|
|
|
replaceByValue([]int(stateToAdd.content), int(ESC_BACKSLASH), '\\')
|
|
|
|
|
replaceByValue(stateToAdd.except, ESC_BACKSLASH, '\\')
|
|
|
|
|
replaceByValue([]int(stateToAdd.content), int(escBackslashRune), '\\')
|
|
|
|
|
replaceByValue(stateToAdd.except, escBackslashRune, '\\')
|
|
|
|
|
|
|
|
|
|
nfa = append(nfa, &stateToAdd)
|
|
|
|
|
}
|
|
|
|
|