Renamed more constants to avoid exporting

master
Aadhavan Srinivasan 3 days ago
parent b8f5b9af7c
commit 7e792f1248

@ -59,7 +59,7 @@ func priority(op rune) int {
func getPOSIXClass(str []rune) (bool, string) {
i := 0
rtv := ""
for i < len(str) && (str[i] != ':' && str[i] != RBRACKET) {
for i < len(str) && (str[i] != ':' && str[i] != rbracketRune) {
rtv += string(str[i])
i++
}
@ -69,7 +69,7 @@ func getPOSIXClass(str []rune) (bool, string) {
if str[i] != ':' { // The POSIX class must end with a colon and a closing bracket. It cannot end with a closing bracket first.
return false, ""
}
if str[i+1] != RBRACKET {
if str[i+1] != rbracketRune {
return false, ""
}
return true, rtv
@ -174,13 +174,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
re_runes = append(re_runes, NONCAPLPAREN_CHAR)
i += 2
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
re_runes = append(re_runes, ESC_BACKSLASH)
re_runes = append(re_runes, escBackslashRune)
i++
} else if c == '[' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, LBRACKET)
re_runes = append(re_runes, lbracketRune)
continue
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, RBRACKET)
re_runes = append(re_runes, rbracketRune)
continue
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
return nil, fmt.Errorf("non-greedy operators are not supported")
@ -203,7 +203,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i := 0
for i < len(re_runes) {
re_postfix = append(re_postfix, re_runes[i])
if re_runes[i] == LBRACKET && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped.
if re_runes[i] == lbracketRune && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped.
toAppend := make([]rune, 0) // Holds all the runes in the current character class
i++ // Skip past LBRACKET, because it was already added
@ -211,13 +211,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
return nil, fmt.Errorf("opening bracket without closing bracket")
}
for re_runes[i] != RBRACKET || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
for re_runes[i] != rbracketRune || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
if i >= len(re_runes) {
return nil, fmt.Errorf("opening bracket without closing bracket")
}
if re_runes[i] == LBRACKET && re_runes[i+1] == ':' { // POSIX character class
if re_runes[i] == lbracketRune && re_runes[i+1] == ':' { // POSIX character class
toAppend = append(toAppend, re_runes[i])
i++
toAppend = append(toAppend, re_runes[i])
@ -232,14 +232,14 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
toAppend = append(toAppend, re_runes[i])
i++
}
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != RBRACKET) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != rbracketRune) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
re_runes[i] = CHAR_RANGE
}
toAppend = append(toAppend, re_runes[i])
i++
}
// Add in the RBRACKET
toAppend = append(toAppend, RBRACKET)
toAppend = append(toAppend, rbracketRune)
re_postfix = append(re_postfix, toAppend...)
}
if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
@ -357,7 +357,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// To deal with this, I make the following assertion:
// If at any point I see an RBRACKET 'in the wild' (not in a character class), then it must be
// a regular character, with no special significance.
if c == RBRACKET {
if c == rbracketRune {
outQueue = append(outQueue, newPostfixCharNode(']'))
continue
}
@ -496,7 +496,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
}
}
if c == LBRACKET { // Used for character classes
if c == lbracketRune { // Used for character classes
firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
i++ // Step forward so we can look at the character class
@ -521,7 +521,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
chars := make([]postfixNode, 0) // List of nodes - used only for character classes
for i < len(re_postfix) {
if firstCharAdded && re_postfix[i] == RBRACKET {
if firstCharAdded && re_postfix[i] == rbracketRune {
break
}
if re_postfix[i] == CHAR_RANGE {
@ -581,7 +581,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i++
}
} else {
if re_postfix[i] == LBRACKET && i < len(re_postfix)-8 { // Could be the start of a POSIX class - the smallest POSIX class by word-length [[:word:]] takes 8 more characters
if re_postfix[i] == lbracketRune && i < len(re_postfix)-8 { // Could be the start of a POSIX class - the smallest POSIX class by word-length [[:word:]] takes 8 more characters
temp_i := i
temp_i++
if re_postfix[temp_i] == ':' {
@ -643,9 +643,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// will prevent it from running, as the outer if-statement will have evaluated to true.
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket.
switch re_postfix[i] {
case LBRACKET:
case lbracketRune:
chars = append(chars, newPostfixCharNode('['))
case RBRACKET:
case rbracketRune:
chars = append(chars, newPostfixCharNode(']'))
default:
return nil, fmt.Errorf("error parsing high-range unicode value in character class")
@ -912,8 +912,8 @@ func thompson(re []postfixNode) (Reg, error) {
}
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
replaceByValue([]int(stateToAdd.content), int(ESC_BACKSLASH), '\\')
replaceByValue(stateToAdd.except, ESC_BACKSLASH, '\\')
replaceByValue([]int(stateToAdd.content), int(escBackslashRune), '\\')
replaceByValue(stateToAdd.except, escBackslashRune, '\\')
nfa = append(nfa, &stateToAdd)
}

@ -8,16 +8,16 @@ import (
var whitespaceChars = []rune{' ', '\t', '\n'}
var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
var LBRACKET rune = 0xF0002
var RBRACKET rune = 0xF0003
var ANY_CHAR rune = 0xF0004 // Represents any character - used for states where the allChars flag is on.
var LPAREN_CHAR rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudo-parentheses
var RPAREN_CHAR rune = 0xF0006
var NONCAPLPAREN_CHAR rune = 0xF0007 // Represents a non-capturing group's LPAREN
var ESC_BACKSLASH rune = 0xF0008 // Represents an escaped backslash
var lbracketRune rune = 0xF0002
var rbracketRune rune = 0xF0003
var anyCharRune rune = 0xF0004 // Represents any character - used for states where the allChars flag is on.
var lparenRune rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudo-parentheses
var rparenRune rune = 0xF0006
var nonCapLparenRune rune = 0xF0007 // Represents a non-capturing group's LPAREN
var escBackslashRune rune = 0xF0008 // Represents an escaped backslash
var CHAR_RANGE rune = 0xF0009 // Represents a character range
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR}
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
// An interface for int and rune, which are identical
type character interface {

@ -198,7 +198,7 @@ func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
}
}
listTransitions := s.transitions[int(str[idx])]
for _, dest := range s.transitions[int(ANY_CHAR)] {
for _, dest := range s.transitions[int(anyCharRune)] {
if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
// Add an allChar state to the list of matches if:
// a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.

@ -54,7 +54,7 @@ func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
rtv.endReps = 1
if negated {
rtv.nodetype = characterNode
rtv.contents = []rune{ANY_CHAR}
rtv.contents = []rune{anyCharRune}
rtv.allChars = true
rtv.except = nodes
} else {
@ -169,10 +169,10 @@ func newPostfixNode(contents ...rune) postfixNode {
// Special cases for LPAREN and RPAREN - they have special characters defined for them
if to_return.nodetype == lparenNode {
to_return.contents = []rune{LPAREN_CHAR}
to_return.contents = []rune{lparenRune}
}
if to_return.nodetype == rparenNode {
to_return.contents = []rune{RPAREN_CHAR}
to_return.contents = []rune{rparenRune}
}
}
return to_return
@ -185,7 +185,7 @@ func newPostfixDotNode() postfixNode {
toReturn.endReps = 1
toReturn.nodetype = characterNode
toReturn.allChars = true
toReturn.contents = []rune{ANY_CHAR}
toReturn.contents = []rune{anyCharRune}
return toReturn
}

Loading…
Cancel
Save