Renamed more constants to avoid exporting

master
Aadhavan Srinivasan 3 days ago
parent b8f5b9af7c
commit 7e792f1248

@ -59,7 +59,7 @@ func priority(op rune) int {
func getPOSIXClass(str []rune) (bool, string) { func getPOSIXClass(str []rune) (bool, string) {
i := 0 i := 0
rtv := "" rtv := ""
for i < len(str) && (str[i] != ':' && str[i] != RBRACKET) { for i < len(str) && (str[i] != ':' && str[i] != rbracketRune) {
rtv += string(str[i]) rtv += string(str[i])
i++ i++
} }
@ -69,7 +69,7 @@ func getPOSIXClass(str []rune) (bool, string) {
if str[i] != ':' { // The POSIX class must end with a colon and a closing bracket. It cannot end with a closing bracket first. if str[i] != ':' { // The POSIX class must end with a colon and a closing bracket. It cannot end with a closing bracket first.
return false, "" return false, ""
} }
if str[i+1] != RBRACKET { if str[i+1] != rbracketRune {
return false, "" return false, ""
} }
return true, rtv return true, rtv
@ -174,13 +174,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
re_runes = append(re_runes, NONCAPLPAREN_CHAR) re_runes = append(re_runes, NONCAPLPAREN_CHAR)
i += 2 i += 2
} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash } else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
re_runes = append(re_runes, ESC_BACKSLASH) re_runes = append(re_runes, escBackslashRune)
i++ i++
} else if c == '[' && (i == 0 || re_runes[len(re_runes)-1] != '\\') { } else if c == '[' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, LBRACKET) re_runes = append(re_runes, lbracketRune)
continue continue
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') { } else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, RBRACKET) re_runes = append(re_runes, rbracketRune)
continue continue
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') { } else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
return nil, fmt.Errorf("non-greedy operators are not supported") return nil, fmt.Errorf("non-greedy operators are not supported")
@ -203,7 +203,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i := 0 i := 0
for i < len(re_runes) { for i < len(re_runes) {
re_postfix = append(re_postfix, re_runes[i]) re_postfix = append(re_postfix, re_runes[i])
if re_runes[i] == LBRACKET && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped. if re_runes[i] == lbracketRune && (i == 0 || re_runes[i-1] != '\\') { // We do not touch things inside brackets, unless they are escaped.
toAppend := make([]rune, 0) // Holds all the runes in the current character class toAppend := make([]rune, 0) // Holds all the runes in the current character class
i++ // Skip past LBRACKET, because it was already added i++ // Skip past LBRACKET, because it was already added
@ -211,13 +211,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
return nil, fmt.Errorf("opening bracket without closing bracket") return nil, fmt.Errorf("opening bracket without closing bracket")
} }
for re_runes[i] != RBRACKET || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash) for re_runes[i] != rbracketRune || i == 0 || re_runes[i-1] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error. // Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
if i >= len(re_runes) { if i >= len(re_runes) {
return nil, fmt.Errorf("opening bracket without closing bracket") return nil, fmt.Errorf("opening bracket without closing bracket")
} }
if re_runes[i] == LBRACKET && re_runes[i+1] == ':' { // POSIX character class if re_runes[i] == lbracketRune && re_runes[i+1] == ':' { // POSIX character class
toAppend = append(toAppend, re_runes[i]) toAppend = append(toAppend, re_runes[i])
i++ i++
toAppend = append(toAppend, re_runes[i]) toAppend = append(toAppend, re_runes[i])
@ -232,14 +232,14 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
toAppend = append(toAppend, re_runes[i]) toAppend = append(toAppend, re_runes[i])
i++ i++
} }
if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != RBRACKET) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range if re_runes[i] == '-' && (i > 0 && re_runes[i-1] != '\\') && (i < len(re_runes)-1 && re_runes[i+1] != rbracketRune) { // Unescaped hyphen, that has some character (not a RBRACKET) after it - This represents a character range, so we replace with CHAR_RANGE. This metacharacter will be used later on to construct the range
re_runes[i] = CHAR_RANGE re_runes[i] = CHAR_RANGE
} }
toAppend = append(toAppend, re_runes[i]) toAppend = append(toAppend, re_runes[i])
i++ i++
} }
// Add in the RBRACKET // Add in the RBRACKET
toAppend = append(toAppend, RBRACKET) toAppend = append(toAppend, rbracketRune)
re_postfix = append(re_postfix, toAppend...) re_postfix = append(re_postfix, toAppend...)
} }
if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either if i < len(re_runes) && re_runes[i] == '{' && (i > 0 && re_runes[i-1] != '\\') { // We don't touch things inside braces, either
@ -357,7 +357,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// To deal with this, I make the following assertion: // To deal with this, I make the following assertion:
// If at any point I see an RBRACKET 'in the wild' (not in a character class), then it must be // If at any point I see an RBRACKET 'in the wild' (not in a character class), then it must be
// a regular character, with no special significance. // a regular character, with no special significance.
if c == RBRACKET { if c == rbracketRune {
outQueue = append(outQueue, newPostfixCharNode(']')) outQueue = append(outQueue, newPostfixCharNode(']'))
continue continue
} }
@ -496,7 +496,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
} }
} }
if c == LBRACKET { // Used for character classes if c == lbracketRune { // Used for character classes
firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added. firstCharAdded := false // A character class must have at least 1 character. This flag checks if the first character has been added.
endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter endOfRange := false // Set to 'true' when we encounter a CHAR_RANGE metacharacter
i++ // Step forward so we can look at the character class i++ // Step forward so we can look at the character class
@ -521,7 +521,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
chars := make([]postfixNode, 0) // List of nodes - used only for character classes chars := make([]postfixNode, 0) // List of nodes - used only for character classes
for i < len(re_postfix) { for i < len(re_postfix) {
if firstCharAdded && re_postfix[i] == RBRACKET { if firstCharAdded && re_postfix[i] == rbracketRune {
break break
} }
if re_postfix[i] == CHAR_RANGE { if re_postfix[i] == CHAR_RANGE {
@ -581,7 +581,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i++ i++
} }
} else { } else {
if re_postfix[i] == LBRACKET && i < len(re_postfix)-8 { // Could be the start of a POSIX class - the smallest POSIX class by word-length [[:word:]] takes 8 more characters if re_postfix[i] == lbracketRune && i < len(re_postfix)-8 { // Could be the start of a POSIX class - the smallest POSIX class by word-length [[:word:]] takes 8 more characters
temp_i := i temp_i := i
temp_i++ temp_i++
if re_postfix[temp_i] == ':' { if re_postfix[temp_i] == ':' {
@ -643,9 +643,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// will prevent it from running, as the outer if-statement will have evaluated to true. // will prevent it from running, as the outer if-statement will have evaluated to true.
if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket. if !firstCharAdded && re_postfix[i] > 0xF0000 { // It's a metacharacter that I defined, I'll have to convert it back to the regular character before adding it back, because I haven't added any characters yet. For example, '[[]', the second LBRACKET should be treated like a literal bracket.
switch re_postfix[i] { switch re_postfix[i] {
case LBRACKET: case lbracketRune:
chars = append(chars, newPostfixCharNode('[')) chars = append(chars, newPostfixCharNode('['))
case RBRACKET: case rbracketRune:
chars = append(chars, newPostfixCharNode(']')) chars = append(chars, newPostfixCharNode(']'))
default: default:
return nil, fmt.Errorf("error parsing high-range unicode value in character class") return nil, fmt.Errorf("error parsing high-range unicode value in character class")
@ -912,8 +912,8 @@ func thompson(re []postfixNode) (Reg, error) {
} }
// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it // Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
replaceByValue([]int(stateToAdd.content), int(ESC_BACKSLASH), '\\') replaceByValue([]int(stateToAdd.content), int(escBackslashRune), '\\')
replaceByValue(stateToAdd.except, ESC_BACKSLASH, '\\') replaceByValue(stateToAdd.except, escBackslashRune, '\\')
nfa = append(nfa, &stateToAdd) nfa = append(nfa, &stateToAdd)
} }

@ -8,16 +8,16 @@ import (
var whitespaceChars = []rune{' ', '\t', '\n'} var whitespaceChars = []rune{' ', '\t', '\n'}
var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} var digitChars = []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_") var wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
var LBRACKET rune = 0xF0002 var lbracketRune rune = 0xF0002
var RBRACKET rune = 0xF0003 var rbracketRune rune = 0xF0003
var ANY_CHAR rune = 0xF0004 // Represents any character - used for states where the allChars flag is on. var anyCharRune rune = 0xF0004 // Represents any character - used for states where the allChars flag is on.
var LPAREN_CHAR rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudo-parenthesis var lparenRune rune = 0xF0005 // Parentheses in regex are concatenated with this - it acts as a pseudo-parenthesis
var RPAREN_CHAR rune = 0xF0006 var rparenRune rune = 0xF0006
var NONCAPLPAREN_CHAR rune = 0xF0007 // Represents a non-capturing group's LPAREN var nonCapLparenRune rune = 0xF0007 // Represents a non-capturing group's LPAREN
var ESC_BACKSLASH rune = 0xF0008 // Represents an escaped backslash var escBackslashRune rune = 0xF0008 // Represents an escaped backslash
var CHAR_RANGE rune = 0xF0009 // Represents a character range var CHAR_RANGE rune = 0xF0009 // Represents a character range
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', LBRACKET, RBRACKET, NONCAPLPAREN_CHAR} var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
// An interface for int and rune, which are identical // An interface for int and rune, which are identical
type character interface { type character interface {

@ -198,7 +198,7 @@ func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
} }
} }
listTransitions := s.transitions[int(str[idx])] listTransitions := s.transitions[int(str[idx])]
for _, dest := range s.transitions[int(ANY_CHAR)] { for _, dest := range s.transitions[int(anyCharRune)] {
if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) { if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
// Add an allChar state to the list of matches if: // Add an allChar state to the list of matches if:
// a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't. // a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.

@ -54,7 +54,7 @@ func newCharClassNode(nodes []postfixNode, negated bool) postfixNode {
rtv.endReps = 1 rtv.endReps = 1
if negated { if negated {
rtv.nodetype = characterNode rtv.nodetype = characterNode
rtv.contents = []rune{ANY_CHAR} rtv.contents = []rune{anyCharRune}
rtv.allChars = true rtv.allChars = true
rtv.except = nodes rtv.except = nodes
} else { } else {
@ -169,10 +169,10 @@ func newPostfixNode(contents ...rune) postfixNode {
// Special cases for LPAREN and RPAREN - they have special characters defined for them // Special cases for LPAREN and RPAREN - they have special characters defined for them
if to_return.nodetype == lparenNode { if to_return.nodetype == lparenNode {
to_return.contents = []rune{LPAREN_CHAR} to_return.contents = []rune{lparenRune}
} }
if to_return.nodetype == rparenNode { if to_return.nodetype == rparenNode {
to_return.contents = []rune{RPAREN_CHAR} to_return.contents = []rune{rparenRune}
} }
} }
return to_return return to_return
@ -185,7 +185,7 @@ func newPostfixDotNode() postfixNode {
toReturn.endReps = 1 toReturn.endReps = 1
toReturn.nodetype = characterNode toReturn.nodetype = characterNode
toReturn.allChars = true toReturn.allChars = true
toReturn.contents = []rune{ANY_CHAR} toReturn.contents = []rune{anyCharRune}
return toReturn return toReturn
} }

Loading…
Cancel
Save