@ -127,7 +127,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
for i < len ( re_runes_orig ) && re_runes_orig [ i ] != '>' {
if ! unicode . IsDigit ( re_runes_orig [ i ] ) {
if re_runes_orig [ i ] != '-' || ( hyphenFound ) {
return nil , fmt . Errorf ( " Invalid numeric range. ")
return nil , fmt . Errorf ( " invalid numeric range ")
}
}
if re_runes_orig [ i ] == '-' {
@ -138,10 +138,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
// End of string reached and last character doesn't close the range
if i == len ( re_runes_orig ) && re_runes_orig [ len ( re_runes_orig ) - 1 ] != '>' {
return nil , fmt . Errorf ( " Numeric range not closed. ")
return nil , fmt . Errorf ( " numeric range not closed ")
}
if len ( tmpStr ) == 0 {
return nil , fmt . Errorf ( " Empty numeric range. ")
return nil , fmt . Errorf ( " empty numeric range ")
}
// Closing bracket will be skipped when the loop variable increments
var rangeStart int
@ -185,13 +185,13 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i ++ // Skip past LBRACKET, because it was already added
if i >= len ( re_runes ) { // Sanity check before we start
return nil , fmt . Errorf ( " Opening bracket without closing bracket. ")
return nil , fmt . Errorf ( " opening bracket without closing bracket ")
}
for re_runes [ i ] != RBRACKET || i == 0 || re_runes [ i - 1 ] == '\\' { // Skip all characters inside _unescaped_ brackets (we are _not_ at a closing bracket, or if we are, the previous character is a backslash)
// Make sure we haven't exceeded the length of the string. If we did, then the regex doesn't actually have a closing bracket and we should throw an error.
if i >= len ( re_runes ) {
return nil , fmt . Errorf ( " Opening bracket without closing bracket. ")
return nil , fmt . Errorf ( " opening bracket without closing bracket ")
}
if re_runes [ i ] == LBRACKET && re_runes [ i + 1 ] == ':' { // POSIX character class
@ -204,7 +204,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i ++
}
if i >= len ( re_runes ) - 1 && re_runes [ i ] != ':' {
return nil , fmt . Errorf ( " U nable to parse what looks like a POSIX character class. ")
return nil , fmt . Errorf ( " u nable to parse what looks like a POSIX character class")
}
toAppend = append ( toAppend , re_runes [ i ] )
i ++
@ -226,7 +226,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i ++
}
if i == len ( re_runes ) {
return nil , fmt . Errorf ( " Invalid numeric specifier. ")
return nil , fmt . Errorf ( " invalid numeric specifier ")
}
re_postfix = append ( re_postfix , re_runes [ i ] ) // Append closing brace
}
@ -237,25 +237,25 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i < len ( re_runes ) && re_runes [ i ] == '\\' { // Something is being escaped (I don't add the backslash to re_postfix, because it was already added earlier)
i ++
if i >= len ( re_runes ) {
return nil , fmt . Errorf ( " Stray backslash in expression. ")
return nil , fmt . Errorf ( " stray backslash in expression ")
}
if re_runes [ i ] == 'x' {
re_postfix = append ( re_postfix , re_runes [ i ] )
i ++
if i >= len ( re_runes ) {
return nil , fmt . Errorf ( " Stray backslash in expression. ")
return nil , fmt . Errorf ( " stray backslash in expression ")
}
if re_runes [ i ] == '{' {
re_postfix = append ( re_postfix , re_runes [ i : i + 8 ] ... )
i += 7
if i >= len ( re_runes ) {
return nil , fmt . Errorf ( " Stray backslash in expression. ")
return nil , fmt . Errorf ( " stray backslash in expression ")
}
} else if isHex ( re_runes [ i ] ) {
re_postfix = append ( re_postfix , re_runes [ i : i + 2 ] ... )
i += 2
} else {
return nil , fmt . Errorf ( " Invalid hex value in expression. ")
return nil , fmt . Errorf ( " invalid hex value in expression ")
}
} else if isOctal ( re_runes [ i ] ) {
numDigits := 1
@ -271,14 +271,14 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i < len ( re_runes ) && re_runes [ i ] == '(' && ( i == 0 || re_runes [ i - 1 ] != '\\' ) && ( i < len ( re_runes ) - 2 && re_runes [ i + 1 ] == '?' && slices . Contains ( [ ] rune { '=' , '!' , '<' } , re_runes [ i + 2 ] ) ) { // Unescaped open parentheses followed by question mark then '<', '!' or '=' => lokaround. Don't mess with it.
i ++ // Step inside
if i == len ( re_runes ) - 1 || ( re_runes [ i + 1 ] != '=' && re_runes [ i + 1 ] != '!' && re_runes [ i + 1 ] != '<' ) {
return nil , fmt . Errorf ( " Invalid regex. L ookaround intended?")
return nil , fmt . Errorf ( " invalid regex - l ookaround intended?")
}
re_postfix = append ( re_postfix , re_runes [ i ] )
i ++
numOpenParens := 1
for numOpenParens != 0 {
if i >= len ( re_runes ) {
return nil , fmt . Errorf ( " Unclosed lookaround. ")
return nil , fmt . Errorf ( " unclosed lookaround ")
}
if re_runes [ i ] == '(' || re_runes [ i ] == NONCAPLPAREN_CHAR {
numOpenParens ++
@ -345,7 +345,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if c == '\\' { // Escape character - invert special and non-special characters eg. \( is treated as a literal parentheses, \b is treated as word boundary
if i == len ( re_postfix ) - 1 { // End of string - panic, because backslash is an escape character (something needs to come after it)
return nil , fmt . Errorf ( " ERROR: Backslash with no escape character. ")
return nil , fmt . Errorf ( " backslash with no escape character ")
}
i ++
if re_postfix [ i ] == 'x' { // Hex value
@ -354,19 +354,19 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
var hexVal int
n , err := fmt . Sscanf ( string ( re_postfix [ i : ] ) , "{%x}" , & hexVal )
if n < 1 || err != nil {
return nil , fmt . Errorf ( " Error parsing expanded hex code in expression. ")
return nil , fmt . Errorf ( " error parsing expanded hex code in expression ")
}
outQueue = append ( outQueue , newPostfixCharNode ( rune ( hexVal ) ) )
i += 7
} else if i < len ( re_postfix ) - 1 { // Two-digit hex code
hexVal , err := strconv . ParseInt ( string ( [ ] rune { re_postfix [ i ] , re_postfix [ i + 1 ] } ) , 16 , 64 ) // Convert the two hex values into a rune slice, then to a string. Parse the string into an int with strconv.ParseInt()
if err != nil {
return nil , fmt . Errorf ( " Error parsing hex characters in expression. ")
return nil , fmt . Errorf ( " error parsing hex characters in expression ")
}
i ++ // Loop increment will take care of going forward
outQueue = append ( outQueue , newPostfixCharNode ( rune ( hexVal ) ) )
} else {
return nil , fmt . Errorf ( " Not enough hex characters found in expression. ")
return nil , fmt . Errorf ( " not enough hex characters found in expression ")
}
} else if isOctal ( re_postfix [ i ] ) { // Octal value
var octVal int64
@ -378,17 +378,17 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
octVal , err := strconv . ParseInt ( octValStr , 8 , 32 )
if err != nil {
return nil , fmt . Errorf ( " Error parsing octal value in expression. ")
return nil , fmt . Errorf ( " error parsing octal value in expression ")
}
if octVal > 0777 {
return nil , fmt . Errorf ( " Invalid octal value in expression. ")
return nil , fmt . Errorf ( " invalid octal value in expression ")
}
i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
outQueue = append ( outQueue , newPostfixCharNode ( rune ( octVal ) ) )
} else {
escapedNode , err := newEscapedNode ( re_postfix [ i ] , false )
if err != nil {
return nil , fmt . Errorf ( " Invalid escape character in expression. ")
return nil , fmt . Errorf ( " invalid escape character in expression ")
}
outQueue = append ( outQueue , escapedNode )
}
@ -412,7 +412,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
numOpenParens := 1
for numOpenParens != 0 {
if i >= len ( re_postfix ) {
return nil , fmt . Errorf ( " Unclosed lookaround. ")
return nil , fmt . Errorf ( " unclosed lookaround ")
}
if re_postfix [ i ] == '(' || re_postfix [ i ] == NONCAPLPAREN_CHAR {
numOpenParens ++
@ -427,7 +427,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i ++
}
if len ( regex ) <= 1 { // Nothing in regex - panic
return nil , fmt . Errorf ( " I nvalid lookaround. (too short?)")
return nil , fmt . Errorf ( " i nvalid lookaround. (too short?)")
}
// 'regex' should now contain the lookaround regex, plus the characters at the start (which indicate pos/neg, ahead/behind)
// Now we should filter that out.
@ -438,7 +438,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else if regex [ 0 ] == '=' || regex [ 0 ] == '!' {
toAppend . lookaroundDir = LOOKAHEAD
} else {
return nil , fmt . Errorf ( " Invalid lookaround. ")
return nil , fmt . Errorf ( " invalid lookaround ")
}
// Positive or negative
if regex [ 0 ] == '=' { // Positive
@ -448,7 +448,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
toAppend . lookaroundSign = NEGATIVE
toAppend . contents = [ ] rune ( regex [ 1 : ] )
} else {
return nil , fmt . Errorf ( " Invalid lookaround. ")
return nil , fmt . Errorf ( " invalid lookaround ")
}
outQueue = append ( outQueue , toAppend )
continue
@ -459,7 +459,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
topStack , err := peek ( opStack )
if err != nil {
return nil , fmt . Errorf ( " Operator without operand. ")
return nil , fmt . Errorf ( " operator without operand ")
}
if priority ( c ) > priority ( topStack ) { // 2a
opStack = append ( opStack , c )
@ -479,7 +479,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i ++ // Step forward so we can look at the character class
// Oops, there's nothing there to look at
if i >= len ( re_postfix ) {
return nil , fmt . Errorf ( " Opening bracket with no closing bracket. ")
return nil , fmt . Errorf ( " opening bracket with no closing bracket ")
}
// Check if a POSIX character class was specified outside a bracket. This is an error.
@ -487,7 +487,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if re_postfix [ i ] == ':' {
posixClassPresent , _ := getPOSIXClass ( re_postfix [ i + 1 : ] )
if posixClassPresent {
return nil , fmt . Errorf ( " T he syntax for POSIX character classes is [[:digit:]], not [:digit:]")
return nil , fmt . Errorf ( " t he syntax for POSIX character classes is [[:digit:]], not [:digit:]")
}
}
@ -508,7 +508,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
if re_postfix [ i ] == '\\' { // Backslash indicates a character to be escaped
if i == len ( re_postfix ) - 1 {
return nil , fmt . Errorf ( " Stray backslash in character class. ")
return nil , fmt . Errorf ( " stray backslash in character class ")
}
i ++ // Step past backslash
@ -518,19 +518,19 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
var hexVal int
n , err := fmt . Sscanf ( string ( re_postfix [ i : ] ) , "{%x}" , & hexVal )
if n < 1 || err != nil {
return nil , fmt . Errorf ( " Error parsing expanded hex code in character class. ")
return nil , fmt . Errorf ( " error parsing expanded hex code in character class ")
}
chars = append ( chars , newPostfixCharNode ( rune ( hexVal ) ) )
i += 8
} else if i < len ( re_postfix ) - 2 { // Two-digit hex code
hexVal , err := strconv . ParseInt ( string ( [ ] rune { re_postfix [ i ] , re_postfix [ i + 1 ] } ) , 16 , 64 ) // Convert the two hex values into a rune slice, then to a string. Parse the string into an int with strconv.ParseInt()
if err != nil {
return nil , fmt . Errorf ( " Error parsing hex characters in character class. ")
return nil , fmt . Errorf ( " error parsing hex characters in character class ")
}
i += 2
chars = append ( chars , newPostfixCharNode ( rune ( hexVal ) ) )
} else {
return nil , fmt . Errorf ( " Not enough hex characters found in character class. ")
return nil , fmt . Errorf ( " not enough hex characters found in character class ")
}
} else if isOctal ( re_postfix [ i ] ) { // Octal value
var octVal int64
@ -542,17 +542,17 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
octVal , err := strconv . ParseInt ( octValStr , 8 , 32 )
if err != nil {
return nil , fmt . Errorf ( " Error parsing octal value in character class. ")
return nil , fmt . Errorf ( " error parsing octal value in character class ")
}
if octVal > 0777 {
return nil , fmt . Errorf ( " Invalid octal value in character class. ")
return nil , fmt . Errorf ( " invalid octal value in character class ")
}
i += numDigitsParsed // Shift forward by the number of characters parsed
chars = append ( chars , newPostfixCharNode ( rune ( octVal ) ) )
} else {
escapedNode , err := newEscapedNode ( re_postfix [ i ] , true )
if err != nil {
return nil , fmt . Errorf ( " Invalid escape character in character class. ")
return nil , fmt . Errorf ( " invalid escape character in character class ")
}
chars = append ( chars , escapedNode )
i ++
@ -602,7 +602,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
case "word" : // Word characters
nodeToAdd , _ = newEscapedNode ( 'w' , true ) // This isn't going to error, so I suppress it
default :
return nil , fmt . Errorf ( " Invalid POSIX character class. ")
return nil , fmt . Errorf ( " invalid POSIX character class ")
}
chars = append ( chars , nodeToAdd )
i = temp_i + len ( posixClass ) + 2 // Skip over the class name, the closing colon and the closing bracket
@ -623,7 +623,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
case RBRACKET :
chars = append ( chars , newPostfixCharNode ( ']' ) )
default :
return nil , fmt . Errorf ( " E rror parsing high-range unicode value in character class. ")
return nil , fmt . Errorf ( " e rror parsing high-range unicode value in character class")
}
}
chars = append ( chars , newPostfixCharNode ( re_postfix [ i ] ) )
@ -649,7 +649,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if ( err1 != nil || err2 != nil ) || len ( startRangePostfixNode . contents ) != 1 { // Treat it as a regular hyphen
chars = append ( chars , startRangePostfixNode , newPostfixCharNode ( '-' ) , endRangePostfixNode )
} else if len ( endRangePostfixNode . contents ) != 1 { // I don't even know what this would look like, this is just a sanity check
return nil , fmt . Errorf ( " Error parsing character range. ")
return nil , fmt . Errorf ( " error parsing character range ")
} else {
// We have established that they both have a length of 1
startRangeRune := startRangePostfixNode . contents [ 0 ]
@ -661,7 +661,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
}
if i == len ( re_postfix ) { // We have reached the end of the string, so we didn't encounter a closing brakcet. Panic.
return nil , fmt . Errorf ( " Opening bracket without closing bracket. ")
return nil , fmt . Errorf ( " opening bracket without closing bracket ")
}
outQueue = append ( outQueue , newCharClassNode ( chars , invertMatch ) )
@ -682,10 +682,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i ++
}
if len ( startRange ) == 0 { // {} is not valid, neither is {,5}
return nil , fmt . Errorf ( " Invalid numeric specifier. ")
return nil , fmt . Errorf ( " invalid numeric specifier ")
}
if i == len ( re_postfix ) {
return nil , fmt . Errorf ( " Brace not closed. ")
return nil , fmt . Errorf ( " brace not closed ")
}
startRangeNum , err := strconv . Atoi ( string ( startRange ) )
@ -697,7 +697,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
endRangeNum = startRangeNum
} else {
if re_postfix [ i ] != ',' {
return nil , fmt . Errorf ( " Invalid numeric specifier. ")
return nil , fmt . Errorf ( " invalid numeric specifier ")
}
i ++ // Skip comma
for i < len ( re_postfix ) && unicode . IsDigit ( re_postfix [ i ] ) {
@ -705,10 +705,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
i ++
}
if i == len ( re_postfix ) {
return nil , fmt . Errorf ( " Brace not closed. ")
return nil , fmt . Errorf ( " brace not closed ")
}
if re_postfix [ i ] != '}' {
return nil , fmt . Errorf ( " Invalid numeric specifier. ")
return nil , fmt . Errorf ( " invalid numeric specifier ")
}
if len ( endRange ) == 0 { // Case 3 above
endRangeNum = INFINITE_REPS
@ -724,7 +724,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
idx := len ( outQueue ) - 1
// Get the last added node
if idx < 0 || outQueue [ idx ] . nodetype == LPAREN {
return nil , fmt . Errorf ( " Numeric specifier with no content. ")
return nil , fmt . Errorf ( " numeric specifier with no content ")
}
outQueue [ idx ] . startReps = startRangeNum
outQueue [ idx ] . endReps = endRangeNum
@ -742,7 +742,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
var err error
for val , err = peek ( opStack ) ; val != '(' && val != NONCAPLPAREN_CHAR ; val , err = peek ( opStack ) {
if err != nil {
return nil , fmt . Errorf ( " Imbalanced parantheses. ")
return nil , fmt . Errorf ( " imbalanced parantheses ")
}
to_append := mustPop ( & opStack )
outQueue = append ( outQueue , newPostfixNode ( to_append ) )
@ -762,7 +762,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
if numOpenParens != 0 {
return nil , fmt . Errorf ( " Imbalanced parantheses. ")
return nil , fmt . Errorf ( " imbalanced parantheses ")
}
return outQueue , nil
@ -853,11 +853,11 @@ func thompson(re []postfixNode) (Reg, error) {
}
tmpRe , err := shuntingYard ( state . lookaroundRegex )
if err != nil {
return Reg { } , fmt . Errorf ( " E rror parsing lookaround: %w", err )
return Reg { } , fmt . Errorf ( " e rror parsing lookaround: %w", err )
}
reg , err := thompson ( tmpRe )
if err != nil {
return Reg { } , fmt . Errorf ( " E rror compiling lookaround: %w", err )
return Reg { } , fmt . Errorf ( " e rror compiling lookaround: %w", err )
}
state . lookaroundNFA = reg . start
state . lookaroundNumCaptureGroups = reg . numGroups
@ -899,11 +899,11 @@ func thompson(re []postfixNode) (Reg, error) {
middleNode , err1 := pop ( & nfa )
lparenNode , err2 := pop ( & nfa )
if err1 != nil && err2 != nil {
return Reg { } , fmt . Errorf ( " Imbalanced parentheses. ")
return Reg { } , fmt . Errorf ( " imbalanced parentheses ")
} else if err2 != nil { // There was no third node. ie. something like '()'
lparenNode = middleNode
if lparenNode . groupBegin != true { // There are only two nodes, but the first one isn't an LPAREN.
return Reg { } , fmt . Errorf ( " Imbalanced parentheses. ")
return Reg { } , fmt . Errorf ( " imbalanced parentheses ")
}
s . groupNum = lparenNode . groupNum
to_add := concatenate ( lparenNode , s )
@ -915,7 +915,7 @@ func thompson(re []postfixNode) (Reg, error) {
} else if middleNode . groupBegin { // Something like 'a()'
s . groupNum = middleNode . groupNum
} else { // A middleNode and lparenNode exist, but neither is actually an LPAREN.
return Reg { } , fmt . Errorf ( " Imbalanced parentheses. ")
return Reg { } , fmt . Errorf ( " imbalanced parentheses ")
}
tmp := concatenate ( lparenNode , middleNode )
to_add := concatenate ( tmp , s )
@ -993,7 +993,7 @@ func thompson(re []postfixNode) (Reg, error) {
}
if c . startReps != 1 || c . endReps != 1 { // Must have a numeric specifier attached to it
if c . endReps != - 1 && c . endReps < c . startReps {
return Reg { } , fmt . Errorf ( " Numeric specifier - start greater than end. ")
return Reg { } , fmt . Errorf ( " numeric specifier - start greater than end ")
}
state := mustPop ( & nfa )
var stateToAdd * State = nil
@ -1024,7 +1024,7 @@ func thompson(re []postfixNode) (Reg, error) {
}
}
if len ( nfa ) != 1 {
return Reg { } , fmt . Errorf ( " Invalid Regex. ")
return Reg { } , fmt . Errorf ( " invalid regex ")
}
verifyLastStates ( nfa )
@ -1040,11 +1040,11 @@ func thompson(re []postfixNode) (Reg, error) {
// Compile turns the regular expression re into a Reg.
//
// It runs in two stages: shuntingYard converts re (honouring any flags) into
// a slice of postfix nodes, and thompson builds the resulting Reg from those
// nodes. If either stage fails, Compile returns the zero Reg together with an
// error that wraps the underlying failure (so callers can use errors.Is /
// errors.As on it).
func Compile(re string, flags ...ReFlag) (Reg, error) {
	nodes, err := shuntingYard(re, flags...)
	if err != nil {
		// Exactly one return per failure branch: a second return after this
		// one would be unreachable.
		return Reg{}, fmt.Errorf("error parsing regex: %w", err)
	}
	reg, err := thompson(nodes)
	if err != nil {
		return Reg{}, fmt.Errorf("error compiling regex: %w", err)
	}
	return reg, nil
}