@ -51,6 +51,9 @@ func shuntingYard(re string) []postfixNode {
// Eventually, I might be able to add it into the main parsing loop, to reduce the time
// Eventually, I might be able to add it into the main parsing loop, to reduce the time
// complexity.
// complexity.
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
//
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
for i := 0 ; i < len ( re_runes_orig ) ; i ++ {
for i := 0 ; i < len ( re_runes_orig ) ; i ++ {
c := re_runes_orig [ i ]
c := re_runes_orig [ i ]
if c == '<' && ( i == 0 || ( re_runes_orig [ i - 1 ] != '\\' && re_runes_orig [ i - 1 ] != '?' ) ) {
if c == '<' && ( i == 0 || ( re_runes_orig [ i - 1 ] != '\\' && re_runes_orig [ i - 1 ] != '?' ) ) {
@ -82,7 +85,9 @@ func shuntingYard(re string) []postfixNode {
fmt . Sscanf ( tmpStr , "%d-%d" , & rangeStart , & rangeEnd )
fmt . Sscanf ( tmpStr , "%d-%d" , & rangeStart , & rangeEnd )
regex := range2regex ( rangeStart , rangeEnd )
regex := range2regex ( rangeStart , rangeEnd )
re_runes = append ( re_runes , [ ] rune ( regex ) ... )
re_runes = append ( re_runes , [ ] rune ( regex ) ... )
} else if c == '(' && i < len ( re_runes_orig ) - 2 && re_runes_orig [ i + 1 ] == '?' && re_runes_orig [ i + 2 ] == ':' {
re_runes = append ( re_runes , NONCAPLPAREN_CHAR )
i += 2
} else {
} else {
re_runes = append ( re_runes , c )
re_runes = append ( re_runes , c )
}
}
@ -148,7 +153,11 @@ func shuntingYard(re string) []postfixNode {
}
}
re_postfix = append ( re_postfix , re_runes [ i ] ) // Append closing brace
re_postfix = append ( re_postfix , re_runes [ i ] ) // Append closing brace
}
}
if i < len ( re_runes ) && re_runes [ i ] == '(' && ( i == 0 || re_runes [ i - 1 ] != '\\' ) && ( i < len ( re_runes ) - 1 && re_runes [ i + 1 ] == '?' ) { // Unescaped open parentheses followed by question mark = lokaround. Don't mess with it.
if i < len ( re_runes ) - 3 && string ( re_runes [ i + 1 : i + 4 ] ) == "(?:" { // Non-capturing lparen
re_postfix = append ( re_postfix , NONCAPLPAREN_CHAR )
i += 3
}
if i < len ( re_runes ) && re_runes [ i ] == '(' && ( i == 0 || re_runes [ i - 1 ] != '\\' ) && ( i < len ( re_runes ) - 2 && re_runes [ i + 1 ] == '?' && slices . Contains ( [ ] rune { '=' , '!' , '<' } , re_runes [ i + 2 ] ) ) { // Unescaped open parentheses followed by question mark then '<', '!' or '=' => lokaround. Don't mess with it.
i ++ // Step inside
i ++ // Step inside
if i == len ( re_runes ) - 1 || ( re_runes [ i + 1 ] != '=' && re_runes [ i + 1 ] != '!' && re_runes [ i + 1 ] != '<' ) {
if i == len ( re_runes ) - 1 || ( re_runes [ i + 1 ] != '=' && re_runes [ i + 1 ] != '!' && re_runes [ i + 1 ] != '<' ) {
panic ( "Invalid regex. Lookaround intended?" )
panic ( "Invalid regex. Lookaround intended?" )
@ -174,7 +183,7 @@ func shuntingYard(re string) []postfixNode {
}
}
continue
continue
}
}
if i < len ( re_runes ) && ( re_runes [ i ] != '(' && re_runes [ i ] != '|' && re_runes [ i ] != '\\' ) || ( i > 0 && re_runes [ i - 1 ] == '\\' ) { // Every character should be concatenated if it is escaped
if i < len ( re_runes ) && ( re_runes [ i ] != '(' && re_runes [ i ] != NONCAPLPAREN_CHAR && re_runes [ i ] != '|' && re_runes [ i ] != '\\' ) || ( i > 0 && re_runes [ i - 1 ] == '\\' ) { // Every character should be concatenated if it is escaped
if i < len ( re_runes ) - 1 {
if i < len ( re_runes ) - 1 {
if re_runes [ i + 1 ] != '|' && re_runes [ i + 1 ] != '*' && re_runes [ i + 1 ] != '+' && re_runes [ i + 1 ] != '?' && re_runes [ i + 1 ] != ')' && re_runes [ i + 1 ] != '{' {
if re_runes [ i + 1 ] != '|' && re_runes [ i + 1 ] != '*' && re_runes [ i + 1 ] != '+' && re_runes [ i + 1 ] != '?' && re_runes [ i + 1 ] != ')' && re_runes [ i + 1 ] != '{' {
re_postfix = append ( re_postfix , CONCAT )
re_postfix = append ( re_postfix , CONCAT )
@ -197,7 +206,7 @@ func shuntingYard(re string) []postfixNode {
b . If not , keep popping from opStack ( and appending to outQueue ) until :
b . If not , keep popping from opStack ( and appending to outQueue ) until :
i . opStack is empty , OR
i . opStack is empty , OR
ii . current character has greater priority than top of opStack
ii . current character has greater priority than top of opStack
3. If current character is '(' , push to opStack
3. If current character is '(' or NONCAPLPAREN_CHAR , push to opStack
4. If current character is ')' , pop from opStack ( and append to outQueue ) until '(' is found . Discard parentheses .
4. If current character is ')' , pop from opStack ( and append to outQueue ) until '(' is found . Discard parentheses .
5. If current character is '[' , find all the characters until ']' , then create a postfixNode containing all these contents . Add this node to outQueue .
5. If current character is '[' , find all the characters until ']' , then create a postfixNode containing all these contents . Add this node to outQueue .
6. If current character is '{' , find the appropriate numeric specifier ( range start , range end ) . Apply the range to the postfixNode at the end of outQueue .
6. If current character is '{' , find the appropriate numeric specifier ( range start , range end ) . Apply the range to the postfixNode at the end of outQueue .
@ -389,22 +398,28 @@ func shuntingYard(re string) []postfixNode {
outQueue [ idx ] . startReps = startRangeNum
outQueue [ idx ] . startReps = startRangeNum
outQueue [ idx ] . endReps = endRangeNum
outQueue [ idx ] . endReps = endRangeNum
}
}
if c == '(' {
if c == '(' || c == NONCAPLPAREN_CHAR {
opStack = append ( opStack , c )
opStack = append ( opStack , c )
outQueue = append ( outQueue , newPostfixNode ( c ) )
if c == '(' { // We only push _capturing_ group parentheses to outQueue
outQueue = append ( outQueue , newPostfixNode ( c ) )
}
numOpenParens ++
numOpenParens ++
}
}
if c == ')' {
if c == ')' {
// Keep popping from opStack until we encounter an opening parenthesis. Panic if we reach the end of the stack.
// Keep popping from opStack until we encounter an opening parenthesis or a NONCAPLPAREN_CHAR. Panic if we reach the end of the stack.
for val , err := peek ( opStack ) ; val != '(' ; val , err = peek ( opStack ) {
var val rune
var err error
for val , err = peek ( opStack ) ; val != '(' && val != NONCAPLPAREN_CHAR ; val , err = peek ( opStack ) {
if err != nil {
if err != nil {
panic ( "ERROR: Imbalanced parantheses." )
panic ( "ERROR: Imbalanced parantheses." )
}
}
to_append := mustPop ( & opStack )
to_append := mustPop ( & opStack )
outQueue = append ( outQueue , newPostfixNode ( to_append ) )
outQueue = append ( outQueue , newPostfixNode ( to_append ) )
}
}
_ = mustPop ( & opStack ) // Get rid of opening parentheses
_ = mustPop ( & opStack ) // Get rid of opening parentheses
outQueue = append ( outQueue , newPostfixNode ( ')' ) ) // Add closing parentheses
if val == '(' { // Whatever was inside the parentheses was a _capturing_ group, so we append the closing parentheses as well
outQueue = append ( outQueue , newPostfixNode ( ')' ) ) // Add closing parentheses
}
numOpenParens --
numOpenParens --
}
}
}
}