@ -112,7 +112,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// Convert the string to a slice of runes to allow iteration through it
re_runes_orig := [ ] rune ( re ) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
re_runes := make ( [ ] rune , 0 )
// Check for numeric range. If we are at the start of a numeric range,
// The following checks are performed here:
// 1. Check for numeric range. If we are at the start of a numeric range,
// skip to end and construct the equivalent regex for the range.
// The reason this is outside the loop below, is that it actually modifies
// the given regex (we 'cut' the numeric range and 'paste' an equivalent regex).
@ -122,13 +123,19 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// complexity.
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
//
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
// 2. Check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
//
// Another check is made for unescaped brackets - opening brackets are replaced with LBRACKET and closing brackets are replaced with RBRACKET.
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
// these will be converted back. This avoids confusion in detecting whether a character is escaped eg. detecting
// 3. Another check is made for unescaped brackets - opening brackets are replaced with
// LBRACKET and closing brackets are replaced with RBRACKET.
//
// 4. Check for escaped backslashes. Replace these with the BACKSLASH
// metacharacter. Later, in thompson(), these will be converted back. This avoids
// confusion in detecting whether a character is escaped eg. detecting
// whether '\\[a]' has an escaped opening bracket (it doesn't).
//
// 5. Check for non-greedy operators. These are not supported at the moment, so an error
// is returned if the user attempts to use a non-greedy operator.
for i := 0 ; i < len ( re_runes_orig ) ; i ++ {
c := re_runes_orig [ i ]
if c == '<' && ( i == 0 || ( re_runes_orig [ i - 1 ] != '\\' && re_runes_orig [ i - 1 ] != '?' ) ) {
@ -172,6 +179,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else if c == ']' && ( i == 0 || re_runes [ len ( re_runes ) - 1 ] != '\\' ) {
re_runes = append ( re_runes , RBRACKET )
continue
} else if slices . Contains ( [ ] rune { '+' , '*' , '?' } , c ) && ( i < len ( re_runes_orig ) - 1 && re_runes_orig [ i + 1 ] == '?' ) {
return nil , fmt . Errorf ( "non-greedy operators are not supported" )
} else {
re_runes = append ( re_runes , c )
}
@ -480,9 +489,6 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if ( c == '*' && outQueueFinalElement . nodetype == KLEENE ) || ( c == '+' && outQueueFinalElement . nodetype == PLUS ) { // You cannot apply a quantifier to a quantifier in this way
return nil , fmt . Errorf ( "illegal use of token '%c'" , c )
}
if c == '?' && slices . Contains ( [ ] NodeType { KLEENE , PLUS , QUESTION } , outQueueFinalElement . nodetype ) {
return nil , fmt . Errorf ( "non-greedy operators not supported" )
}
opStack = append ( opStack , c )
}
}
@ -1004,18 +1010,24 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
return Reg { } , fmt . Errorf ( "error applying kleene star" )
}
if s1 . isEmpty && s1 . assert != NONE {
return Reg { } , fmt . Errorf ( "previous token is not quantifiable" )
stateToAdd , err := kleene ( * s1 )
if err != nil {
return Reg { } , err
}
stateToAdd := kleene ( * s1 )
nfa = append ( nfa , stateToAdd )
case PLUS : // a+ is equivalent to aa*
s1 := mustPop ( & nfa )
s2 := kleene ( * s1 )
s2 , err := kleene ( * s1 )
if err != nil {
return Reg { } , err
}
s1 = concatenate ( s1 , s2 )
nfa = append ( nfa , s1 )
case QUESTION : // ab? is equivalent to a(b|)
s1 := mustPop ( & nfa )
s1 , err := pop ( & nfa )
if err != nil {
return Reg { } , fmt . Errorf ( "error applying question operator" )
}
s2 := question ( s1 )
nfa = append ( nfa , s2 )
case PIPE :
@ -1068,7 +1080,10 @@ func thompson(re []postfixNode) (Reg, error) {
stateToAdd = concatenate ( stateToAdd , cloneState ( state ) )
}
if c . endReps == INFINITE_REPS { // Case 3
s2 := kleene ( * state )
s2 , err := kleene ( * state )
if err != nil {
return Reg { } , err
}
stateToAdd = concatenate ( stateToAdd , s2 )
} else { // Case 2
for i := c . startReps ; i < c . endReps ; i ++ {