@ -108,6 +108,48 @@ func getPOSIXClass(str []rune) (bool, string) {
return true , rtv
}
// isUnicodeCharClassLetter reports whether c is one of the single-letter
// Unicode character class names accepted after \p or \P: L, M, S, N, P, C or Z.
// The check is case-sensitive.
func isUnicodeCharClassLetter(c rune) bool {
	classLetters := []rune("LMSNPCZ")
	return slices.Index(classLetters, c) != -1
}
// rangeTableToRuneSlice expands the given range table into a slice holding
// every code point it describes and returns that slice.
//
// The 16-bit ranges are emitted first, followed by the 32-bit ranges, each in
// the order they appear in the table. A table with no ranges yields a nil
// slice.
func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune {
	var rtv []rune
	for _, r := range rangetable.R16 {
		// Iterate in a type wider than uint16: with a uint16 counter,
		// c += Stride wraps around whenever Hi+Stride exceeds 0xFFFF
		// (e.g. {0xfeff, 0xfff9, 250}), which produces bogus code points or
		// never terminates.
		lo, hi, stride := rune(r.Lo), rune(r.Hi), rune(r.Stride)
		for c := lo; c <= hi; c += stride {
			rtv = append(rtv, c)
		}
	}
	for _, r := range rangetable.R32 {
		// Same reasoning as above: a 64-bit counter cannot wrap for any
		// uint32 Lo/Hi/Stride combination.
		lo, hi, stride := int64(r.Lo), int64(r.Hi), int64(r.Stride)
		for c := lo; c <= hi; c += stride {
			rtv = append(rtv, rune(c))
		}
	}
	return rtv
}
// unicodeCharClassToRange converts the given unicode character class name into
// a list of the characters in that class.
//
// The name may be a general category (e.g. "L", "Lu") or a script (e.g.
// "Greek", "Yi"). Both tables are consulted regardless of the name's length,
// so that two-letter script names are not rejected as invalid categories.
//
// It returns an error if the name is empty or is found in neither
// unicode.Categories nor unicode.Scripts.
func unicodeCharClassToRange(class string) ([]rune, error) {
	if len(class) == 0 {
		return nil, fmt.Errorf("empty unicode character class")
	}
	if rangeTable, ok := unicode.Categories[class]; ok {
		return rangeTableToRuneSlice(rangeTable), nil
	}
	if rangeTable, ok := unicode.Scripts[class]; ok {
		return rangeTableToRuneSlice(rangeTable), nil
	}
	// Keep the two distinct messages callers may already match on: names of
	// one or two bytes are reported as "short", anything longer as "long".
	if len(class) <= 2 {
		return nil, fmt.Errorf("invalid short unicode character class")
	}
	return nil, fmt.Errorf("invalid long unicode character class")
}
// caseInsensitive stores whether the case-insensitive flag has been enabled.
// NOTE(review): this is package-level mutable state; where it is set and read
// is not visible in this part of the file — confirm before relying on it.
var caseInsensitive bool
@ -309,10 +351,30 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
} else if isHex ( re_runes [ i ] ) {
re_postfix = append ( re_postfix , re_runes [ i : i + 2 ] ... )
i += 2
i += 1 // I don't skip forward 2 steps, because the second step will happen with the loop increment
} else {
return nil , fmt . Errorf ( "invalid hex value in expression" )
}
} else if re_runes [ i ] == 'p' || re_runes [ i ] == 'P' { // Unicode character class (P is negated unicode charclass)
re_postfix = append ( re_postfix , re_runes [ i ] )
i ++
if i >= len ( re_runes ) {
return nil , fmt . Errorf ( "error parsing unicode character class in expression" )
}
if re_runes [ i ] == '{' { // Full name charclass
for re_runes [ i ] != '}' {
re_postfix = append ( re_postfix , re_runes [ i ] )
i ++
}
re_postfix = append ( re_postfix , re_runes [ i ] )
i ++
} else if isUnicodeCharClassLetter ( re_runes [ i ] ) {
re_postfix = append ( re_postfix , re_runes [ i ] )
i ++
} else {
return nil , fmt . Errorf ( "error parsing unicode character class in expression" )
}
i -- // The loop increment at the top will move us forward
} else if re_runes [ i ] == '0' { // Start of octal value
numDigits := 1
for i + numDigits < len ( re_runes ) && numDigits < 4 && isOctal ( re_runes [ i + numDigits ] ) { // Skip while we see an octal character (max of 4, starting with 0)
@ -429,6 +491,39 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
return nil , fmt . Errorf ( "not enough hex characters found in expression" )
}
} else if re_postfix [ i ] == 'p' || re_postfix [ i ] == 'P' {
charClassInverted := ( re_postfix [ i ] == 'P' )
charsInClass := [ ] rune { }
i ++
if isUnicodeCharClassLetter ( re_postfix [ i ] ) {
var err error
charsInClass , err = unicodeCharClassToRange ( string ( re_postfix [ i ] ) )
if err != nil {
return nil , err
}
} else if re_postfix [ i ] == '{' {
i ++ // Skip opening bracket
unicodeCharClassStr := ""
for re_postfix [ i ] != '}' {
unicodeCharClassStr += string ( re_postfix [ i ] )
i ++
}
var err error
charsInClass , err = unicodeCharClassToRange ( unicodeCharClassStr )
if err != nil {
return nil , err
}
} else {
return nil , fmt . Errorf ( "error parsing unicode character class in expression" )
}
var toAppend postfixNode
if ! charClassInverted { // \p
toAppend = newPostfixNode ( charsInClass ... )
} else { // \P
toAppend = newPostfixDotNode ( )
toAppend . except = append ( [ ] postfixNode { } , newPostfixNode ( charsInClass ... ) )
}
outQueue = append ( outQueue , toAppend )
} else if re_postfix [ i ] == '0' { // Octal value
var octVal int64
var octValStr string
@ -611,7 +706,40 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
return nil , fmt . Errorf ( "not enough hex characters found in character class" )
}
} else if re_postfix [ i ] == 'p' || re_postfix [ i ] == 'P' {
charClassInverted := ( re_postfix [ i ] == 'P' )
charsInList := [ ] rune { }
i ++
if isUnicodeCharClassLetter ( re_postfix [ i ] ) {
var err error
charsInList , err = unicodeCharClassToRange ( string ( re_postfix [ i ] ) )
if err != nil {
return nil , err
}
} else if re_postfix [ i ] == '{' {
i ++ // Skip opening bracket
unicodeCharClassStr := ""
for re_postfix [ i ] != '}' {
unicodeCharClassStr += string ( re_postfix [ i ] )
i ++
}
var err error
charsInList , err = unicodeCharClassToRange ( unicodeCharClassStr )
if err != nil {
return nil , err
}
} else {
return nil , fmt . Errorf ( "error parsing unicode character class in expression" )
}
if ! charClassInverted {
chars = append ( chars , newPostfixNode ( charsInList ... ) )
} else {
toAppend := newPostfixDotNode ( )
toAppend . except = append ( [ ] postfixNode { } , newPostfixNode ( charsInList ... ) )
chars = append ( chars , toAppend )
}
} else if re_postfix [ i ] == '0' { // Octal value
var octVal int64
var octValStr string
numDigitsParsed := 0