Started working on unicode character classes
This commit is contained in:
		| @@ -108,6 +108,48 @@ func getPOSIXClass(str []rune) (bool, string) { | ||||
| 	return true, rtv | ||||
| } | ||||
|  | ||||
// isUnicodeCharClassLetter reports whether c is one of the single-letter
// unicode character class names: L, M, S, N, P, C or Z.
func isUnicodeCharClassLetter(c rune) bool {
	classLetters := []rune("LMSNPCZ")
	return slices.Index(classLetters, c) >= 0
}
|  | ||||
// rangeTableToRuneSlice expands the given range table into a slice holding
// every rune the table contains.
//
// Each Range16/Range32 entry describes the runes Lo, Lo+Stride, Lo+2*Stride, ...
// up to and including Hi, so the upper bound is treated as inclusive. Runes from
// the 16-bit entries come first, followed by those from the 32-bit entries, in
// table order. A nil table yields a nil slice.
func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune {
	if rangetable == nil {
		return nil
	}
	var rtv []rune
	for _, r := range rangetable.R16 {
		rtv = appendRuneRange(rtv, int64(r.Lo), int64(r.Hi), int64(r.Stride))
	}
	for _, r := range rangetable.R32 {
		rtv = appendRuneRange(rtv, int64(r.Lo), int64(r.Hi), int64(r.Stride))
	}
	return rtv
}

// appendRuneRange appends lo, lo+stride, ... (never exceeding hi, which is
// inclusive) to dst and returns the extended slice.
//
// The bounds are taken as int64 so that the loop counter cannot wrap around when
// hi is the maximum value of the original uint16/uint32 field.
func appendRuneRange(dst []rune, lo, hi, stride int64) []rune {
	if stride < 1 {
		// A zero stride only occurs in a malformed table; step by one so the
		// loop is guaranteed to terminate.
		stride = 1
	}
	for c := lo; c <= hi; c += stride {
		dst = append(dst, rune(c))
	}
	return dst
}
|  | ||||
| // unicodeCharClassToRange converts the given unicode character class name into a list of characters in that class. | ||||
| // This class could also be a single letter eg. 'C'. | ||||
| func unicodeCharClassToRange(class string) ([]rune, error) { | ||||
| 	if len(class) == 0 { | ||||
| 		return nil, fmt.Errorf("empty unicode character class") | ||||
| 	} | ||||
| 	if len(class) == 1 || len(class) == 2 { | ||||
| 		if rangeTable, ok := unicode.Categories[class]; ok { | ||||
| 			return rangeTableToRuneSlice(rangeTable), nil | ||||
| 		} else { | ||||
| 			return nil, fmt.Errorf("invalid short unicode character class") | ||||
| 		} | ||||
| 	} else { | ||||
| 		if rangeTable, ok := unicode.Scripts[class]; ok { | ||||
| 			return rangeTableToRuneSlice(rangeTable), nil | ||||
| 		} else { | ||||
| 			return nil, fmt.Errorf("invalid long unicode character class") | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // caseInsensitive stores whether the case-insensitive flag has been enabled. | ||||
| var caseInsensitive bool | ||||
|  | ||||
| @@ -313,6 +355,25 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { | ||||
| 				} else { | ||||
| 					return nil, fmt.Errorf("invalid hex value in expression") | ||||
| 				} | ||||
| 			} else if re_runes[i] == 'p' || re_runes[i] == 'P' { // Unicode character class (P is negated unicode charclass) | ||||
| 				re_postfix = append(re_postfix, re_runes[i]) | ||||
| 				i++ | ||||
| 				if i >= len(re_runes) { | ||||
| 					return nil, fmt.Errorf("error parsing unicode character class in expression") | ||||
| 				} | ||||
| 				if re_runes[i] == '{' { // Full name charclass | ||||
| 					for re_runes[i] != '}' { | ||||
| 						re_postfix = append(re_postfix, re_runes[i]) | ||||
| 						i++ | ||||
| 					} | ||||
| 					re_postfix = append(re_postfix, re_runes[i]) | ||||
| 					i++ | ||||
| 				} else if isUnicodeCharClassLetter(re_runes[i]) { | ||||
| 					re_postfix = append(re_postfix, re_runes[i]) | ||||
| 					i++ | ||||
| 				} else { | ||||
| 					return nil, fmt.Errorf("error parsing unicode character class in expression") | ||||
| 				} | ||||
| 			} else if re_runes[i] == '0' { // Start of octal value | ||||
| 				numDigits := 1 | ||||
| 				for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user