Added Unicode support to the dot metacharacter - it now matches (almost) _any_ Unicode character

This commit is contained in:
2024-11-18 16:44:43 -05:00
parent 8a1f1dc621
commit c56d81a335
4 changed files with 35 additions and 5 deletions

View File

@@ -8,8 +8,10 @@ import (
// Character sets and sentinel runes shared across the package.
var (
	// whitespaceChars holds the space, tab and newline runes.
	whitespaceChars = []rune(" \t\n")
	// digitChars holds the ASCII decimal digits '0' through '9'.
	digitChars = []rune("0123456789")
	// wordChars holds the ASCII letters (lower case, then upper case), the
	// ASCII digits, and the underscore.
	wordChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
	// notDotChars holds only the newline rune.
	// NOTE(review): presumably the runes the dot metacharacter must not
	// match - confirm against the matcher.
	notDotChars = []rune("\n")

	// LBRACKET and RBRACKET are consecutive sentinel code points (0xF0000 and
	// 0xF0001).
	// NOTE(review): presumably internal stand-ins for bracket characters -
	// confirm against the parser.
	LBRACKET rune = 0xF0000
	RBRACKET rune = 0xF0001

	// ANY_CHAR represents any character - used for the 'dot' metacharacter.
	ANY_CHAR rune = 0xF0002
)
func dotChars() []rune { // Returns all possible characters represented by the dot metacharacter - this is too tedious to define as a variable, which is why it is a function
start := 0x0020