Added Unicode character-class support within character classes; fixed bugs with hex classes and Unicode classes

implementUnicodeCharClass
Aadhavan Srinivasan 4 weeks ago
parent 7045711860
commit fde3784e5a

@ -117,12 +117,12 @@ func isUnicodeCharClassLetter(c rune) bool {
func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune {
var rtv []rune
for _, r := range rangetable.R16 {
for c := r.Lo; c < r.Hi; c += r.Stride {
for c := r.Lo; c <= r.Hi; c += r.Stride {
rtv = append(rtv, rune(c))
}
}
for _, r := range rangetable.R32 {
for c := r.Lo; c < r.Hi; c += r.Stride {
for c := r.Lo; c <= r.Hi; c += r.Stride {
rtv = append(rtv, rune(c))
}
}
@ -351,7 +351,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
} else if isHex(re_runes[i]) {
re_postfix = append(re_postfix, re_runes[i:i+2]...)
i += 2
i += 1 // I don't skip forward 2 steps, because the second step will happen with the loop increment
} else {
return nil, fmt.Errorf("invalid hex value in expression")
}
@ -374,6 +374,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
return nil, fmt.Errorf("error parsing unicode character class in expression")
}
i-- // The loop increment at the top will move us forward
} else if re_runes[i] == '0' { // Start of octal value
numDigits := 1
for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0)
@ -499,10 +500,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
return nil, err
}
var toAppend postfixNode
if re_postfix[i] == 'p' {
if !charClassInverted {
toAppend = newPostfixNode(chars...)
}
if re_postfix[i] == 'P' {
} else {
toAppend = newPostfixDotNode()
toAppend.except = append([]postfixNode{}, newPostfixNode(chars...))
}
@ -711,7 +711,45 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
return nil, fmt.Errorf("not enough hex characters found in character class")
}
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
charClassInverted := (re_postfix[i] == 'P')
i++
if isUnicodeCharClassLetter(re_postfix[i]) {
charsInList, err := unicodeCharClassToRange(string(re_postfix[i]))
if err != nil {
return nil, err
}
if !charClassInverted {
chars = append(chars, newPostfixNode(charsInList...))
} else {
toAppend := newPostfixDotNode()
toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...))
chars = append(chars, toAppend)
}
} else if re_postfix[i] == '{' {
i++ // Skip opening bracket
unicodeCharClassStr := ""
for re_postfix[i] != '}' {
unicodeCharClassStr += string(re_postfix[i])
i++
}
charsInList, err := unicodeCharClassToRange(unicodeCharClassStr)
if err != nil {
return nil, err
}
if !charClassInverted {
chars = append(chars, newPostfixNode(charsInList...))
} else {
toAppend := newPostfixDotNode()
toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...))
chars = append(chars, toAppend)
}
} else {
return nil, fmt.Errorf("error parsing unicode character class in expression")
}
} else if re_postfix[i] == '0' { // Octal value
var octVal int64
var octValStr string
numDigitsParsed := 0

Loading…
Cancel
Save