Added Unicode charclass support within character classes; fixed bugs with hex classes and Unicode classes

implementUnicodeCharClass
Aadhavan Srinivasan 4 weeks ago
parent 7045711860
commit fde3784e5a

@ -117,12 +117,12 @@ func isUnicodeCharClassLetter(c rune) bool {
func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune { func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune {
var rtv []rune var rtv []rune
for _, r := range rangetable.R16 { for _, r := range rangetable.R16 {
for c := r.Lo; c < r.Hi; c += r.Stride { for c := r.Lo; c <= r.Hi; c += r.Stride {
rtv = append(rtv, rune(c)) rtv = append(rtv, rune(c))
} }
} }
for _, r := range rangetable.R32 { for _, r := range rangetable.R32 {
for c := r.Lo; c < r.Hi; c += r.Stride { for c := r.Lo; c <= r.Hi; c += r.Stride {
rtv = append(rtv, rune(c)) rtv = append(rtv, rune(c))
} }
} }
@ -351,7 +351,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
} else if isHex(re_runes[i]) { } else if isHex(re_runes[i]) {
re_postfix = append(re_postfix, re_runes[i:i+2]...) re_postfix = append(re_postfix, re_runes[i:i+2]...)
i += 2 i += 1 // I don't skip forward 2 steps, because the second step will happen with the loop increment
} else { } else {
return nil, fmt.Errorf("invalid hex value in expression") return nil, fmt.Errorf("invalid hex value in expression")
} }
@ -374,6 +374,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else { } else {
return nil, fmt.Errorf("error parsing unicode character class in expression") return nil, fmt.Errorf("error parsing unicode character class in expression")
} }
i-- // The loop increment at the top will move us forward
} else if re_runes[i] == '0' { // Start of octal value } else if re_runes[i] == '0' { // Start of octal value
numDigits := 1 numDigits := 1
for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0) for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0)
@ -499,10 +500,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
return nil, err return nil, err
} }
var toAppend postfixNode var toAppend postfixNode
if re_postfix[i] == 'p' { if !charClassInverted {
toAppend = newPostfixNode(chars...) toAppend = newPostfixNode(chars...)
} } else {
if re_postfix[i] == 'P' {
toAppend = newPostfixDotNode() toAppend = newPostfixDotNode()
toAppend.except = append([]postfixNode{}, newPostfixNode(chars...)) toAppend.except = append([]postfixNode{}, newPostfixNode(chars...))
} }
@ -711,7 +711,45 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else { } else {
return nil, fmt.Errorf("not enough hex characters found in character class") return nil, fmt.Errorf("not enough hex characters found in character class")
} }
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
charClassInverted := (re_postfix[i] == 'P')
i++
if isUnicodeCharClassLetter(re_postfix[i]) {
charsInList, err := unicodeCharClassToRange(string(re_postfix[i]))
if err != nil {
return nil, err
}
if !charClassInverted {
chars = append(chars, newPostfixNode(charsInList...))
} else {
toAppend := newPostfixDotNode()
toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...))
chars = append(chars, toAppend)
}
} else if re_postfix[i] == '{' {
i++ // Skip opening bracket
unicodeCharClassStr := ""
for re_postfix[i] != '}' {
unicodeCharClassStr += string(re_postfix[i])
i++
}
charsInList, err := unicodeCharClassToRange(unicodeCharClassStr)
if err != nil {
return nil, err
}
if !charClassInverted {
chars = append(chars, newPostfixNode(charsInList...))
} else {
toAppend := newPostfixDotNode()
toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...))
chars = append(chars, toAppend)
}
} else {
return nil, fmt.Errorf("error parsing unicode character class in expression")
}
} else if re_postfix[i] == '0' { // Octal value } else if re_postfix[i] == '0' { // Octal value
var octVal int64 var octVal int64
var octValStr string var octValStr string
numDigitsParsed := 0 numDigitsParsed := 0

Loading…
Cancel
Save