diff --git a/regex/compile.go b/regex/compile.go index 428df30..62dbfb9 100644 --- a/regex/compile.go +++ b/regex/compile.go @@ -117,12 +117,12 @@ func isUnicodeCharClassLetter(c rune) bool { func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune { var rtv []rune for _, r := range rangetable.R16 { - for c := r.Lo; c < r.Hi; c += r.Stride { + for c := r.Lo; c <= r.Hi; c += r.Stride { rtv = append(rtv, rune(c)) } } for _, r := range rangetable.R32 { - for c := r.Lo; c < r.Hi; c += r.Stride { + for c := r.Lo; c <= r.Hi; c += r.Stride { rtv = append(rtv, rune(c)) } } @@ -351,7 +351,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { } } else if isHex(re_runes[i]) { re_postfix = append(re_postfix, re_runes[i:i+2]...) - i += 2 + i += 1 // I don't skip forward 2 steps, because the second step will happen with the loop increment } else { return nil, fmt.Errorf("invalid hex value in expression") } @@ -374,6 +374,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { } else { return nil, fmt.Errorf("error parsing unicode character class in expression") } + i-- // The loop increment at the top will move us forward } else if re_runes[i] == '0' { // Start of octal value numDigits := 1 for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0) @@ -499,10 +500,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { return nil, err } var toAppend postfixNode - if re_postfix[i] == 'p' { + if !charClassInverted { toAppend = newPostfixNode(chars...) - } - if re_postfix[i] == 'P' { + } else { toAppend = newPostfixDotNode() toAppend.except = append([]postfixNode{}, newPostfixNode(chars...)) } @@ -711,7 +711,45 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) { } else { return nil, fmt.Errorf("not enough hex characters found in character class") } + } else if re_postfix[i] == 'p' || re_postfix[i] == 'P' { + charClassInverted := (re_postfix[i] == 'P') + i++ + if isUnicodeCharClassLetter(re_postfix[i]) { + charsInList, err := unicodeCharClassToRange(string(re_postfix[i])) + if err != nil { + return nil, err + } + if !charClassInverted { + chars = append(chars, newPostfixNode(charsInList...)) + } else { + toAppend := newPostfixDotNode() + toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...)) + chars = append(chars, toAppend) + } + } else if re_postfix[i] == '{' { + i++ // Skip opening bracket + unicodeCharClassStr := "" + for re_postfix[i] != '}' { + unicodeCharClassStr += string(re_postfix[i]) + i++ + } + charsInList, err := unicodeCharClassToRange(unicodeCharClassStr) + if err != nil { + return nil, err + } + if !charClassInverted { + chars = append(chars, newPostfixNode(charsInList...)) + } else { + toAppend := newPostfixDotNode() + toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...)) + chars = append(chars, toAppend) + } + } else { + return nil, fmt.Errorf("error parsing unicode character class in expression") + } + } else if re_postfix[i] == '0' { // Octal value + var octVal int64 var octValStr string numDigitsParsed := 0