7 Commits

6 changed files with 26 additions and 13 deletions

View File

@@ -2,8 +2,8 @@
Kleingrep is a regular expression engine, providing a library and command-line tool written in Go. Kleingrep is a regular expression engine, providing a library and command-line tool written in Go.
It aims to provide a more featureful engine, compared to the one in It aims to provide a more featureful engine, compared to the one in Go's
[Go's standard library](https://pkg.go.dev/regexp), while retaining some semblance of efficiency. [regexp](https://pkg.go.dev/regexp), while retaining some semblance of efficiency.
The engine does __not__ use backtracking, relying on the NFA-based method described in The engine does __not__ use backtracking, relying on the NFA-based method described in
[Russ Cox's articles](https://swtch.com/~rsc/regexp). As such, it is immune to catastrophic backtracking. [Russ Cox's articles](https://swtch.com/~rsc/regexp). As such, it is immune to catastrophic backtracking.

View File

@@ -204,10 +204,12 @@ func main() {
color.New(color.FgRed).Fprintf(out, "%c", c) color.New(color.FgRed).Fprintf(out, "%c", c)
// Newline after every match - only if -o is enabled and -v is disabled. // Newline after every match - only if -o is enabled and -v is disabled.
if *onlyFlag && !(*invertFlag) { if *onlyFlag && !(*invertFlag) {
for _, idx := range matchIndices { for matchIdxNum, idx := range matchIndices {
if i+1 == idx[0].EndIdx { // End index is one more than last index of match if matchIdxNum < len(matchIndices)-1 { // Only print a newline after printing a match, if there are multiple matches on the line, and we aren't on the last one. This is because the newline that gets added at the end will take care of that.
fmt.Fprintf(out, "\n") if i+1 == idx[0].EndIdx { // End index is one more than last index of match
break fmt.Fprintf(out, "\n")
break
}
} }
} }
} }
@@ -222,6 +224,10 @@ func main() {
if err != nil { if err != nil {
panic(err) panic(err)
} }
fmt.Println() // If the last character in the string wasn't a newline, AND we either don't have -o set or we do (and we've matched something), then print a newline
if (len(test_str_runes) > 0 && test_str_runes[len(test_str_runes)-1] != '\n') &&
(!*onlyFlag || indicesToPrint.len() > 0) {
fmt.Println()
}
} }
} }

View File

@@ -36,3 +36,7 @@ func (s uniq_arr[T]) values() []T {
} }
return toRet return toRet
} }
// len reports the number of entries currently held in s.backingMap.
func (s uniq_arr[T]) len() int {
	count := len(s.backingMap)
	return count
}

View File

@@ -410,10 +410,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i >= len(re_runes) { if i >= len(re_runes) {
return nil, fmt.Errorf("unclosed lookaround") return nil, fmt.Errorf("unclosed lookaround")
} }
if re_runes[i] == '(' || re_runes[i] == nonCapLparenRune { if (re_runes[i] == '(' && re_runes[i-1] != '\\') || re_runes[i] == nonCapLparenRune {
numOpenParens++ numOpenParens++
} }
if re_runes[i] == ')' { if re_runes[i] == ')' && re_runes[i-1] != '\\' {
numOpenParens-- numOpenParens--
if numOpenParens == 0 { if numOpenParens == 0 {
break break
@@ -498,7 +498,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' { } else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
charClassInverted := (re_postfix[i] == 'P') charClassInverted := (re_postfix[i] == 'P')
charsInClass := []rune{} var charsInClass []rune
i++ i++
if isUnicodeCharClassLetter(re_postfix[i]) { if isUnicodeCharClassLetter(re_postfix[i]) {
var err error var err error
@@ -589,10 +589,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i >= len(re_postfix) { if i >= len(re_postfix) {
return nil, fmt.Errorf("unclosed lookaround") return nil, fmt.Errorf("unclosed lookaround")
} }
if re_postfix[i] == '(' || re_postfix[i] == nonCapLparenRune { if (re_postfix[i] == '(' && re_postfix[i-1] != '\\') || re_postfix[i] == nonCapLparenRune {
numOpenParens++ numOpenParens++
} }
if re_postfix[i] == ')' { if re_postfix[i] == ')' && re_postfix[i-1] != '\\' {
numOpenParens-- numOpenParens--
if numOpenParens == 0 { if numOpenParens == 0 {
break break
@@ -713,7 +713,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} }
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' { } else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
charClassInverted := (re_postfix[i] == 'P') charClassInverted := (re_postfix[i] == 'P')
charsInList := []rune{} var charsInList []rune
i++ i++
if isUnicodeCharClassLetter(re_postfix[i]) { if isUnicodeCharClassLetter(re_postfix[i]) {
var err error var err error

View File

@@ -161,6 +161,7 @@ The following features from [regexp] are (currently) NOT supported:
2. Negated POSIX classes 2. Negated POSIX classes
3. Embedded flags (flags are instead passed as arguments to [Compile]) 3. Embedded flags (flags are instead passed as arguments to [Compile])
4. Literal text with \Q ... \E 4. Literal text with \Q ... \E
5. Finite repetition with no start (defaulting to 0)
The following features are not available in [regexp], but are supported in my engine: The following features are not available in [regexp], but are supported in my engine:
1. Lookarounds 1. Lookarounds

View File

@@ -545,6 +545,8 @@ var reTests = []struct {
{`\pN+`, nil, `123abc456def`, []Group{{0, 3}, {6, 9}}}, {`\pN+`, nil, `123abc456def`, []Group{{0, 3}, {6, 9}}},
{`\PN+`, nil, `123abc456def`, []Group{{3, 6}, {9, 12}}}, {`\PN+`, nil, `123abc456def`, []Group{{3, 6}, {9, 12}}},
{`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}}, {`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}},
{`(?<=\().*?(?=\))`, nil, `(abc)`, []Group{{1, 4}}},
} }
var groupTests = []struct { var groupTests = []struct {