Compare commits
7 Commits
v0.3.0
...
d62a429cce
| Author | SHA1 | Date | |
|---|---|---|---|
| d62a429cce | |||
| 7b31031553 | |||
| 38c842cb07 | |||
| 9f9af36be8 | |||
| 8217b67122 | |||
| 1f06dcef64 | |||
| 119475b41b |
@@ -2,8 +2,8 @@
|
|||||||
|
|
||||||
Kleingrep is a regular expression engine, providing a library and command-line tool written in Go.
|
Kleingrep is a regular expression engine, providing a library and command-line tool written in Go.
|
||||||
|
|
||||||
It aims to provide a more featureful engine, compared to the one in
|
It aims to provide a more featureful engine, compared to the one in Go's
|
||||||
[Go's standard library](https://pkg.go.dev/regexp), while retaining some semblance of efficiency.
|
[regexp](https://pkg.go.dev/regexp), while retaining some semblance of efficiency.
|
||||||
|
|
||||||
The engine does __not__ use backtracking, relying on the NFA-based method described in
|
The engine does __not__ use backtracking, relying on the NFA-based method described in
|
||||||
[Russ Cox's articles](https://swtch.com/~rsc/regexp). As such, it is immune to catastrophic backtracking.
|
[Russ Cox's articles](https://swtch.com/~rsc/regexp). As such, it is immune to catastrophic backtracking.
|
||||||
|
|||||||
16
cmd/main.go
16
cmd/main.go
@@ -204,10 +204,12 @@ func main() {
|
|||||||
color.New(color.FgRed).Fprintf(out, "%c", c)
|
color.New(color.FgRed).Fprintf(out, "%c", c)
|
||||||
// Newline after every match - only if -o is enabled and -v is disabled.
|
// Newline after every match - only if -o is enabled and -v is disabled.
|
||||||
if *onlyFlag && !(*invertFlag) {
|
if *onlyFlag && !(*invertFlag) {
|
||||||
for _, idx := range matchIndices {
|
for matchIdxNum, idx := range matchIndices {
|
||||||
if i+1 == idx[0].EndIdx { // End index is one more than last index of match
|
if matchIdxNum < len(matchIndices)-1 { // Only print a newline after printing a match if there are multiple matches on the line and we aren't on the last one. This is because the newline that gets added at the end will take care of that.
|
||||||
fmt.Fprintf(out, "\n")
|
if i+1 == idx[0].EndIdx { // End index is one more than last index of match
|
||||||
break
|
fmt.Fprintf(out, "\n")
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -222,6 +224,10 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
fmt.Println()
|
// If the last character in the string wasn't a newline, AND we either don't have -o set or we do (and we've matched something), then print a newline
|
||||||
|
if (len(test_str_runes) > 0 && test_str_runes[len(test_str_runes)-1] != '\n') &&
|
||||||
|
(!*onlyFlag || indicesToPrint.len() > 0) {
|
||||||
|
fmt.Println()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,3 +36,7 @@ func (s uniq_arr[T]) values() []T {
|
|||||||
}
|
}
|
||||||
return toRet
|
return toRet
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s uniq_arr[T]) len() int {
|
||||||
|
return len(s.backingMap)
|
||||||
|
}
|
||||||
|
|||||||
@@ -410,10 +410,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
if i >= len(re_runes) {
|
if i >= len(re_runes) {
|
||||||
return nil, fmt.Errorf("unclosed lookaround")
|
return nil, fmt.Errorf("unclosed lookaround")
|
||||||
}
|
}
|
||||||
if re_runes[i] == '(' || re_runes[i] == nonCapLparenRune {
|
if (re_runes[i] == '(' && re_runes[i-1] != '\\') || re_runes[i] == nonCapLparenRune {
|
||||||
numOpenParens++
|
numOpenParens++
|
||||||
}
|
}
|
||||||
if re_runes[i] == ')' {
|
if re_runes[i] == ')' && re_runes[i-1] != '\\' {
|
||||||
numOpenParens--
|
numOpenParens--
|
||||||
if numOpenParens == 0 {
|
if numOpenParens == 0 {
|
||||||
break
|
break
|
||||||
@@ -498,7 +498,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
|
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
|
||||||
charClassInverted := (re_postfix[i] == 'P')
|
charClassInverted := (re_postfix[i] == 'P')
|
||||||
charsInClass := []rune{}
|
var charsInClass []rune
|
||||||
i++
|
i++
|
||||||
if isUnicodeCharClassLetter(re_postfix[i]) {
|
if isUnicodeCharClassLetter(re_postfix[i]) {
|
||||||
var err error
|
var err error
|
||||||
@@ -589,10 +589,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
if i >= len(re_postfix) {
|
if i >= len(re_postfix) {
|
||||||
return nil, fmt.Errorf("unclosed lookaround")
|
return nil, fmt.Errorf("unclosed lookaround")
|
||||||
}
|
}
|
||||||
if re_postfix[i] == '(' || re_postfix[i] == nonCapLparenRune {
|
if (re_postfix[i] == '(' && re_postfix[i-1] != '\\') || re_postfix[i] == nonCapLparenRune {
|
||||||
numOpenParens++
|
numOpenParens++
|
||||||
}
|
}
|
||||||
if re_postfix[i] == ')' {
|
if re_postfix[i] == ')' && re_postfix[i-1] != '\\' {
|
||||||
numOpenParens--
|
numOpenParens--
|
||||||
if numOpenParens == 0 {
|
if numOpenParens == 0 {
|
||||||
break
|
break
|
||||||
@@ -713,7 +713,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
}
|
}
|
||||||
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
|
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
|
||||||
charClassInverted := (re_postfix[i] == 'P')
|
charClassInverted := (re_postfix[i] == 'P')
|
||||||
charsInList := []rune{}
|
var charsInList []rune
|
||||||
i++
|
i++
|
||||||
if isUnicodeCharClassLetter(re_postfix[i]) {
|
if isUnicodeCharClassLetter(re_postfix[i]) {
|
||||||
var err error
|
var err error
|
||||||
|
|||||||
@@ -161,6 +161,7 @@ The following features from [regexp] are (currently) NOT supported:
|
|||||||
2. Negated POSIX classes
|
2. Negated POSIX classes
|
||||||
3. Embedded flags (flags are instead passed as arguments to [Compile])
|
3. Embedded flags (flags are instead passed as arguments to [Compile])
|
||||||
4. Literal text with \Q ... \E
|
4. Literal text with \Q ... \E
|
||||||
|
5. Finite repetition with no start (defaulting to 0)
|
||||||
|
|
||||||
The following features are not available in [regexp], but are supported in my engine:
|
The following features are not available in [regexp], but are supported in my engine:
|
||||||
1. Lookarounds
|
1. Lookarounds
|
||||||
|
|||||||
@@ -545,6 +545,8 @@ var reTests = []struct {
|
|||||||
{`\pN+`, nil, `123abc456def`, []Group{{0, 3}, {6, 9}}},
|
{`\pN+`, nil, `123abc456def`, []Group{{0, 3}, {6, 9}}},
|
||||||
{`\PN+`, nil, `123abc456def`, []Group{{3, 6}, {9, 12}}},
|
{`\PN+`, nil, `123abc456def`, []Group{{3, 6}, {9, 12}}},
|
||||||
{`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}},
|
{`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}},
|
||||||
|
|
||||||
|
{`(?<=\().*?(?=\))`, nil, `(abc)`, []Group{{1, 4}}},
|
||||||
}
|
}
|
||||||
|
|
||||||
var groupTests = []struct {
|
var groupTests = []struct {
|
||||||
|
|||||||
Reference in New Issue
Block a user