Compare commits
13 Commits
Author | SHA1 | Date | |
---|---|---|---|
073f231b89 | |||
3b7257c921 | |||
668df8b70a | |||
214acf7e0f | |||
50221ff4d9 | |||
5ab95f512a | |||
e7da678408 | |||
ab363e2766 | |||
c803e45415 | |||
525296f239 | |||
eb0ab9f7ec | |||
17a7dbae4c | |||
f2279acd98 |
@@ -31,6 +31,22 @@ func (re Reg) String() string {
|
||||
return re.str
|
||||
}
|
||||
|
||||
// MarshalText implements [encoding.TextMarshaler]. The output is equivalent to that of [Reg.String].
|
||||
// Any flags passed as arguments (including calling [Reg.Longest]) are lost.
|
||||
func (re *Reg) MarshalText() ([]byte, error) {
|
||||
return []byte(re.String()), nil
|
||||
}
|
||||
|
||||
// UnmarshalText implements [encoding.TextUnmarshaler]. It calls [Reg.Compile] on the given byte-slice. If it returns successfully,
|
||||
// then the result of the compilation is stored in re. The result of [Reg.Compile] is returned.
|
||||
func (re *Reg) UnmarshalText(text []byte) error {
|
||||
newReg, err := Compile(string(text))
|
||||
if err == nil {
|
||||
*re = newReg
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (re *Reg) Longest() {
|
||||
re.preferLongest = true
|
||||
}
|
||||
|
22
regex/doc.go
22
regex/doc.go
@@ -33,7 +33,7 @@ Perl classes:
|
||||
\d Match any digit character ([0-9])
|
||||
\D Match any non-digit character ([^0-9])
|
||||
\w Match any word character ([a-zA-Z0-9_])
|
||||
\W Match any word character ([^a-zA-Z0-9_])
|
||||
\W Match any non-word character ([^a-zA-Z0-9_])
|
||||
\s Match any whitespace character ([ \t\n])
|
||||
\S Match any non-whitespace character ([^ \t\n])
|
||||
|
||||
@@ -105,23 +105,7 @@ The key differences are mentioned below.
|
||||
|
||||
1. Greediness:
|
||||
|
||||
This engine does not support non-greedy operators. All operators are always greedy in nature, and will try
|
||||
to match as much as they can, while still allowing for a successful match. For example, given the regex:
|
||||
|
||||
y*y
|
||||
|
||||
The engine will match as many 'y's as it can, while still allowing the trailing 'y' to be matched.
|
||||
|
||||
Another, more subtle example is the following regex:
|
||||
|
||||
x|xx
|
||||
|
||||
While the stdlib implementation (and most other engines) will prefer matching the first item of the alternation,
|
||||
this engine will go for the longest possible match, regardless of the order of the alternation. Although this
|
||||
strays from the convention, it results in a nice rule-of-thumb - the engine is ALWAYS greedy.
|
||||
|
||||
The stdlib implementation has a function [regexp.Regexp.Longest] which makes future searches prefer the longest match.
|
||||
That is the default (and unchangeable) behavior in this engine.
|
||||
This engine currently does not support non-greedy operators.
|
||||
|
||||
2. Byte-slices and runes:
|
||||
|
||||
@@ -166,7 +150,7 @@ The following features from [regexp] are (currently) NOT supported:
|
||||
1. Named capturing groups
|
||||
2. Non-greedy operators
|
||||
3. Unicode character classes
|
||||
4. Embedded flags (flags are passed as arguments to [Compile])
|
||||
4. Embedded flags (flags are instead passed as arguments to [Compile])
|
||||
5. Literal text with \Q ... \E
|
||||
|
||||
The following features are not available in [regexp], but are supported in my engine:
|
||||
|
@@ -2,6 +2,7 @@ package regex_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.twomorecents.org/Rockingcool/kleingrep/regex"
|
||||
)
|
||||
@@ -32,12 +33,12 @@ func ExampleReg_FindAll() {
|
||||
}
|
||||
|
||||
func ExampleReg_FindString() {
|
||||
regexStr := `\d+`
|
||||
regexStr := `\w+\s+(?=sheep)`
|
||||
regexComp := regex.MustCompile(regexStr)
|
||||
|
||||
matchStr := regexComp.FindString("The year of our lord, 2025")
|
||||
matchStr := regexComp.FindString("pink cows and yellow sheep")
|
||||
fmt.Println(matchStr)
|
||||
// Output: 2025
|
||||
// Output: yellow
|
||||
}
|
||||
|
||||
func ExampleReg_FindSubmatch() {
|
||||
@@ -53,6 +54,71 @@ func ExampleReg_FindSubmatch() {
|
||||
// 2 3
|
||||
}
|
||||
|
||||
func ExampleReg_FindStringSubmatch() {
|
||||
regexStr := `(\d{4})-(\d{2})-(\d{2})`
|
||||
regexComp := regex.MustCompile(regexStr)
|
||||
inputStr := `The date is 2025-02-10`
|
||||
|
||||
match := regexComp.FindStringSubmatch(inputStr)
|
||||
fmt.Println(match[1])
|
||||
fmt.Println(match[3])
|
||||
// Output: 2025
|
||||
// 10
|
||||
}
|
||||
|
||||
func ExampleReg_FindAllSubmatch() {
|
||||
regexStr := `(\d)\.(\d)(\d)`
|
||||
regexComp := regex.MustCompile(regexStr)
|
||||
|
||||
matches := regexComp.FindAllSubmatch("3.14+8.97")
|
||||
fmt.Println(matches[0][0]) // 0-group (entire match) of 1st match (0-indexed)
|
||||
fmt.Println(matches[0][1]) // 1st group of 1st match
|
||||
fmt.Println(matches[1][0]) // 0-group of 2nd match
|
||||
fmt.Println(matches[1][1]) // 1st group of 2nd math
|
||||
// Output: 0 4
|
||||
// 0 1
|
||||
// 5 9
|
||||
// 5 6
|
||||
}
|
||||
|
||||
func ExampleReg_FindAllString() {
|
||||
regexStr := `<0-255>\.<0-255>\.<0-255>\.<0-255>`
|
||||
inputStr := `192.168.220.7 pings 9.9.9.9`
|
||||
regexComp := regex.MustCompile(regexStr)
|
||||
|
||||
matchStrs := regexComp.FindAllString(inputStr)
|
||||
|
||||
fmt.Println(matchStrs[0])
|
||||
fmt.Println(matchStrs[1])
|
||||
// Output: 192.168.220.7
|
||||
// 9.9.9.9
|
||||
}
|
||||
|
||||
func ExampleReg_FindAllStringSubmatch() {
|
||||
// 'https' ...
|
||||
// followed by 1 or more alphanumeric characters (including period) ...
|
||||
// then a forward slash ...
|
||||
// followed by one more of :
|
||||
// word character,
|
||||
// question mark,
|
||||
// period,
|
||||
// equals sign
|
||||
regexStr := `https://([a-z0-9\.]+)/([\w.?=]+)`
|
||||
regexComp := regex.MustCompile(regexStr, regex.RE_CASE_INSENSITIVE)
|
||||
inputStr := `You can find me at https://twomorecents.org/index.html and https://news.ycombinator.com/user?id=aadhavans`
|
||||
|
||||
matchIndices := regexComp.FindAllStringSubmatch(inputStr)
|
||||
fmt.Println(matchIndices[0][1]) // 1st group of 1st match (0-indexed)
|
||||
fmt.Println(matchIndices[0][2]) // 2nd group of 1st match
|
||||
fmt.Println(matchIndices[1][1]) // 1st group of 2nd match
|
||||
fmt.Println(matchIndices[1][2]) // 2nd group of 2nd match
|
||||
// Output: twomorecents.org
|
||||
// index.html
|
||||
// news.ycombinator.com
|
||||
// user?id=aadhavans
|
||||
|
||||
}
|
||||
|
||||
func ExampleReg_Expand() {
|
||||
inputStr := `option1: value1
|
||||
option2: value2`
|
||||
@@ -89,3 +155,27 @@ func ExampleReg_Longest() {
|
||||
// Output: x
|
||||
// xx
|
||||
}
|
||||
|
||||
func ExampleReg_ReplaceAll() {
|
||||
regexStr := `(\d)(\w)`
|
||||
inputStr := "5d9t"
|
||||
regexComp := regex.MustCompile(regexStr)
|
||||
fmt.Println(regexComp.ReplaceAll(inputStr, `$2$1`))
|
||||
// Output: d5t9
|
||||
}
|
||||
|
||||
func ExampleReg_ReplaceAllLiteral() {
|
||||
regexStr := `fox|dog`
|
||||
inputStr := "the quick brown fox jumped over the lazy dog"
|
||||
regexComp := regex.MustCompile(regexStr)
|
||||
fmt.Println(regexComp.ReplaceAllLiteral(inputStr, `duck`))
|
||||
// Output: the quick brown duck jumped over the lazy duck
|
||||
}
|
||||
|
||||
func ExampleReg_ReplaceAllFunc() {
|
||||
regexStr := `\w{5,}`
|
||||
inputStr := `all five or more letter words in this string are capitalized`
|
||||
regexComp := regex.MustCompile(regexStr)
|
||||
fmt.Println(regexComp.ReplaceAllFunc(inputStr, strings.ToUpper))
|
||||
// Output: all five or more LETTER WORDS in this STRING are CAPITALIZED
|
||||
}
|
||||
|
@@ -205,6 +205,29 @@ func (re Reg) FindAllSubmatch(str string) []Match {
|
||||
return indices
|
||||
}
|
||||
|
||||
// FindAllSubmatch returns a double-slice of strings. Each slice contains the text of a match, including all submatches.
|
||||
// A return value of nil indicates no match.
|
||||
func (re Reg) FindAllStringSubmatch(str string) [][]string {
|
||||
match := re.FindAllSubmatch(str)
|
||||
if len(match) == 0 {
|
||||
return nil
|
||||
}
|
||||
rtv := make([][]string, len(match))
|
||||
for i := range rtv {
|
||||
rtv[i] = make([]string, re.numGroups+1)
|
||||
}
|
||||
rtv = funcMap(match, func(m Match) []string {
|
||||
return funcMap(m, func(g Group) string {
|
||||
if g.IsValid() {
|
||||
return str[g.StartIdx:g.EndIdx]
|
||||
} else {
|
||||
return ""
|
||||
}
|
||||
})
|
||||
})
|
||||
return rtv
|
||||
}
|
||||
|
||||
func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
|
||||
if stateExists(list, state) || stateExists(visited, state) {
|
||||
return list
|
||||
@@ -327,7 +350,7 @@ func (re Reg) Expand(dst string, template string, src string, match Match) strin
|
||||
i++
|
||||
} else {
|
||||
numStr := ""
|
||||
for unicode.IsDigit(templateRuneSlc[i]) {
|
||||
for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
|
||||
numStr += string(templateRuneSlc[i])
|
||||
i++
|
||||
}
|
||||
@@ -372,3 +395,66 @@ func (re Reg) LiteralPrefix() (prefix string, complete bool) {
|
||||
}
|
||||
return prefix, complete
|
||||
}
|
||||
|
||||
// ReplaceAll replaces all matches of the expression in src, with the text in repl. In repl, variables are interpreted
|
||||
// as they are in [Reg.Expand]. The resulting string is returned.
|
||||
func (re Reg) ReplaceAll(src string, repl string) string {
|
||||
matches := re.FindAllSubmatch(src)
|
||||
i := 0
|
||||
currentMatch := 0
|
||||
dst := ""
|
||||
for i < len(src) {
|
||||
if currentMatch < len(matches) && matches[currentMatch][0].IsValid() && i == matches[currentMatch][0].StartIdx {
|
||||
dst += re.Expand("", repl, src, matches[currentMatch])
|
||||
i = matches[currentMatch][0].EndIdx
|
||||
currentMatch++
|
||||
} else {
|
||||
dst += string(src[i])
|
||||
i++
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// ReplaceAllLiteral replaces all matches of the expression in src, with the text in repl. The text is replaced directly,
|
||||
// without any expansion.
|
||||
func (re Reg) ReplaceAllLiteral(src string, repl string) string {
|
||||
zerogroups := re.FindAll(src)
|
||||
currentMatch := 0
|
||||
i := 0
|
||||
dst := ""
|
||||
|
||||
for i < len(src) {
|
||||
if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
|
||||
dst += repl
|
||||
i = zerogroups[currentMatch].EndIdx
|
||||
currentMatch += 1
|
||||
} else {
|
||||
dst += string(src[i])
|
||||
i++
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// ReplaceAllFunc replaces every match of the expression in src, with the return value of the function replFunc.
|
||||
// replFunc takes in the matched string. The return value is substituted in directly without expasion.
|
||||
func (re Reg) ReplaceAllFunc(src string, replFunc func(string) string) string {
|
||||
zerogroups := re.FindAll(src)
|
||||
currentMatch := 0
|
||||
i := 0
|
||||
dst := ""
|
||||
|
||||
for i < len(src) {
|
||||
if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
|
||||
dst += replFunc(src[zerogroups[currentMatch].StartIdx:zerogroups[currentMatch].EndIdx])
|
||||
i = zerogroups[currentMatch].EndIdx
|
||||
currentMatch += 1
|
||||
} else {
|
||||
dst += string(src[i])
|
||||
i++
|
||||
}
|
||||
}
|
||||
return dst
|
||||
|
||||
}
|
||||
|
@@ -861,6 +861,60 @@ func TestFindStringSubmatch(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAllStringSubmatch(t *testing.T) {
|
||||
for _, test := range groupTests {
|
||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||
regComp, err := Compile(test.re, test.flags...)
|
||||
if err != nil {
|
||||
if test.result != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
matchStrs := regComp.FindAllStringSubmatch(test.str)
|
||||
if matchStrs == nil {
|
||||
if len(test.result) != 0 {
|
||||
expectedStrs := funcMap(test.result, func(m Match) []string {
|
||||
return funcMap(m, func(g Group) string {
|
||||
if g.IsValid() {
|
||||
return test.str[g.StartIdx:g.EndIdx]
|
||||
} else {
|
||||
return ""
|
||||
}
|
||||
})
|
||||
})
|
||||
t.Errorf("Wanted %v got no match\n", expectedStrs)
|
||||
}
|
||||
} else if len(test.result) == 0 {
|
||||
t.Errorf("Wanted no match got %v\n", matchStrs)
|
||||
} else {
|
||||
expectedStrs := funcMap(test.result, func(m Match) []string {
|
||||
return funcMap(m, func(g Group) string {
|
||||
if g.IsValid() {
|
||||
return test.str[g.StartIdx:g.EndIdx]
|
||||
} else {
|
||||
return ""
|
||||
}
|
||||
})
|
||||
})
|
||||
for i, matchStr := range matchStrs {
|
||||
for j, groupStr := range matchStr {
|
||||
if groupStr == "" {
|
||||
if j < len(expectedStrs[i]) && expectedStrs[i][j] != "" {
|
||||
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
|
||||
}
|
||||
} else {
|
||||
if expectedStrs[i][j] != groupStr {
|
||||
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAllSubmatch(t *testing.T) {
|
||||
for _, test := range groupTests {
|
||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||
|
Reference in New Issue
Block a user