5 Commits

4 changed files with 47 additions and 19 deletions

View File

@@ -31,6 +31,22 @@ func (re Reg) String() string {
return re.str return re.str
} }
// MarshalText implements [encoding.TextMarshaler]. The returned bytes are the text produced by [Reg.String],
// and the returned error is always nil.
// Any flags passed as arguments (including calling [Reg.Longest]) are lost.
func (re *Reg) MarshalText() ([]byte, error) {
	text := []byte(re.String())
	return text, nil
}
// UnmarshalText implements [encoding.TextUnmarshaler]. It passes the given byte-slice, converted to a string,
// to [Compile]. If compilation succeeds, the compiled expression is stored in re; if it fails, re is left
// untouched. The error returned by [Compile] is returned as-is.
func (re *Reg) UnmarshalText(text []byte) error {
	compiled, err := Compile(string(text))
	if err != nil {
		// Leave the receiver as it was when the expression does not compile.
		return err
	}
	*re = compiled
	return nil
}
// Longest sets the preferLongest flag on re.
// NOTE(review): presumably this makes later searches prefer the longest match, mirroring
// regexp.Regexp.Longest in the standard library - confirm against the matching code.
func (re *Reg) Longest() { func (re *Reg) Longest() {
re.preferLongest = true re.preferLongest = true
} }

View File

@@ -105,23 +105,7 @@ The key differences are mentioned below.
1. Greediness: 1. Greediness:
This engine does not support non-greedy operators. All operators are always greedy in nature, and will try This engine currently does not support non-greedy operators.
to match as much as they can, while still allowing for a successful match. For example, given the regex:
y*y
The engine will match as many 'y's as it can, while still allowing the trailing 'y' to be matched.
Another, more subtle example is the following regex:
x|xx
While the stdlib implementation (and most other engines) will prefer matching the first item of the alternation,
this engine will go for the longest possible match, regardless of the order of the alternation. Although this
strays from the convention, it results in a nice rule-of-thumb - the engine is ALWAYS greedy.
The stdlib implementation has a function [regexp.Regexp.Longest] which makes future searches prefer the longest match.
That is the default (and unchangeable) behavior in this engine.
2. Byte-slices and runes: 2. Byte-slices and runes:
@@ -166,7 +150,7 @@ The following features from [regexp] are (currently) NOT supported:
1. Named capturing groups 1. Named capturing groups
2. Non-greedy operators 2. Non-greedy operators
3. Unicode character classes 3. Unicode character classes
4. Embedded flags (flags are passed as arguments to [Compile]) 4. Embedded flags (flags are instead passed as arguments to [Compile])
5. Literal text with \Q ... \E 5. Literal text with \Q ... \E
The following features are not available in [regexp], but are supported in my engine: The following features are not available in [regexp], but are supported in my engine:

View File

@@ -154,3 +154,11 @@ func ExampleReg_Longest() {
// Output: x // Output: x
// xx // xx
} }
// ExampleReg_ReplaceAll swaps each digit with the word character that follows it,
// by referring to the two capturing groups in reverse order in the replacement text.
func ExampleReg_ReplaceAll() {
	re := regex.MustCompile(`(\d)(\w)`)
	swapped := re.ReplaceAll("5d9t", `$2$1`)
	fmt.Println(swapped)
	// Output: d5t9
}

View File

@@ -350,7 +350,7 @@ func (re Reg) Expand(dst string, template string, src string, match Match) strin
i++ i++
} else { } else {
numStr := "" numStr := ""
for unicode.IsDigit(templateRuneSlc[i]) { for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
numStr += string(templateRuneSlc[i]) numStr += string(templateRuneSlc[i])
i++ i++
} }
@@ -395,3 +395,23 @@ func (re Reg) LiteralPrefix() (prefix string, complete bool) {
} }
return prefix, complete return prefix, complete
} }
// ReplaceAll replaces all matches of the expression in src, with the text in repl. In repl, variables are interpreted
// as they are in [Reg.Expand]. The resulting string is returned.
//
// Text in src that is not part of a match is copied into the result. If the expression does not match anywhere
// in src, nothing is replaced.
func (re Reg) ReplaceAll(src string, repl string) string {
	matches := re.FindAllSubmatch(src)
	i := 0
	currentMatch := 0
	dst := ""
	for i < len(src) {
		// The bound must be strict: once every match has been consumed (or when there were none
		// to begin with), currentMatch == len(matches) and indexing matches would panic.
		atMatch := currentMatch < len(matches) &&
			matches[currentMatch][0].IsValid() &&
			i == matches[currentMatch][0].StartIdx
		if !atMatch {
			// NOTE(review): src[i] is a single byte, and string(byte) re-encodes it as a rune, so
			// non-ASCII input may not be copied faithfully. Confirm whether StartIdx/EndIdx are byte
			// or rune offsets before changing this.
			dst += string(src[i])
			i++
			continue
		}
		m := matches[currentMatch]
		// Expand the replacement template against this match, then jump past the matched text.
		dst += re.Expand("", repl, src, m)
		i = m[0].EndIdx
		currentMatch++
	}
	return dst
}