5 Commits

4 changed files with 47 additions and 19 deletions

View File

@@ -31,6 +31,22 @@ func (re Reg) String() string {
return re.str
}
// MarshalText implements [encoding.TextMarshaler] by returning the bytes of [Reg.String].
// Flags that were passed as arguments, as well as the effect of calling [Reg.Longest],
// are not part of the output and are therefore lost.
func (re *Reg) MarshalText() ([]byte, error) {
	text := re.String()
	return []byte(text), nil
}
// UnmarshalText implements [encoding.TextUnmarshaler]. The given byte-slice is converted to a string
// and passed to [Compile]. When compilation succeeds, the compiled expression overwrites *re and nil is
// returned; when it fails, re is left untouched and the error from [Compile] is returned as-is.
func (re *Reg) UnmarshalText(text []byte) error {
	compiled, err := Compile(string(text))
	if err != nil {
		return err
	}
	*re = compiled
	return nil
}
// Longest sets the preferLongest flag on re.
// NOTE(review): presumably this makes subsequent searches prefer the longest match, mirroring
// [regexp.Regexp.Longest] - confirm against the matching code, which is not visible here.
func (re *Reg) Longest() {
re.preferLongest = true
}

View File

@@ -105,23 +105,7 @@ The key differences are mentioned below.
1. Greediness:
This engine does not support non-greedy operators. All operators are always greedy in nature, and will try
to match as much as they can, while still allowing for a successful match. For example, given the regex:
y*y
The engine will match as many 'y's as it can, while still allowing the trailing 'y' to be matched.
Another, more subtle example is the following regex:
x|xx
While the stdlib implementation (and most other engines) will prefer matching the first item of the alternation,
this engine will go for the longest possible match, regardless of the order of the alternation. Although this
strays from the convention, it results in a nice rule-of-thumb - the engine is ALWAYS greedy.
The stdlib implementation has a function [regexp.Regexp.Longest] which makes future searches prefer the longest match.
That is the default (and unchangeable) behavior in this engine.
This engine currently does not support non-greedy operators.
2. Byte-slices and runes:
@@ -166,7 +150,7 @@ The following features from [regexp] are (currently) NOT supported:
1. Named capturing groups
2. Non-greedy operators
3. Unicode character classes
4. Embedded flags (flags are passed as arguments to [Compile])
4. Embedded flags (flags are instead passed as arguments to [Compile])
5. Literal text with \Q ... \E
The following features are not available in [regexp], but are supported in my engine:

View File

@@ -154,3 +154,11 @@ func ExampleReg_Longest() {
// Output: x
// xx
}
// ExampleReg_ReplaceAll swaps the digit and the word character of every match by
// referring to the two capturing groups in reverse order in the replacement text.
func ExampleReg_ReplaceAll() {
	re := regex.MustCompile(`(\d)(\w)`)
	fmt.Println(re.ReplaceAll("5d9t", `$2$1`))
	// Output: d5t9
}

View File

@@ -350,7 +350,7 @@ func (re Reg) Expand(dst string, template string, src string, match Match) strin
i++
} else {
numStr := ""
for unicode.IsDigit(templateRuneSlc[i]) {
for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
numStr += string(templateRuneSlc[i])
i++
}
@@ -395,3 +395,23 @@ func (re Reg) LiteralPrefix() (prefix string, complete bool) {
}
return prefix, complete
}
// ReplaceAll replaces all matches of the expression in src with the text in repl. In repl, variables are
// interpreted as they are in [Reg.Expand]. Text that is not part of a match is copied one element at a time.
// The resulting string is returned; src itself is not modified.
func (re Reg) ReplaceAll(src string, repl string) string {
	matches := re.FindAllSubmatch(src)
	// Accumulate into a byte slice so that each append is amortized O(1), instead of
	// re-copying the whole result on every step as repeated string concatenation would.
	dst := make([]byte, 0, len(src))
	i := 0
	currentMatch := 0
	for i < len(src) {
		// The bound must be strict: once every match has been consumed (or when there were no matches
		// to begin with), currentMatch == len(matches) and indexing matches[currentMatch] would panic.
		if currentMatch < len(matches) && matches[currentMatch][0].IsValid() && i == matches[currentMatch][0].StartIdx {
			// Group 0 is the whole match: emit the expanded replacement and skip past the matched text.
			dst = append(dst, re.Expand("", repl, src, matches[currentMatch])...)
			i = matches[currentMatch][0].EndIdx
			currentMatch++
		} else {
			// NOTE(review): src is indexed by byte here and string(src[i]) re-encodes that byte as a rune,
			// so non-ASCII input is not copied verbatim. Whether StartIdx/EndIdx are byte or rune offsets is
			// not visible from this function - confirm, then index []rune(src) or copy raw bytes accordingly.
			dst = append(dst, string(src[i])...)
			i++
		}
	}
	return string(dst)
}