|
|
|
|
@@ -2,6 +2,8 @@ package regex
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"fmt"
|
|
|
|
|
"strconv"
|
|
|
|
|
"unicode"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// A Match represents a match found by the regex in a given string.
|
|
|
|
|
@@ -63,8 +65,8 @@ func copyThread(to *nfaState, from nfaState) {
|
|
|
|
|
|
|
|
|
|
// Find returns the 0-group of the leftmost match of the regex in the given string.
|
|
|
|
|
// An error value != nil indicates that no match was found.
|
|
|
|
|
func (regex Reg) Find(str string) (Group, error) {
|
|
|
|
|
match, err := regex.FindNthMatch(str, 1)
|
|
|
|
|
func (re Reg) Find(str string) (Group, error) {
|
|
|
|
|
match, err := re.FindNthMatch(str, 1)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return Group{}, fmt.Errorf("no matches found")
|
|
|
|
|
}
|
|
|
|
|
@@ -72,15 +74,27 @@ func (regex Reg) Find(str string) (Group, error) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Match returns a boolean value, indicating whether the regex found a match in the given string.
|
|
|
|
|
func (regex Reg) Match(str string) bool {
|
|
|
|
|
_, err := regex.Find(str)
|
|
|
|
|
func (re Reg) Match(str string) bool {
|
|
|
|
|
_, err := re.Find(str)
|
|
|
|
|
return err == nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// CompileMatch compiles expr and returns true if str contains a match of the expression.
|
|
|
|
|
// It is equivalent to [regexp.Match].
|
|
|
|
|
// An optional list of flags may be provided (see [ReFlag]).
|
|
|
|
|
// It returns an error (!= nil) if there was an error compiling the expression.
|
|
|
|
|
func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
|
|
|
|
|
re, err := Compile(expr, flags...)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return false, err
|
|
|
|
|
}
|
|
|
|
|
return re.Match(str), nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FindAll returns a slice containing all the 0-groups of the regex in the given string.
|
|
|
|
|
// A 0-group represents the match without any submatches.
|
|
|
|
|
func (regex Reg) FindAll(str string) []Group {
|
|
|
|
|
indices := regex.FindAllSubmatch(str)
|
|
|
|
|
func (re Reg) FindAll(str string) []Group {
|
|
|
|
|
indices := re.FindAllSubmatch(str)
|
|
|
|
|
zeroGroups := funcMap(indices, getZeroGroup)
|
|
|
|
|
return zeroGroups
|
|
|
|
|
}
|
|
|
|
|
@@ -89,8 +103,8 @@ func (regex Reg) FindAll(str string) []Group {
|
|
|
|
|
// The return value will be an empty string in two situations:
|
|
|
|
|
// 1. No match was found
|
|
|
|
|
// 2. The match was an empty string
|
|
|
|
|
func (regex Reg) FindString(str string) string {
|
|
|
|
|
match, err := regex.FindNthMatch(str, 1)
|
|
|
|
|
func (re Reg) FindString(str string) string {
|
|
|
|
|
match, err := re.FindNthMatch(str, 1)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
@@ -103,8 +117,8 @@ func (regex Reg) FindString(str string) string {
|
|
|
|
|
// number of groups. The validity of a group (whether or not it matched anything) can be determined with
|
|
|
|
|
// [Group.IsValid], or by checking that both indices of the group are >= 0.
|
|
|
|
|
// The second return value is non-nil if no match was found.
|
|
|
|
|
func (regex Reg) FindSubmatch(str string) (Match, error) {
|
|
|
|
|
match, err := regex.FindNthMatch(str, 1)
|
|
|
|
|
func (re Reg) FindSubmatch(str string) (Match, error) {
|
|
|
|
|
match, err := re.FindNthMatch(str, 1)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return Match{}, fmt.Errorf("no match found")
|
|
|
|
|
} else {
|
|
|
|
|
@@ -121,9 +135,9 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
|
|
|
|
|
// 2. Group n found a zero-length match
|
|
|
|
|
//
|
|
|
|
|
// A return value of nil indicates no match.
|
|
|
|
|
func (regex Reg) FindStringSubmatch(str string) []string {
|
|
|
|
|
matchStr := make([]string, regex.numGroups+1)
|
|
|
|
|
match, err := regex.FindSubmatch(str)
|
|
|
|
|
func (re Reg) FindStringSubmatch(str string) []string {
|
|
|
|
|
matchStr := make([]string, re.numGroups+1)
|
|
|
|
|
match, err := re.FindSubmatch(str)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
@@ -145,8 +159,8 @@ func (regex Reg) FindStringSubmatch(str string) []string {
|
|
|
|
|
// FindAllString is the 'all' version of [FindString].
|
|
|
|
|
// It returns a slice of strings containing the text of all matches of
|
|
|
|
|
// the regex in the given string.
|
|
|
|
|
func (regex Reg) FindAllString(str string) []string {
|
|
|
|
|
zerogroups := regex.FindAll(str)
|
|
|
|
|
func (re Reg) FindAllString(str string) []string {
|
|
|
|
|
zerogroups := re.FindAll(str)
|
|
|
|
|
matchStrs := funcMap(zerogroups, func(g Group) string {
|
|
|
|
|
return str[g.StartIdx:g.EndIdx]
|
|
|
|
|
})
|
|
|
|
|
@@ -155,14 +169,14 @@ func (regex Reg) FindAllString(str string) []string {
|
|
|
|
|
|
|
|
|
|
// FindNthMatch returns the 'n'th match of the regex in the given string.
|
|
|
|
|
// It returns an error (!= nil) if there are fewer than 'n' matches in the string.
|
|
|
|
|
func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
|
|
|
|
|
func (re Reg) FindNthMatch(str string, n int) (Match, error) {
|
|
|
|
|
idx := 0
|
|
|
|
|
matchNum := 0
|
|
|
|
|
str_runes := []rune(str)
|
|
|
|
|
var matchFound bool
|
|
|
|
|
var matchIdx Match
|
|
|
|
|
for idx <= len(str_runes) {
|
|
|
|
|
matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
|
|
|
|
|
matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
|
|
|
|
|
if matchFound {
|
|
|
|
|
matchNum++
|
|
|
|
|
}
|
|
|
|
|
@@ -175,14 +189,14 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FindAllSubmatch returns a slice of matches in the given string.
|
|
|
|
|
func (regex Reg) FindAllSubmatch(str string) []Match {
|
|
|
|
|
func (re Reg) FindAllSubmatch(str string) []Match {
|
|
|
|
|
idx := 0
|
|
|
|
|
str_runes := []rune(str)
|
|
|
|
|
var matchFound bool
|
|
|
|
|
var matchIdx Match
|
|
|
|
|
indices := make([]Match, 0)
|
|
|
|
|
for idx <= len(str_runes) {
|
|
|
|
|
matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
|
|
|
|
|
matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
|
|
|
|
|
if matchFound {
|
|
|
|
|
indices = append(indices, matchIdx)
|
|
|
|
|
}
|
|
|
|
|
@@ -191,7 +205,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
|
|
|
|
|
return indices
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState) []nfaState {
|
|
|
|
|
func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
|
|
|
|
|
if stateExists(list, state) || stateExists(visited, state) {
|
|
|
|
|
return list
|
|
|
|
|
}
|
|
|
|
|
@@ -199,32 +213,32 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
|
|
|
|
|
|
|
|
|
|
if state.isKleene || state.isQuestion {
|
|
|
|
|
copyThread(state.splitState, state)
|
|
|
|
|
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
|
|
|
|
|
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
|
|
|
|
|
copyThread(state.next, state)
|
|
|
|
|
list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
|
|
|
|
|
list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
|
|
|
|
|
return list
|
|
|
|
|
}
|
|
|
|
|
if state.isAlternation {
|
|
|
|
|
copyThread(state.next, state)
|
|
|
|
|
list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
|
|
|
|
|
list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
|
|
|
|
|
copyThread(state.splitState, state)
|
|
|
|
|
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
|
|
|
|
|
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
|
|
|
|
|
return list
|
|
|
|
|
}
|
|
|
|
|
state.threadGroups = append([]Group{}, threadGroups...)
|
|
|
|
|
if state.assert != noneAssert {
|
|
|
|
|
if state.checkAssertion(str, idx) {
|
|
|
|
|
if state.checkAssertion(str, idx, preferLongest) {
|
|
|
|
|
copyThread(state.next, state)
|
|
|
|
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
|
|
|
|
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if state.groupBegin {
|
|
|
|
|
state.threadGroups[state.groupNum].StartIdx = idx
|
|
|
|
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
|
|
|
|
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
|
|
|
|
|
}
|
|
|
|
|
if state.groupEnd {
|
|
|
|
|
state.threadGroups[state.groupNum].EndIdx = idx
|
|
|
|
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
|
|
|
|
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
|
|
|
|
|
}
|
|
|
|
|
return append(list, state)
|
|
|
|
|
|
|
|
|
|
@@ -233,7 +247,7 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
|
|
|
|
|
// Helper for FindAllSubmatch and FindNthMatch. Returns whether it found a match, the
|
|
|
|
|
// first Match it finds, and how far it got into the string ie. where
|
|
|
|
|
// the next search should start from.
|
|
|
|
|
func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
|
|
|
|
|
func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) {
|
|
|
|
|
// Base case - exit if offset exceeds string's length
|
|
|
|
|
if offset > len(str) {
|
|
|
|
|
// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
|
|
|
|
|
@@ -248,7 +262,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|
|
|
|
// If the first state is an assertion, makes sure the assertion
|
|
|
|
|
// is true before we do _anything_ else.
|
|
|
|
|
if start.assert != noneAssert {
|
|
|
|
|
if start.checkAssertion(str, offset) == false {
|
|
|
|
|
if start.checkAssertion(str, offset, preferLongest) == false {
|
|
|
|
|
i++
|
|
|
|
|
return false, []Group{}, i
|
|
|
|
|
}
|
|
|
|
|
@@ -256,7 +270,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|
|
|
|
|
|
|
|
|
start.threadGroups = newMatch(numGroups + 1)
|
|
|
|
|
start.threadGroups[0].StartIdx = i
|
|
|
|
|
currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil)
|
|
|
|
|
currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest)
|
|
|
|
|
var match Match = nil
|
|
|
|
|
for idx := i; idx <= len(str); idx++ {
|
|
|
|
|
if len(currentStates) == 0 {
|
|
|
|
|
@@ -273,10 +287,12 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|
|
|
|
if currentState.isLast {
|
|
|
|
|
currentState.threadGroups[0].EndIdx = idx
|
|
|
|
|
match = append([]Group{}, currentState.threadGroups...)
|
|
|
|
|
break
|
|
|
|
|
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
|
|
|
|
|
if currentState.contentContains(str, idx) {
|
|
|
|
|
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil)
|
|
|
|
|
if !preferLongest {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
|
|
|
|
|
if currentState.contentContains(str, idx, preferLongest) {
|
|
|
|
|
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
@@ -291,3 +307,68 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|
|
|
|
}
|
|
|
|
|
return false, []Group{}, i + 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Expand appends template to dst, expanding any variables in template to the relevant capturing group.
|
|
|
|
|
//
|
|
|
|
|
// A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group.
|
|
|
|
|
// To insert a literal $, do not put a number after it. Alternatively, you can use $$.
|
|
|
|
|
// src is the input string, and match must be the result of [Reg.FindSubmatch].
|
|
|
|
|
func (re Reg) Expand(dst string, template string, src string, match Match) string {
|
|
|
|
|
templateRuneSlc := []rune(template)
|
|
|
|
|
srcRuneSlc := []rune(src)
|
|
|
|
|
i := 0
|
|
|
|
|
for i < len(templateRuneSlc) {
|
|
|
|
|
c := templateRuneSlc[i]
|
|
|
|
|
if c == '$' {
|
|
|
|
|
i += 1
|
|
|
|
|
// The dollar sign is the last character of the string, or it is proceeded by another dollar sign
|
|
|
|
|
if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' {
|
|
|
|
|
dst += "$"
|
|
|
|
|
i++
|
|
|
|
|
} else {
|
|
|
|
|
numStr := ""
|
|
|
|
|
for unicode.IsDigit(templateRuneSlc[i]) {
|
|
|
|
|
numStr += string(templateRuneSlc[i])
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
if numStr == "" {
|
|
|
|
|
dst += "$"
|
|
|
|
|
} else {
|
|
|
|
|
num, _ := strconv.Atoi(numStr)
|
|
|
|
|
if num < len(match) {
|
|
|
|
|
dst += string(srcRuneSlc[match[num].StartIdx:match[num].EndIdx])
|
|
|
|
|
} else {
|
|
|
|
|
dst += "$" + numStr
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
dst += string(c)
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return dst
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// LiteralPrefix returns a string that must begin any match of the given regular expression.
|
|
|
|
|
// The second return value is true if the string comprises the entire expression.
|
|
|
|
|
func (re Reg) LiteralPrefix() (prefix string, complete bool) {
|
|
|
|
|
state := re.start
|
|
|
|
|
if state.assert != noneAssert {
|
|
|
|
|
state = state.next
|
|
|
|
|
}
|
|
|
|
|
for !(state.isLast) && (!state.isAlternation) && len(state.content) == 1 && state.assert == noneAssert {
|
|
|
|
|
if state.groupBegin || state.groupEnd {
|
|
|
|
|
state = state.next
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
prefix += string(rune(state.content[0]))
|
|
|
|
|
state = state.next
|
|
|
|
|
}
|
|
|
|
|
if state.isLast {
|
|
|
|
|
complete = true
|
|
|
|
|
} else {
|
|
|
|
|
complete = false
|
|
|
|
|
}
|
|
|
|
|
return prefix, complete
|
|
|
|
|
}
|
|
|
|
|
|