Implement PCRE Matching (prefer left-branch) #2
@@ -2,6 +2,8 @@ package regex
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
// A Match represents a match found by the regex in a given string.
|
// A Match represents a match found by the regex in a given string.
|
||||||
@@ -77,6 +79,18 @@ func (regex Reg) Match(str string) bool {
|
|||||||
return err == nil
|
return err == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CompileMatch compiles expr and returns true if str contains a match of the expression.
|
||||||
|
// It is equivalent to [regexp.Match].
|
||||||
|
// An optional list of flags may be provided (see [ReFlag]).
|
||||||
|
// It returns an error (!= nil) if there was an error compiling the expression.
|
||||||
|
func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
|
||||||
|
re, err := Compile(expr, flags...)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return re.Match(str), nil
|
||||||
|
}
|
||||||
|
|
||||||
// FindAll returns a slice containing all the 0-groups of the regex in the given string.
|
// FindAll returns a slice containing all the 0-groups of the regex in the given string.
|
||||||
// A 0-group represents the match without any submatches.
|
// A 0-group represents the match without any submatches.
|
||||||
func (regex Reg) FindAll(str string) []Group {
|
func (regex Reg) FindAll(str string) []Group {
|
||||||
@@ -162,7 +176,7 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
|
|||||||
var matchFound bool
|
var matchFound bool
|
||||||
var matchIdx Match
|
var matchIdx Match
|
||||||
for idx <= len(str_runes) {
|
for idx <= len(str_runes) {
|
||||||
matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
|
matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest)
|
||||||
if matchFound {
|
if matchFound {
|
||||||
matchNum++
|
matchNum++
|
||||||
}
|
}
|
||||||
@@ -182,7 +196,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
|
|||||||
var matchIdx Match
|
var matchIdx Match
|
||||||
indices := make([]Match, 0)
|
indices := make([]Match, 0)
|
||||||
for idx <= len(str_runes) {
|
for idx <= len(str_runes) {
|
||||||
matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
|
matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest)
|
||||||
if matchFound {
|
if matchFound {
|
||||||
indices = append(indices, matchIdx)
|
indices = append(indices, matchIdx)
|
||||||
}
|
}
|
||||||
@@ -191,7 +205,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
|
|||||||
return indices
|
return indices
|
||||||
}
|
}
|
||||||
|
|
||||||
func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState) []nfaState {
|
func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
|
||||||
if stateExists(list, state) || stateExists(visited, state) {
|
if stateExists(list, state) || stateExists(visited, state) {
|
||||||
return list
|
return list
|
||||||
}
|
}
|
||||||
@@ -199,32 +213,32 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
|
|||||||
|
|
||||||
if state.isKleene || state.isQuestion {
|
if state.isKleene || state.isQuestion {
|
||||||
copyThread(state.splitState, state)
|
copyThread(state.splitState, state)
|
||||||
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
|
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
|
||||||
copyThread(state.next, state)
|
copyThread(state.next, state)
|
||||||
list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
|
list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
|
||||||
return list
|
return list
|
||||||
}
|
}
|
||||||
if state.isAlternation {
|
if state.isAlternation {
|
||||||
copyThread(state.next, state)
|
copyThread(state.next, state)
|
||||||
list = addStateToList(str, idx, list, *state.next, threadGroups, visited)
|
list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
|
||||||
copyThread(state.splitState, state)
|
copyThread(state.splitState, state)
|
||||||
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited)
|
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
|
||||||
return list
|
return list
|
||||||
}
|
}
|
||||||
state.threadGroups = append([]Group{}, threadGroups...)
|
state.threadGroups = append([]Group{}, threadGroups...)
|
||||||
if state.assert != noneAssert {
|
if state.assert != noneAssert {
|
||||||
if state.checkAssertion(str, idx) {
|
if state.checkAssertion(str, idx, preferLongest) {
|
||||||
copyThread(state.next, state)
|
copyThread(state.next, state)
|
||||||
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if state.groupBegin {
|
if state.groupBegin {
|
||||||
state.threadGroups[state.groupNum].StartIdx = idx
|
state.threadGroups[state.groupNum].StartIdx = idx
|
||||||
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
|
||||||
}
|
}
|
||||||
if state.groupEnd {
|
if state.groupEnd {
|
||||||
state.threadGroups[state.groupNum].EndIdx = idx
|
state.threadGroups[state.groupNum].EndIdx = idx
|
||||||
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited)
|
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
|
||||||
}
|
}
|
||||||
return append(list, state)
|
return append(list, state)
|
||||||
|
|
||||||
@@ -233,7 +247,7 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
|
|||||||
// Helper for FindAllMatches. Returns whether it found a match, the
|
// Helper for FindAllMatches. Returns whether it found a match, the
|
||||||
// first Match it finds, and how far it got into the string ie. where
|
// first Match it finds, and how far it got into the string ie. where
|
||||||
// the next search should start from.
|
// the next search should start from.
|
||||||
func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
|
func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) {
|
||||||
// Base case - exit if offset exceeds string's length
|
// Base case - exit if offset exceeds string's length
|
||||||
if offset > len(str) {
|
if offset > len(str) {
|
||||||
// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
|
// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
|
||||||
@@ -248,7 +262,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
// If the first state is an assertion, makes sure the assertion
|
// If the first state is an assertion, makes sure the assertion
|
||||||
// is true before we do _anything_ else.
|
// is true before we do _anything_ else.
|
||||||
if start.assert != noneAssert {
|
if start.assert != noneAssert {
|
||||||
if start.checkAssertion(str, offset) == false {
|
if start.checkAssertion(str, offset, preferLongest) == false {
|
||||||
i++
|
i++
|
||||||
return false, []Group{}, i
|
return false, []Group{}, i
|
||||||
}
|
}
|
||||||
@@ -256,7 +270,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
|
|
||||||
start.threadGroups = newMatch(numGroups + 1)
|
start.threadGroups = newMatch(numGroups + 1)
|
||||||
start.threadGroups[0].StartIdx = i
|
start.threadGroups[0].StartIdx = i
|
||||||
currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil)
|
currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest)
|
||||||
var match Match = nil
|
var match Match = nil
|
||||||
for idx := i; idx <= len(str); idx++ {
|
for idx := i; idx <= len(str); idx++ {
|
||||||
if len(currentStates) == 0 {
|
if len(currentStates) == 0 {
|
||||||
@@ -274,9 +288,9 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
currentState.threadGroups[0].EndIdx = idx
|
currentState.threadGroups[0].EndIdx = idx
|
||||||
match = append([]Group{}, currentState.threadGroups...)
|
match = append([]Group{}, currentState.threadGroups...)
|
||||||
break
|
break
|
||||||
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion
|
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
|
||||||
if currentState.contentContains(str, idx) {
|
if currentState.contentContains(str, idx, preferLongest) {
|
||||||
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil)
|
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -291,3 +305,68 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
|
|||||||
}
|
}
|
||||||
return false, []Group{}, i + 1
|
return false, []Group{}, i + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Expand appends template to dst, expanding any variables in template to the relevant capturing group.
|
||||||
|
//
|
||||||
|
// A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group.
|
||||||
|
// To insert a literal $, do not put a number after it. Alternatively, you can use $$.
|
||||||
|
// src is the input string, and match must be the result of [Reg.FindSubmatch].
|
||||||
|
func (regex Reg) Expand(dst string, template string, src string, match Match) string {
|
||||||
|
templateRuneSlc := []rune(template)
|
||||||
|
srcRuneSlc := []rune(src)
|
||||||
|
i := 0
|
||||||
|
for i < len(templateRuneSlc) {
|
||||||
|
c := templateRuneSlc[i]
|
||||||
|
if c == '$' {
|
||||||
|
i += 1
|
||||||
|
// The dollar sign is the last character of the string, or it is proceeded by another dollar sign
|
||||||
|
if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' {
|
||||||
|
dst += "$"
|
||||||
|
i++
|
||||||
|
} else {
|
||||||
|
numStr := ""
|
||||||
|
for unicode.IsDigit(templateRuneSlc[i]) {
|
||||||
|
numStr += string(templateRuneSlc[i])
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
if numStr == "" {
|
||||||
|
dst += "$"
|
||||||
|
} else {
|
||||||
|
num, _ := strconv.Atoi(numStr)
|
||||||
|
if num < len(match) {
|
||||||
|
dst += string(srcRuneSlc[match[num].StartIdx:match[num].EndIdx])
|
||||||
|
} else {
|
||||||
|
dst += "$" + numStr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
dst += string(c)
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dst
|
||||||
|
}
|
||||||
|
|
||||||
|
// LiteralPrefix returns a string that must begin any match of the given regular expression.
|
||||||
|
// The second return value is true if the string comprises the entire expression.
|
||||||
|
func (regex Reg) LiteralPrefix() (prefix string, complete bool) {
|
||||||
|
state := regex.start
|
||||||
|
if state.assert != noneAssert {
|
||||||
|
state = state.next
|
||||||
|
}
|
||||||
|
for !(state.isLast) && (!state.isAlternation) && len(state.content) == 1 && state.assert == noneAssert {
|
||||||
|
if state.groupBegin || state.groupEnd {
|
||||||
|
state = state.next
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
prefix += string(rune(state.content[0]))
|
||||||
|
state = state.next
|
||||||
|
}
|
||||||
|
if state.isLast {
|
||||||
|
complete = true
|
||||||
|
} else {
|
||||||
|
complete = false
|
||||||
|
}
|
||||||
|
return prefix, complete
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user