diff --git a/regex/matching.go b/regex/matching.go index 1b07ee8..4d7c600 100644 --- a/regex/matching.go +++ b/regex/matching.go @@ -2,6 +2,8 @@ package regex import ( "fmt" + "strconv" + "unicode" ) // A Match represents a match found by the regex in a given string. @@ -77,6 +79,18 @@ func (regex Reg) Match(str string) bool { return err == nil } +// CompileMatch compiles expr and returns true if str contains a match of the expression. +// It is equivalent to [regexp.Match]. +// An optional list of flags may be provided (see [ReFlag]). +// It returns an error (!= nil) if there was an error compiling the expression. +func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) { + re, err := Compile(expr, flags...) + if err != nil { + return false, err + } + return re.Match(str), nil +} + // FindAll returns a slice containing all the 0-groups of the regex in the given string. // A 0-group represents the match without any submatches. func (regex Reg) FindAll(str string) []Group { @@ -162,7 +176,7 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) { var matchFound bool var matchIdx Match for idx <= len(str_runes) { - matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups) + matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest) if matchFound { matchNum++ } @@ -182,7 +196,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match { var matchIdx Match indices := make([]Match, 0) for idx <= len(str_runes) { - matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups) + matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest) if matchFound { indices = append(indices, matchIdx) } @@ -191,7 +205,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match { return indices } -func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState) []nfaState { +func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState { if stateExists(list, state) || stateExists(visited, state) { return list } @@ -199,32 +213,32 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread if state.isKleene || state.isQuestion { copyThread(state.splitState, state) - list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited) + list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest) copyThread(state.next, state) - list = addStateToList(str, idx, list, *state.next, threadGroups, visited) + list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest) return list } if state.isAlternation { copyThread(state.next, state) - list = addStateToList(str, idx, list, *state.next, threadGroups, visited) + list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest) copyThread(state.splitState, state) - list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited) + list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest) return list } state.threadGroups = append([]Group{}, threadGroups...) if state.assert != noneAssert { - if state.checkAssertion(str, idx) { + if state.checkAssertion(str, idx, preferLongest) { copyThread(state.next, state) - return addStateToList(str, idx, list, *state.next, state.threadGroups, visited) + return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest) } } if state.groupBegin { state.threadGroups[state.groupNum].StartIdx = idx - return addStateToList(str, idx, list, *state.next, state.threadGroups, visited) + return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest) } if state.groupEnd { state.threadGroups[state.groupNum].EndIdx = idx - return addStateToList(str, idx, list, *state.next, state.threadGroups, visited) + return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest) } return append(list, state) @@ -233,7 +247,7 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread // Helper for FindAllMatches. Returns whether it found a match, the // first Match it finds, and how far it got into the string ie. where // the next search should start from. -func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) { +func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) { // Base case - exit if offset exceeds string's length if offset > len(str) { // The second value here shouldn't be used, because we should exit when the third return value is > than len(str) @@ -248,7 +262,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in // If the first state is an assertion, makes sure the assertion // is true before we do _anything_ else. if start.assert != noneAssert { - if start.checkAssertion(str, offset) == false { + if start.checkAssertion(str, offset, preferLongest) == false { i++ return false, []Group{}, i } @@ -256,7 +270,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in start.threadGroups = newMatch(numGroups + 1) start.threadGroups[0].StartIdx = i - currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil) + currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest) var match Match = nil for idx := i; idx <= len(str); idx++ { if len(currentStates) == 0 { @@ -274,9 +288,9 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in currentState.threadGroups[0].EndIdx = idx match = append([]Group{}, currentState.threadGroups...) break - } else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion - if currentState.contentContains(str, idx) { - nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil) + } else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character + if currentState.contentContains(str, idx, preferLongest) { + nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest) } } } @@ -291,3 +305,68 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in } return false, []Group{}, i + 1 } + +// Expand appends template to dst, expanding any variables in template to the relevant capturing group. +// +// A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group. +// To insert a literal $, do not put a number after it. Alternatively, you can use $$. +// src is the input string, and match must be the result of [Reg.FindSubmatch]. +func (regex Reg) Expand(dst string, template string, src string, match Match) string { + templateRuneSlc := []rune(template) + srcRuneSlc := []rune(src) + i := 0 + for i < len(templateRuneSlc) { + c := templateRuneSlc[i] + if c == '$' { + i += 1 + // The dollar sign is the last character of the string, or it is proceeded by another dollar sign + if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' { + dst += "$" + i++ + } else { + numStr := "" + for unicode.IsDigit(templateRuneSlc[i]) { + numStr += string(templateRuneSlc[i]) + i++ + } + if numStr == "" { + dst += "$" + } else { + num, _ := strconv.Atoi(numStr) + if num < len(match) { + dst += string(srcRuneSlc[match[num].StartIdx:match[num].EndIdx]) + } else { + dst += "$" + numStr + } + } + } + } else { + dst += string(c) + i++ + } + } + return dst +} + +// LiteralPrefix returns a string that must begin any match of the given regular expression. +// The second return value is true if the string comprises the entire expression. +func (regex Reg) LiteralPrefix() (prefix string, complete bool) { + state := regex.start + if state.assert != noneAssert { + state = state.next + } + for !(state.isLast) && (!state.isAlternation) && len(state.content) == 1 && state.assert == noneAssert { + if state.groupBegin || state.groupEnd { + state = state.next + continue + } + prefix += string(rune(state.content[0])) + state = state.next + } + if state.isLast { + complete = true + } else { + complete = false + } + return prefix, complete +}