Wrote new methods Expand() and preferLongest(); Use new function signatures (with preferLongest); only characters should be added to next state list
This commit is contained in:
		| @@ -2,6 +2,8 @@ package regex | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strconv" | ||||
| 	"unicode" | ||||
| ) | ||||
|  | ||||
| // A Match represents a match found by the regex in a given string. | ||||
| @@ -77,6 +79,18 @@ func (regex Reg) Match(str string) bool { | ||||
| 	return err == nil | ||||
| } | ||||
|  | ||||
| // CompileMatch compiles expr and returns true if str contains a match of the expression. | ||||
| // It is equivalent to [regexp.Match]. | ||||
| // An optional list of flags may be provided (see [ReFlag]). | ||||
| // It returns an error (!= nil) if there was an error compiling the expression. | ||||
| func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) { | ||||
| 	re, err := Compile(expr, flags...) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 	return re.Match(str), nil | ||||
| } | ||||
|  | ||||
| // FindAll returns a slice containing all the 0-groups of the regex in the given string. | ||||
| // A 0-group represents the match without any submatches. | ||||
| func (regex Reg) FindAll(str string) []Group { | ||||
| @@ -162,7 +176,7 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) { | ||||
| 	var matchFound bool | ||||
| 	var matchIdx Match | ||||
| 	for idx <= len(str_runes) { | ||||
| 		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups) | ||||
| 		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest) | ||||
| 		if matchFound { | ||||
| 			matchNum++ | ||||
| 		} | ||||
| @@ -182,7 +196,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match { | ||||
| 	var matchIdx Match | ||||
| 	indices := make([]Match, 0) | ||||
| 	for idx <= len(str_runes) { | ||||
| 		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups) | ||||
| 		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups, regex.preferLongest) | ||||
| 		if matchFound { | ||||
| 			indices = append(indices, matchIdx) | ||||
| 		} | ||||
| @@ -191,7 +205,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match { | ||||
| 	return indices | ||||
| } | ||||
|  | ||||
| func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState) []nfaState { | ||||
| func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState { | ||||
| 	if stateExists(list, state) || stateExists(visited, state) { | ||||
| 		return list | ||||
| 	} | ||||
| @@ -199,32 +213,32 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread | ||||
|  | ||||
| 	if state.isKleene || state.isQuestion { | ||||
| 		copyThread(state.splitState, state) | ||||
| 		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited) | ||||
| 		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest) | ||||
| 		copyThread(state.next, state) | ||||
| 		list = addStateToList(str, idx, list, *state.next, threadGroups, visited) | ||||
| 		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest) | ||||
| 		return list | ||||
| 	} | ||||
| 	if state.isAlternation { | ||||
| 		copyThread(state.next, state) | ||||
| 		list = addStateToList(str, idx, list, *state.next, threadGroups, visited) | ||||
| 		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest) | ||||
| 		copyThread(state.splitState, state) | ||||
| 		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited) | ||||
| 		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest) | ||||
| 		return list | ||||
| 	} | ||||
| 	state.threadGroups = append([]Group{}, threadGroups...) | ||||
| 	if state.assert != noneAssert { | ||||
| 		if state.checkAssertion(str, idx) { | ||||
| 		if state.checkAssertion(str, idx, preferLongest) { | ||||
| 			copyThread(state.next, state) | ||||
| 			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited) | ||||
| 			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest) | ||||
| 		} | ||||
| 	} | ||||
| 	if state.groupBegin { | ||||
| 		state.threadGroups[state.groupNum].StartIdx = idx | ||||
| 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited) | ||||
| 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest) | ||||
| 	} | ||||
| 	if state.groupEnd { | ||||
| 		state.threadGroups[state.groupNum].EndIdx = idx | ||||
| 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited) | ||||
| 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest) | ||||
| 	} | ||||
| 	return append(list, state) | ||||
|  | ||||
| @@ -233,7 +247,7 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread | ||||
| // Helper for FindAllMatches. Returns whether it found a match, the | ||||
| // first Match it finds, and how far it got into the string ie. where | ||||
| // the next search should start from. | ||||
| func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) { | ||||
| func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) { | ||||
| 	// Base case - exit if offset exceeds string's length | ||||
| 	if offset > len(str) { | ||||
| 		// The second value here shouldn't be used, because we should exit when the third return value is > than len(str) | ||||
| @@ -248,7 +262,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 	// If the first state is an assertion, makes sure the assertion | ||||
| 	// is true before we do _anything_ else. | ||||
| 	if start.assert != noneAssert { | ||||
| 		if start.checkAssertion(str, offset) == false { | ||||
| 		if start.checkAssertion(str, offset, preferLongest) == false { | ||||
| 			i++ | ||||
| 			return false, []Group{}, i | ||||
| 		} | ||||
| @@ -256,7 +270,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
|  | ||||
| 	start.threadGroups = newMatch(numGroups + 1) | ||||
| 	start.threadGroups[0].StartIdx = i | ||||
| 	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil) | ||||
| 	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest) | ||||
| 	var match Match = nil | ||||
| 	for idx := i; idx <= len(str); idx++ { | ||||
| 		if len(currentStates) == 0 { | ||||
| @@ -274,9 +288,9 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 				currentState.threadGroups[0].EndIdx = idx | ||||
| 				match = append([]Group{}, currentState.threadGroups...) | ||||
| 				break | ||||
| 			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion | ||||
| 				if currentState.contentContains(str, idx) { | ||||
| 					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil) | ||||
| 			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character | ||||
| 				if currentState.contentContains(str, idx, preferLongest) { | ||||
| 					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest) | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| @@ -291,3 +305,68 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in | ||||
| 	} | ||||
| 	return false, []Group{}, i + 1 | ||||
| } | ||||
|  | ||||
| // Expand appends template to dst, expanding any variables in template to the relevant capturing group. | ||||
| // | ||||
| // A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group. | ||||
| // To insert a literal $, do not put a number after it. Alternatively, you can use $$. | ||||
| // src is the input string, and match must be the result of [Reg.FindSubmatch]. | ||||
| func (regex Reg) Expand(dst string, template string, src string, match Match) string { | ||||
| 	templateRuneSlc := []rune(template) | ||||
| 	srcRuneSlc := []rune(src) | ||||
| 	i := 0 | ||||
| 	for i < len(templateRuneSlc) { | ||||
| 		c := templateRuneSlc[i] | ||||
| 		if c == '$' { | ||||
| 			i += 1 | ||||
| 			// The dollar sign is the last character of the string, or it is proceeded by another dollar sign | ||||
| 			if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' { | ||||
| 				dst += "$" | ||||
| 				i++ | ||||
| 			} else { | ||||
| 				numStr := "" | ||||
| 				for unicode.IsDigit(templateRuneSlc[i]) { | ||||
| 					numStr += string(templateRuneSlc[i]) | ||||
| 					i++ | ||||
| 				} | ||||
| 				if numStr == "" { | ||||
| 					dst += "$" | ||||
| 				} else { | ||||
| 					num, _ := strconv.Atoi(numStr) | ||||
| 					if num < len(match) { | ||||
| 						dst += string(srcRuneSlc[match[num].StartIdx:match[num].EndIdx]) | ||||
| 					} else { | ||||
| 						dst += "$" + numStr | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 		} else { | ||||
| 			dst += string(c) | ||||
| 			i++ | ||||
| 		} | ||||
| 	} | ||||
| 	return dst | ||||
| } | ||||
|  | ||||
| // LiteralPrefix returns a string that must begin any match of the given regular expression. | ||||
| // The second return value is true if the string comprises the entire expression. | ||||
| func (regex Reg) LiteralPrefix() (prefix string, complete bool) { | ||||
| 	state := regex.start | ||||
| 	if state.assert != noneAssert { | ||||
| 		state = state.next | ||||
| 	} | ||||
| 	for !(state.isLast) && (!state.isAlternation) && len(state.content) == 1 && state.assert == noneAssert { | ||||
| 		if state.groupBegin || state.groupEnd { | ||||
| 			state = state.next | ||||
| 			continue | ||||
| 		} | ||||
| 		prefix += string(rune(state.content[0])) | ||||
| 		state = state.next | ||||
| 	} | ||||
| 	if state.isLast { | ||||
| 		complete = true | ||||
| 	} else { | ||||
| 		complete = false | ||||
| 	} | ||||
| 	return prefix, complete | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user