Commented out tests that would only pass with Longest()

Rename method receivers from 'regex' to 're' (it's shorter)
Don't break when a match is found, if we are looking for the longest match
2025-02-09 16:08:16 -05:00 · 2025-02-09 15:51:46 -05:00 · 2025-02-09 15:48:33 -05:00 · 2025-02-09 15:47:57 -05:00 · 2025-02-09 15:41:13 -05:00 · 2025-02-09 15:40:59 -05:00
10 changed files with 412 additions and 631 deletions
--- a/cmd/unique_array.go
+++ b/cmd/unique_array.go
@@ -16,7 +16,6 @@ func (s *uniq_arr[T]) add(vals ...T) {
 			s.backingMap[item] = struct{}{}
 		}
 	}
-	return
 }

 func (s uniq_arr[T]) contains(val T) bool {
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -12,16 +12,27 @@ var notDotChars []rune

 // A Reg represents the result of compiling a regular expression. It contains
 // the startState of the NFA representation of the regex, and the number of capturing
-// groups in the regex.
+// groups in the regex. It also contains the expression string.
 type Reg struct {
-	start     *nfaState
-	numGroups int
+	start         *nfaState
+	numGroups     int
+	str           string
+	preferLongest bool
 }

-// numSubexp eturns the number of sub-expressions in the given [Reg]. This is equivalent
+// NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
-func (r Reg) NumSubexp() int {
-	return r.numGroups
+func (re Reg) NumSubexp() int {
+	return re.numGroups
+}
+
+// String returns the string used to compile the expression.
+func (re Reg) String() string {
+	return re.str
+}
+
+func (re *Reg) Longest() {
+	re.preferLongest = true
 }

 const concatRune rune = 0xF0001
@@ -816,7 +827,7 @@ func thompson(re []postfixNode) (Reg, error) {
 	// In these cases, we will return an NFA with 1 state, with an assertion that is always true.
 	if len(re) == 0 {
 		start := zeroLengthMatchState()
-		nfa = append(nfa, &start)
+		nfa = append(nfa, start)
 	}

 	for _, c := range re {
@@ -987,7 +998,8 @@ func thompson(re []postfixNode) (Reg, error) {
 		if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
 			// Map the list of nodes to a list of states, each state containing the contents of a specific node
 			states := funcMap(c.nodeContents, func(node postfixNode) *nfaState {
-				s := newState()
+				s := &nfaState{}
+				s.output = append(s.output, s)
 				nodeContents := node.contents
 				if caseInsensitive {
 					nodeContents = slices.Concat(funcMap(nodeContents, func(r rune) []rune {
@@ -1001,7 +1013,7 @@ func thompson(re []postfixNode) (Reg, error) {
 						return n.contents
 					})...)
 				}
-				return &s
+				return s
 			})
 			// Reduce the list of states down to a single state by alternating them
 			toAdd := funcReduce(states, func(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -1046,7 +1058,10 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying question operator")
 			}
-			s2 := question(s1)
+			s2, err := question(s1)
+			if err != nil {
+				return Reg{}, err
+			}
 			nfa = append(nfa, s2)
 		case pipeNode:
 			// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
@@ -1064,14 +1079,14 @@ func thompson(re []postfixNode) (Reg, error) {
 					nfa = append(nfa, s2)
 				}
 				tmp := zeroLengthMatchState()
-				s2 = &tmp
+				s2 = tmp
 			}
 			if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err1 == nil { // See above for explanation
 					nfa = append(nfa, s1)
 				}
 				tmp := zeroLengthMatchState()
-				s1 = &tmp
+				s1 = tmp
 			}
 			s3 := alternate(s1, s2)
 			nfa = append(nfa, s3)
@@ -1105,7 +1120,11 @@ func thompson(re []postfixNode) (Reg, error) {
 				stateToAdd = concatenate(stateToAdd, s2)
 			} else { // Case 2
 				for i := c.startReps; i < c.endReps; i++ {
-					stateToAdd = concatenate(stateToAdd, question(cloneState(poppedState)))
+					tmp, err := question(cloneState(poppedState))
+					if err != nil {
+						return Reg{}, fmt.Errorf("error processing bounded repetition")
+					}
+					stateToAdd = concatenate(stateToAdd, tmp)
 				}
 			}
 			nfa = append(nfa, stateToAdd)
@@ -1120,7 +1139,8 @@ func thompson(re []postfixNode) (Reg, error) {

 	concatenate(nfa[0], &lastState)

-	return Reg{nfa[0], numGroups}, nil
+	// The string is empty here, because we add it in Compile()
+	return Reg{nfa[0], numGroups, "", false}, nil

 }

@@ -1138,10 +1158,11 @@ func Compile(re string, flags ...ReFlag) (Reg, error) {
 	if err != nil {
 		return Reg{}, fmt.Errorf("error compiling regex: %w", err)
 	}
+	reg.str = re
 	return reg, nil
 }

-// MustCompile panicks if Compile returns an error. They are identical in all other respects.
+// MustCompile panics if Compile returns an error. They are identical in all other respects.
 func MustCompile(re string, flags ...ReFlag) Reg {
 	reg, err := Compile(re, flags...)
 	if err != nil {
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -4,6 +4,8 @@ Package regex implements regular expression search, using a custom non-bracktrac
 The engine relies completely on UTF-8 codepoints. As such, it is capable of matching characters
 from other languages, emojis and symbols.

+The API and regex syntax are largely compatible with those of the stdlib's [regexp], with a few key differences (see 'Key Differences with regexp').
+
 The full syntax is specified below.

 # Syntax
@@ -55,8 +57,8 @@ POSIX classes (inside normal character classes):
 Composition:

 	def				Match d, followed by e, followed by f
-	x|y				Match x or y (prefer longer one)
-	xy|z			Match xy or z
+	x|y				Match x or y (prefer x)
+	xy|z			Match xy or z (prefer xy)

 Repitition (always greedy, preferring more):

@@ -94,10 +96,11 @@ Lookarounds:
 Numeric ranges:

 	<x-y>			Match any number from x to y (inclusive) (x and y must be positive numbers)
+	\<x				Match a literal '<' followed by x

 # Key Differences with regexp

-The engine and the API differ from [regexp] in a number of ways, some of them very subtle.
+The engine and the API differ from [regexp] in a few ways, some of them very subtle.
 The key differences are mentioned below.

 1. Greediness:
@@ -132,7 +135,7 @@ Rather than using primitives for return values, my engine defines two types that
 values: a [Group] represents a capturing group, and a [Match] represents a list of groups.

 [regexp] specifies a regular expression that gives a list of all the matching functions that it supports. The
-equivalent expression for this engine is:
+equivalent expression for this engine is shown below. Note that 'Index' is the default.

 	Find(All)?(String)?(Submatch)?

@@ -140,7 +143,7 @@ equivalent expression for this engine is:

 If a function contains 'All' it returns all matches instead of just the leftmost one.

-If a function contains 'String' it returns the matched text, rather than the indices.
+If a function contains 'String' it returns the matched text, rather than the index in the string.

 If a function contains 'Submatch' it returns the match, including all submatches found by
 capturing groups.
@@ -156,5 +159,20 @@ and the input string:

 The 0th group would contain 'xy' and the 1st group would contain 'y'. Any matching function without 'Submatch' in its name
 returns the 0-group.
+
+# Feature Differences
+
+The following features from [regexp] are (currently) NOT supported:
+ 1. Named capturing groups
+ 2. Non-greedy operators
+ 3. Unicode character classes
+ 4. Embedded flags (flags are passed as arguments to [Compile])
+ 5. Literal text with \Q ... \E
+
+The following features are not available in [regexp], but are supported in my engine:
+ 1. Lookarounds
+ 2. Numeric ranges
+
+I hope to shorten the first list, and expand the second.
 */
 package regex
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -52,3 +52,40 @@ func ExampleReg_FindSubmatch() {
 	// 0	1
 	// 2	3
 }
+
+func ExampleReg_Expand() {
+	inputStr := `option1: value1
+	option2: value2`
+	regexStr := `(\w+): (\w+)`
+	templateStr := "$1 = $2\n"
+	regexComp := regex.MustCompile(regexStr, regex.RE_MULTILINE)
+	result := ""
+	for _, submatches := range regexComp.FindAllSubmatch(inputStr) {
+		result = regexComp.Expand(result, templateStr, inputStr, submatches)
+	}
+	fmt.Println(result)
+	// Output: option1 = value1
+	// option2 = value2
+
+}
+
+func ExampleReg_LiteralPrefix() {
+	regexStr := `a(b|c)d*`
+	regexComp := regex.MustCompile(regexStr)
+	prefix, complete := regexComp.LiteralPrefix()
+	fmt.Println(prefix)
+	fmt.Println(complete)
+	// Output: a
+	// false
+}
+
+func ExampleReg_Longest() {
+	regexStr := `x|xx`
+	inputStr := "xx"
+	regexComp := regex.MustCompile(regexStr)
+	fmt.Println(regexComp.FindString(inputStr))
+	regexComp.Longest()
+	fmt.Println(regexComp.FindString(inputStr))
+	// Output: x
+	// xx
+}
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,8 +2,8 @@ package regex

 import (
 	"fmt"
-	"slices"
-	"sort"
+	"strconv"
+	"unicode"
 )

 // A Match represents a match found by the regex in a given string.
@@ -15,7 +15,7 @@ import (
 // See [Reg.FindSubmatch] for an example.
 type Match []Group

-// a Group represents a group. It contains the start index and end index of the match
+// A Group represents a capturing group. It contains the start and end indices of the group.
 type Group struct {
 	StartIdx int
 	EndIdx   int
@@ -30,17 +30,6 @@ func newMatch(size int) Match {
 	return toRet
 }

-// Returns the number of valid groups in the match
-func (m Match) numValidGroups() int {
-	numValid := 0
-	for _, g := range m {
-		if g.StartIdx >= 0 && g.EndIdx >= 0 {
-			numValid++
-		}
-	}
-	return numValid
-}
-
 // Returns a string containing the indices of all (valid) groups in the match
 func (m Match) String() string {
 	var toRet string
@@ -59,7 +48,7 @@ func (idx Group) String() string {
 	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }

-// Returns whether a group is valid (ie. whether it matched any text). It
+// IsValid returns whether a group is valid (ie. whether it matched any text). It
 // simply ensures that both indices of the group are >= 0.
 func (g Group) IsValid() bool {
 	return g.StartIdx >= 0 && g.EndIdx >= 0
@@ -70,106 +59,42 @@ func getZeroGroup(m Match) Group {
 	return m[0]
 }

-// takeZeroState takes the 0-state (if such a transition exists) for all states in the
-// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
-// the second ret val is true.
-// If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
-//func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
-//	for _, state := range states {
-//		if len(state.transitions[epsilon]) > 0 {
-//			for _, s := range state.transitions[epsilon] {
-//				if s.threadGroups == nil {
-//					s.threadGroups = newMatch(numGroups + 1)
-//				}
-//				copy(s.threadGroups, state.threadGroups)
-//				if s.groupBegin {
-//					s.threadGroups[s.groupNum].StartIdx = idx
-//					//					openParenGroups = append(openParenGroups, s.groupNum)
-//				}
-//				if s.groupEnd {
-//					s.threadGroups[s.groupNum].EndIdx = idx
-//					//					closeParenGroups = append(closeParenGroups, s.groupNum)
-//				}
-//			}
-//			rtv = append(rtv, state.transitions[epsilon]...)
-//		}
-//	}
-//	for _, state := range rtv {
-//		if len(state.transitions[epsilon]) > 0 {
-//			return rtv, true
-//		}
-//	}
-//	return rtv, false
-//}
-
-// zeroMatchPossible returns true if a zero-length match is possible
-// from any of the given states, given the string and our position in it.
-// It uses the same algorithm to find zero-states as the one inside the loop,
-// so I should probably put it in a function.
-//func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
-//	zeroStates, isZero := takeZeroState(states, numGroups, idx)
-//	tempstates := make([]*nfaState, 0, len(zeroStates)+len(states))
-//	tempstates = append(tempstates, states...)
-//	tempstates = append(tempstates, zeroStates...)
-//	num_appended := 0 // number of unique states addded to tempstates
-//	for isZero == true {
-//		zeroStates, isZero = takeZeroState(tempstates, numGroups, idx)
-//		tempstates, num_appended = uniqueAppend(tempstates, zeroStates...)
-//		if num_appended == 0 { // break if we haven't appended any more unique values
-//			break
-//		}
-//	}
-//	for _, state := range tempstates {
-//		if state.isEmpty && (state.assert == noneAssert || state.checkAssertion(str, idx)) && state.isLast {
-//			return true
-//		}
-//	}
-//	return false
-//}
-
-// Prunes the slice by removing overlapping indices.
-func pruneIndices(indices []Match) []Match {
-	// First, sort the slice by the start indices
-	sort.Slice(indices, func(i, j int) bool {
-		return indices[i][0].StartIdx < indices[j][0].StartIdx
-	})
-	toRet := make([]Match, 0, len(indices))
-	current := indices[0]
-	for _, idx := range indices[1:] {
-		// idx doesn't overlap with current (starts after current ends), so add current to result
-		// and update the current.
-		if idx[0].StartIdx >= current[0].EndIdx {
-			toRet = append(toRet, current)
-			current = idx
-		} else if idx[0].EndIdx > current[0].EndIdx {
-			// idx overlaps, but it is longer, so update current
-			current = idx
-		}
-	}
-	// Add last state
-	toRet = append(toRet, current)
-	return toRet
-}
-
 func copyThread(to *nfaState, from nfaState) {
-	to.threadSP = from.threadSP
 	to.threadGroups = append([]Group{}, from.threadGroups...)
 }

 // Find returns the 0-group of the leftmost match of the regex in the given string.
 // An error value != nil indicates that no match was found.
-func (regex Reg) Find(str string) (Group, error) {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) Find(str string) (Group, error) {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Group{}, fmt.Errorf("no matches found")
 	}
 	return getZeroGroup(match), nil
 }

+// Match returns a boolean value, indicating whether the regex found a match in the given string.
+func (re Reg) Match(str string) bool {
+	_, err := re.Find(str)
+	return err == nil
+}
+
+// CompileMatch compiles expr and returns true if str contains a match of the expression.
+// It is equivalent to [regexp.MatchString].
+// An optional list of flags may be provided (see [ReFlag]).
+// It returns an error (!= nil) if there was an error compiling the expression.
+func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
+	re, err := Compile(expr, flags...)
+	if err != nil {
+		return false, err
+	}
+	return re.Match(str), nil
+}
+
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
-func (regex Reg) FindAll(str string) []Group {
-	indices := regex.FindAllSubmatch(str)
+func (re Reg) FindAll(str string) []Group {
+	indices := re.FindAllSubmatch(str)
 	zeroGroups := funcMap(indices, getZeroGroup)
 	return zeroGroups
 }
@@ -178,8 +103,8 @@ func (regex Reg) FindAll(str string) []Group {
 // The return value will be an empty string in two situations:
 //  1. No match was found
 //  2. The match was an empty string
-func (regex Reg) FindString(str string) string {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) FindString(str string) string {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return ""
 	}
@@ -192,8 +117,8 @@ func (regex Reg) FindString(str string) string {
 // number of groups. The validity of a group (whether or not it matched anything) can be determined with
 // [Group.IsValid], or by checking that both indices of the group are >= 0.
 // The second-return value is nil if no match was found.
-func (regex Reg) FindSubmatch(str string) (Match, error) {
-	match, err := regex.FindNthMatch(str, 1)
+func (re Reg) FindSubmatch(str string) (Match, error) {
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Match{}, fmt.Errorf("no match found")
 	} else {
@@ -201,11 +126,41 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 	}
 }

-// FindAllString is the 'all' version of FindString.
+// FindStringSubmatch is the 'string' version of [Reg.FindSubmatch]. It returns a slice of strings,
+// where the string at index i contains the text matched by the i-th capturing group.
+// The 0-th index represents the entire match.
+// An empty string at index n could mean:
+//
+//  1. Group n did not find a match
+//  2. Group n found a zero-length match
+//
+// A return value of nil indicates no match.
+func (re Reg) FindStringSubmatch(str string) []string {
+	matchStr := make([]string, re.numGroups+1)
+	match, err := re.FindSubmatch(str)
+	if err != nil {
+		return nil
+	}
+	nonEmptyMatchFound := false
+	for i := range match {
+		if match[i].IsValid() {
+			matchStr[i] = str[match[i].StartIdx:match[i].EndIdx]
+			nonEmptyMatchFound = true
+		} else {
+			matchStr[i] = ""
+		}
+	}
+	if nonEmptyMatchFound == false {
+		return nil
+	}
+	return matchStr
+}
+
+// FindAllString is the 'all' version of [Reg.FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
-func (regex Reg) FindAllString(str string) []string {
-	zerogroups := regex.FindAll(str)
+func (re Reg) FindAllString(str string) []string {
+	zerogroups := re.FindAll(str)
 	matchStrs := funcMap(zerogroups, func(g Group) string {
 		return str[g.StartIdx:g.EndIdx]
 	})
@@ -214,14 +169,14 @@ func (regex Reg) FindAllString(str string) []string {

 // FindNthMatch return the 'n'th match of the regex in the given string.
 // It returns an error (!= nil) if there are fewer than 'n' matches in the string.
-func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
+func (re Reg) FindNthMatch(str string, n int) (Match, error) {
 	idx := 0
 	matchNum := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			matchNum++
 		}
@@ -234,31 +189,65 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
 }

 // FindAllSubmatch returns a slice of matches in the given string.
-func (regex Reg) FindAllSubmatch(str string) []Match {
+func (re Reg) FindAllSubmatch(str string) []Match {
 	idx := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	indices := make([]Match, 0)
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			indices = append(indices, matchIdx)
 		}
 	}
-	if len(indices) > 0 {
-		return pruneIndices(indices)
-	}

 	return indices
 }

+func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
+	if stateExists(list, state) || stateExists(visited, state) {
+		return list
+	}
+	visited = append(visited, state)
+
+	if state.isKleene || state.isQuestion {
+		copyThread(state.splitState, state)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
+		copyThread(state.next, state)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
+		return list
+	}
+	if state.isAlternation {
+		copyThread(state.next, state)
+		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
+		copyThread(state.splitState, state)
+		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
+		return list
+	}
+	state.threadGroups = append([]Group{}, threadGroups...)
+	if state.assert != noneAssert {
+		if state.checkAssertion(str, idx, preferLongest) {
+			copyThread(state.next, state)
+			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
+		}
+	}
+	if state.groupBegin {
+		state.threadGroups[state.groupNum].StartIdx = idx
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
+	}
+	if state.groupEnd {
+		state.threadGroups[state.groupNum].EndIdx = idx
+		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
+	}
+	return append(list, state)
+
+}
+
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
-//
-//	Might return duplicates or overlapping indices, so care must be taken to prune the resulting array.
-func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
+func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) {
 	// Base case - exit if offset exceeds string's length
 	if offset > len(str) {
 		// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
@@ -266,366 +255,120 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	}
 	resetThreads(start)

-	// Hold a list of match indices for the current run. When we
-	// can no longer find a match, the match with the largest range is
-	// chosen as the match for the entire string.
-	// This allows us to pick the longest possible match (which is how greedy matching works).
-	// COMMENT ABOVE IS CURRENTLY NOT UP-TO-DATE
-	//	tempIndices := newMatch(numGroups + 1)
-
-	//	foundPath := false
-	//startIdx := offset
-	//endIdx := offset
 	currentStates := make([]nfaState, 0)
 	nextStates := make([]nfaState, 0)
-	//	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
 	i := offset // Index in string
-	//startingFrom := i                  // Store starting index

 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
 	if start.assert != noneAssert {
-		if start.checkAssertion(str, offset) == false {
+		if start.checkAssertion(str, offset, preferLongest) == false {
 			i++
 			return false, []Group{}, i
 		}
 	}
-	// Increment until we hit a character matching the start state (assuming not 0-state)
-	//	if start.isEmpty == false {
-	//		for i < len(str) && !start.contentContains(str, i) {
-	//			i++
-	//		}
-	//		startIdx = i
-	//		startingFrom = i
-	//		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
-	//	}

-	//	start.threadGroups = newMatch(numGroups + 1)
-	// Check if the start state begins a group - if so, add the start index to our list
-	//if start.groupBegin {
-	//		start.threadGroups[start.groupNum].StartIdx = i
-	//		tempIndices[start.groupNum].startIdx = i
-	//}
-
-	start.threadSP = i
-	currentStates = append(currentStates, *start)
-	var foundMatch bool
-	var isEmptyAndNoAssertion bool
-	// Main loop
+	start.threadGroups = newMatch(numGroups + 1)
+	start.threadGroups[0].StartIdx = i
+	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest)
+	var match Match = nil
 	for idx := i; idx <= len(str); idx++ {
+		if len(currentStates) == 0 {
+			break
+		}
 		for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
 			currentState := currentStates[currentStateIdx]
-			foundMatch = false
-			isEmptyAndNoAssertion = false

 			if currentState.threadGroups == nil {
 				currentState.threadGroups = newMatch(numGroups + 1)
 				currentState.threadGroups[0].StartIdx = idx
 			}

-			if currentState.groupBegin {
-				currentState.threadGroups[currentState.groupNum].StartIdx = idx
-				//		allMatches := make([]nfaState, 0)
-				//		for _, v := range currentState.transitions {
-				//			dereferenced := funcMap(v, func(s *nfaState) nfaState {
-				//				return *s
-				//			})
-				//			allMatches = append(allMatches, dereferenced...)
-				//		}
-				//		slices.Reverse(allMatches)
-				//		for i := range allMatches {
-				//			copyThread(&allMatches[i], currentState)
-				//		}
-				//		currentStates = append(currentStates, allMatches...)
-			}
-			if currentState.groupEnd {
-				currentState.threadGroups[currentState.groupNum].EndIdx = idx
-				//			allMatches := make([]nfaState, 0)
-				//			for _, v := range currentState.transitions {
-				//				dereferenced := funcMap(v, func(s *nfaState) nfaState {
-				//					return *s
-				//				})
-				//				allMatches = append(allMatches, dereferenced...)
-				//			}
-				//			slices.Reverse(allMatches)
-				//			for i := range allMatches {
-				//				copyThread(&allMatches[i], currentState)
-				//			}
-				//			currentStates = append(currentStates, allMatches...)
-			}
-
-			//		if currentState.isKleene {
-			//			// Append the next-state (after the kleene), then append the kleene state
-			//			allMatches := make([]*nfaState, 0)
-			//			for _, v := range currentState.transitions {
-			//				allMatches = append(allMatches, v...)
-			//			}
-			//			slices.Reverse(allMatches)
-			//			for _, m := range allMatches {
-			//				m.threadGroups = currentState.threadGroups
-			//				m.threadSP = idx
-			//			}
-			//			currentStates = append(currentStates, allMatches...)
-			//
-			//			//	kleeneState := currentState.kleeneState
-			//			//	kleeneState.threadGroups = currentState.threadGroups
-			//			//	kleeneState.threadSP = currentState.threadSP
-			//			//	currentStates = append(currentStates, kleeneState)
-			//			continue
-			//		}
-
-			// Alternation - enqueue left then right state, and continue
-			if currentState.isAlternation {
-				if currentState.isKleene { // Reverse order of adding things
-					rightState := currentState.splitState
-					copyThread(rightState, currentState)
-					currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState)
-					leftState := currentState.next
-					copyThread(leftState, currentState)
-					currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState)
-				} else {
-					leftState := currentState.next
-					copyThread(leftState, currentState)
-					currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState)
-					rightState := currentState.splitState
-					copyThread(rightState, currentState)
-					currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState)
-				}
-				continue
-			}
-
-			// Empty state - enqueue next state, do _not_ increment the SP
-			if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false {
-				isEmptyAndNoAssertion = true
-			}
-
-			if currentState.contentContains(str, idx) {
-				foundMatch = true
-			}
-
-			if isEmptyAndNoAssertion || foundMatch {
-				allMatches := make([]nfaState, 0)
-				allMatches = append(allMatches, *(currentState.next))
-				slices.Reverse(allMatches)
-				for i := range allMatches {
-					copyThread(&allMatches[i], currentState)
-					if foundMatch && currentState.assert == noneAssert {
-						allMatches[i].threadSP += 1
-					}
-				}
-				if currentState.groupBegin {
-					currentStates = slices.Insert(currentStates, currentStateIdx+1, allMatches...)
-				} else if currentState.groupEnd {
-					currentStates = append(currentStates, allMatches...)
-				} else if currentState.assert != noneAssert {
-					currentStates = append(currentStates, allMatches...)
-				} else {
-					nextStates = append(nextStates, allMatches...)
-				}
-			}
-
-			if currentState.isLast && len(nextStates) == 0 { // Last state reached
+			if currentState.isLast {
 				currentState.threadGroups[0].EndIdx = idx
-				if idx == currentState.threadGroups[0].StartIdx {
-					idx += 1
+				match = append([]Group{}, currentState.threadGroups...)
+				if !preferLongest {
+					break
+				}
+			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
+				if currentState.contentContains(str, idx, preferLongest) {
+					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
 				}
-				return true, currentState.threadGroups, idx
 			}
 		}
 		currentStates = append([]nfaState{}, nextStates...)
 		nextStates = nil
 	}
+	if match != nil {
+		if offset == match[0].EndIdx {
+			return true, match, match[0].EndIdx + 1
+		}
+		return true, match, match[0].EndIdx
+	}
 	return false, []Group{}, i + 1
-	//		zeroStates := make([]*nfaState, 0)
-	//		// Keep taking zero-states, until there are no more left to take
-	//		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
-	//		topStateItem := currentStates.peek()
-	//		topState := topStateItem.(*priorQueueItem).state
-	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
-	//		tempStates = append(tempStates, zeroStates...)
-	//		num_appended := 0
-	//		for isZero == true {
-	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
-	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
-	//			if num_appended == 0 { // Break if we haven't appended any more unique values
-	//				break
-	//			}
-	//		}
-	//		if isZero == true {
-	//			currentStates.Pop()
-	//		}
-	//
-	//		for _, state := range tempStates {
-	//			heap.Push(currentStates, newPriorQueueItem(state))
-	//		}
-	//		tempStates = nil
-	//
-	//		// Take any transitions corresponding to current character
-	//		numStatesMatched := 0            // The number of states which had at least 1 match for this round
-	//		assertionFailed := false         // Whether or not an assertion failed for this round
-	//		lastStateInList := false         // Whether or not a last state was in our list of states
-	//		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
-	//		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
-	//		for numStatesMatched == 0 && lastStateInList == false {
-	//			if currentStates.Len() == 0 {
-	//				break
-	//			}
-	//			stateItem := heap.Pop(currentStates)
-	//			state := stateItem.(*priorQueueItem).state
-	//			matches, numMatches := state.matchesFor(str, i)
-	//			if numMatches > 0 {
-	//				numStatesMatched++
-	//				tempStates = append([]*nfaState(nil), matches...)
-	//				foundPath = true
-	//				for _, m := range matches {
-	//					if m.threadGroups == nil {
-	//						m.threadGroups = newMatch(numGroups + 1)
-	//					}
-	//					m.threadSP = state.threadSP + 1
-	//					copy(m.threadGroups, state.threadGroups)
-	//				}
-	//			}
-	//			if numMatches < 0 {
-	//				assertionFailed = true
-	//			}
-	//			if state.isLast {
-	//				if state.isLookaround() {
-	//					lastLookaroundInList = true
-	//				}
-	//				lastStateInList = true
-	//				lastStatePtr = state
-	//			}
-	//		}
-	//
-	//		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
-	//			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
-	//			// state. The explanation below is my attempt to explain this behavior.
-	//			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
-	//			//
-	//			// One of the states in our list was a last state and a lookaround. In this case, we
-	//			// don't abort upon failure of the assertion, because we have found
-	//			// another path to a final state.
-	//			// Even if the last state _was_ an assertion, we can use the previously
-	//			// saved indices to find a match.
-	//			if lastLookaroundInList {
-	//				break
-	//			} else {
-	//				if i == startingFrom {
-	//					i++
-	//				}
-	//				return false, []Group{}, i
-	//			}
-	//		}
-	//		// Check if we can find a state in our list that is:
-	//		// 	a. A last-state
-	//		// 	b. Empty
-	//		// 	c. Doesn't assert anything
-	//		for _, stateItem := range *currentStates {
-	//			s := stateItem.state
-	//			if s.isLast && s.isEmpty && s.assert == noneAssert {
-	//				lastStatePtr = s
-	//				lastStateInList = true
-	//			}
-	//		}
-	//		if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
-	//			for j := 1; j < numGroups+1; j++ {
-	//				tempIndices[j] = lastStatePtr.threadGroups[j]
-	//			}
-	//			endIdx = i
-	//			tempIndices[0] = Group{startIdx, endIdx}
-	//			if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
-	//				return true, tempIndices, tempIndices[0].EndIdx + 1
-	//			} else {
-	//				return true, tempIndices, tempIndices[0].EndIdx
-	//			}
-	//		}
-	//
-	//		// Check if we can find a zero-length match
-	//		if foundPath == false {
-	//			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
-	//				return item.state
-	//			})
-	//			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
-	//				if tempIndices[0].IsValid() == false {
-	//					tempIndices[0] = Group{startIdx, startIdx}
-	//				}
-	//			}
-	//			// If we haven't moved in the string, increment the counter by 1
-	//			// to ensure we don't keep trying the same string over and over.
-	//			//			if i == startingFrom {
-	//			startIdx++
-	//			//	i++
-	//			//			}
-	//			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
-	//				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
-	//					return true, tempIndices, tempIndices[0].EndIdx + 1
-	//				} else {
-	//					return true, tempIndices, tempIndices[0].EndIdx
-	//				}
-	//			}
-	//			return false, []Group{}, startIdx
-	//		}
-	//		currentStates = &priorityQueue{}
-	//		slices.Reverse(tempStates)
-	//		for _, state := range tempStates {
-	//			heap.Push(currentStates, newPriorQueueItem(state))
-	//		}
-	//		tempStates = nil
-	//
-	//		i++
-	//	}
-	//
-	// // End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
-	// // This is the exact same algorithm used inside the loop, so I should probably put it in a function.
-	//
-	//	if currentStates.Len() > 0 {
-	//		topStateItem := currentStates.peek()
-	//		topState := topStateItem.(*priorQueueItem).state
-	//		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
-	//		tempStates = append(tempStates, zeroStates...)
-	//		num_appended := 0 // Number of unique states addded to tempStates
-	//		for isZero == true {
-	//			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
-	//			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
-	//			if num_appended == 0 { // Break if we haven't appended any more unique values
-	//				break
-	//			}
-	//		}
-	//	}
-	//
-	//	for _, state := range tempStates {
-	//		heap.Push(currentStates, newPriorQueueItem(state))
-	//	}
-	//
-	// tempStates = nil
-	//
-	//	for _, stateItem := range *currentStates {
-	//		state := stateItem.state
-	//		// Only add the match if the start index is in bounds. If the state has an assertion,
-	//		// make sure the assertion checks out.
-	//		if state.isLast && i <= len(str) {
-	//			if state.assert == noneAssert || state.checkAssertion(str, i) {
-	//				for j := 1; j < numGroups+1; j++ {
-	//					tempIndices[j] = state.threadGroups[j]
-	//				}
-	//				endIdx = i
-	//				tempIndices[0] = Group{startIdx, endIdx}
-	//			}
-	//		}
-	//	}
-	//
-	//	if tempIndices.numValidGroups() > 0 {
-	//		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
-	//			return true, tempIndices, tempIndices[0].EndIdx + 1
-	//		} else {
-	//			return true, tempIndices, tempIndices[0].EndIdx
-	//		}
-	//	}
-	//
-	// if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
-	//
-	//		startIdx++
-	//	}
-	//
-	// return false, []Group{}, startIdx
+}
+
+// Expand appends template to dst, expanding any variables in template to the relevant capturing group,
+// and returns the resulting string.
+//
+// A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group.
+// To insert a literal $, do not put a number after it. Alternatively, you can use $$.
+// If 'n' is not an index into match (or cannot be parsed as an int), the variable is copied to the
+// output unchanged. If the indices stored for the n-th group do not describe a range inside src
+// (for example {-1, -1}), the variable expands to the empty string.
+// src is the input string, and match must be the result of [Reg.FindSubmatch].
+func (re Reg) Expand(dst string, template string, src string, match Match) string {
+	templateRuneSlc := []rune(template)
+	srcRuneSlc := []rune(src) // Group indices are applied to the runes of src
+	i := 0
+	for i < len(templateRuneSlc) {
+		c := templateRuneSlc[i]
+		if c != '$' {
+			dst += string(c)
+			i++
+			continue
+		}
+		i++ // Step over the dollar sign
+		// The dollar sign is the last character of the template, or it is followed by another dollar sign
+		if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' {
+			dst += "$"
+			i++
+			continue
+		}
+		// Collect the digits after the dollar sign. The bounds check matters because the digits
+		// may run up to the very end of the template (eg. "$1").
+		digitStart := i
+		for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
+			i++
+		}
+		numStr := string(templateRuneSlc[digitStart:i])
+		if numStr == "" { // No number after the dollar sign: it is a literal '$'
+			dst += "$"
+			continue
+		}
+		num, err := strconv.Atoi(numStr)
+		if err != nil || num >= len(match) {
+			// Not a usable index into match: keep the variable text as it was written
+			dst += "$" + numStr
+			continue
+		}
+		start, end := match[num].StartIdx, match[num].EndIdx
+		// Only slice src when the stored indices form a range inside it
+		if start >= 0 && start <= end && end <= len(srcRuneSlc) {
+			dst += string(srcRuneSlc[start:end])
+		}
+	}
+	return dst
+}
+
+// LiteralPrefix returns a string that must begin any match of the given regular expression.
+// The second return value is true if the string comprises the entire expression.
+//
+// The prefix is collected by following the 'next' pointers from the start state, for as long as
+// each state holds exactly one content value and is not an alternation, an assertion or a last-state.
+// A start state that carries an assertion is stepped over first, and states that begin or end a
+// group are passed through without adding to the prefix.
+// A Reg without a start state yields ("", false).
+func (re Reg) LiteralPrefix() (prefix string, complete bool) {
+	state := re.start
+	// Step over a leading assertion state
+	if state != nil && state.assert != noneAssert {
+		state = state.next
+	}
+	// The nil check stops the walk if the chain of 'next' pointers ends before a last-state is reached
+	for state != nil && !state.isLast && !state.isAlternation && len(state.content) == 1 && state.assert == noneAssert {
+		if !state.groupBegin && !state.groupEnd {
+			prefix += string(rune(state.content[0]))
+		}
+		state = state.next
+	}
+	// The prefix is the whole expression only if the walk stopped on a last-state
+	complete = state != nil && state.isLast
+	return prefix, complete
 }
--- a/regex/misc.go
+++ b/regex/misc.go
@@ -48,49 +48,6 @@ func isNormalChar(c rune) bool {
 	return !slices.Contains(specialChars, c)
 }

-// Ensure that the given elements are only appended to the given slice if they
-// don't already exist. Returns the new slice, and the number of unique items appended.
-func uniqueAppend[T comparable](slc []T, items ...T) ([]T, int) {
-	num_appended := 0
-	for _, item := range items {
-		if !slices.Contains(slc, item) {
-			slc = append(slc, item)
-			num_appended++
-		}
-	}
-	return slc, num_appended
-}
-
-func uniqueAppendFunc[T any](slc []T, fn func(T, T) bool, items ...T) ([]T, int) {
-	toRet := make([]T, len(slc))
-	num_appended := 0
-	copy(toRet, slc)
-	for _, item := range items {
-		itemExists := false
-		for _, val := range slc {
-			if fn(item, val) {
-				itemExists = true
-			}
-		}
-		if !itemExists {
-			toRet = append(toRet, item)
-			num_appended++
-		}
-	}
-	return toRet, num_appended
-}
-
-// Returns true only if all the given elements are equal
-func allEqual[T comparable](items ...T) bool {
-	first := items[0]
-	for _, item := range items {
-		if item != first {
-			return false
-		}
-	}
-	return true
-}
-
 // Map function - convert a slice of T to a slice of V, based on a function
 // that maps a T to a V
 func funcMap[T, V any](slc []T, fn func(T) V) []V {
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -47,7 +47,6 @@ type nfaState struct {
 	// The following properties depend on the current match - I should think about resetting them for every match.
 	zeroMatchFound bool    // Whether or not the state has been used for a zero-length match - only relevant for zero states
 	threadGroups   []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
-	threadSP       int     // The string pointer of the thread - where it is in the input string
 }

 // Clones the NFA starting from the given state.
@@ -123,7 +122,6 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	}
 	// Assuming it hasn't been visited
 	state.threadGroups = nil
-	state.threadSP = 0
 	visitedMap[state] = true
 	if state.isAlternation {
 		resetThreadsHelper(state.next, visitedMap)
@@ -135,7 +133,7 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {

 // Checks if the given state's assertion is true. Returns true if the given
 // state doesn't have an assertion.
-func (s nfaState) checkAssertion(str []rune, idx int) bool {
+func (s nfaState) checkAssertion(str []rune, idx int, preferLongest bool) bool {
 	if s.assert == alwaysTrueAssert {
 		return true
 	}
@@ -185,7 +183,7 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 			strToMatch = string(runesToMatch)
 		}

-		regComp := Reg{startState, s.lookaroundNumCaptureGroups}
+		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex, preferLongest}
 		matchIndices := regComp.FindAll(strToMatch)

 		numMatchesFound := 0
@@ -212,9 +210,9 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 }

 // Returns true if the contents of 's' contain the value at the given index of the given string
-func (s nfaState) contentContains(str []rune, idx int) bool {
+func (s nfaState) contentContains(str []rune, idx int, preferLongest bool) bool {
 	if s.assert != noneAssert {
-		return s.checkAssertion(str, idx)
+		return s.checkAssertion(str, idx, preferLongest)
 	}
 	if idx >= len(str) {
 		return false
@@ -331,9 +329,6 @@ func kleene(s1 *nfaState) (*nfaState, error) {
 	toReturn.isAlternation = true
 	toReturn.content = newContents(epsilon)
 	toReturn.splitState = s1
-	for i := range s1.output {
-		s1.output[i].next = toReturn
-	}

 	//	toReturn := &nfaState{}
 	//	toReturn.transitions = make(map[int][]*nfaState)
@@ -375,14 +370,20 @@ func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
 	return toReturn
 }

+// question creates the state for the '?' operator applied to s1.
+// It returns an error if s1 is an empty state that carries an assertion ("previous token is not quantifiable").
+// Otherwise it returns a new empty state holding an epsilon, flagged as both an alternation and a
+// question state, with s1 as its splitState. The outputs of the new state are the state itself,
+// followed by the outputs of s1.
-func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
-	s2 := &nfaState{}
+func question(s1 *nfaState) (*nfaState, error) { // Use the fact that ab? == a(b|)
+	if s1.isEmpty && s1.assert != noneAssert {
+		return nil, fmt.Errorf("previous token is not quantifiable")
+	}
+	toReturn := &nfaState{}
+	toReturn.isEmpty = true
+	toReturn.isAlternation = true
+	toReturn.isQuestion = true
+	toReturn.content = newContents(epsilon)
+	toReturn.splitState = s1
+	toReturn.output = append([]*nfaState{}, toReturn)
+	toReturn.output = append(toReturn.output, s1.output...)
 	//	s2.transitions = make(map[int][]*nfaState)
-	s2.content = newContents(epsilon)
-	s2.output = append(s2.output, s2)
-	s2.isEmpty = true
-	s3 := alternate(s1, s2)
-	return s3
+	return toReturn, nil
 }

 // Creates and returns a new state with the 'default' values.
@@ -401,10 +402,40 @@ func newState() nfaState {
 }

 // Creates and returns a state that _always_ has a zero-length match.
+// The returned state is marked empty, holds an epsilon as its content, carries
+// alwaysTrueAssert as its assertion, and lists itself as its only output.
-func zeroLengthMatchState() nfaState {
-	start := newState()
+func zeroLengthMatchState() *nfaState {
+	start := &nfaState{}
 	start.content = newContents(epsilon)
 	start.isEmpty = true
 	start.assert = alwaysTrueAssert
+	start.output = append([]*nfaState{}, start)
 	return start
 }
+
+// equals reports whether s and other agree on every field checked below.
+// The output, content, except and threadGroups slices are compared element-by-element;
+// the next and splitState pointers are compared by identity.
+func (s nfaState) equals(other nfaState) bool {
+	// The first field that differs decides the result; the checks run in the listed order.
+	switch {
+	case s.isEmpty != other.isEmpty,
+		s.isLast != other.isLast,
+		!slices.Equal(s.output, other.output),
+		!slices.Equal(s.content, other.content),
+		s.next != other.next,
+		s.isKleene != other.isKleene,
+		s.isQuestion != other.isQuestion,
+		s.isAlternation != other.isAlternation,
+		s.splitState != other.splitState,
+		s.assert != other.assert,
+		s.allChars != other.allChars,
+		!slices.Equal(s.except, other.except),
+		s.lookaroundNFA != other.lookaroundNFA,
+		s.groupBegin != other.groupBegin,
+		s.groupEnd != other.groupEnd,
+		s.groupNum != other.groupNum,
+		!slices.Equal(s.threadGroups, other.threadGroups):
+		return false
+	}
+	return true
+}
+
+// stateExists reports whether list holds an element that equals s, as decided by nfaState.equals.
+func stateExists(list []nfaState, s nfaState) bool {
+	return slices.ContainsFunc(list, func(candidate nfaState) bool {
+		return candidate.equals(s)
+	})
+}
--- a/regex/priorityQueue.go
+++ b/regex/priorityQueue.go
@@ -1,89 +0,0 @@
-package regex
-
-import "container/heap"
-
-// Implement a priority queue using container/heap
-
-const (
-	min_priority int = iota
-	zerostate_priority
-	alternation_priority
-	kleene_priority
-	char_priority
-	max_priority
-)
-
-func getPriority(state *nfaState) int {
-	if state.isKleene {
-		return zerostate_priority
-	} else if state.isAlternation {
-		return alternation_priority
-	} else {
-		if state.isEmpty {
-			return zerostate_priority
-		} else {
-			return char_priority
-		}
-	}
-}
-
-type priorQueueItem struct {
-	state    *nfaState
-	priority int
-	index    int
-}
-
-func newPriorQueueItem(state *nfaState) *priorQueueItem {
-	return &priorQueueItem{
-		state:    state,
-		index:    -1,
-		priority: getPriority(state),
-	}
-}
-
-type priorityQueue []*priorQueueItem
-
-func (pq priorityQueue) Len() int {
-	return len(pq)
-}
-
-func (pq priorityQueue) Less(i, j int) bool {
-	if pq[i].priority == pq[j].priority {
-		return pq[i].index < pq[j].index
-	}
-	return pq[i].priority > pq[j].priority // We want max-heap, so we use greater-than
-}
-
-func (pq priorityQueue) Swap(i, j int) {
-	pq[i], pq[j] = pq[j], pq[i]
-	pq[i].index = i
-	pq[j].index = j
-}
-
-func (pq *priorityQueue) Push(x any) {
-	length := len(*pq)
-	item := x.(*priorQueueItem)
-	item.index = length
-	*pq = append(*pq, item)
-}
-
-func (pq *priorityQueue) Pop() any {
-	old := *pq
-	n := len(old)
-	item := old[n-1]
-	old[n-1] = nil
-	item.index = -1
-	*pq = old[0 : n-1]
-	return item
-}
-func (pq *priorityQueue) peek() any {
-	queue := *pq
-	n := len(queue)
-	return queue[n-1]
-}
-
-func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
-	item.state = value
-	item.priority = priority
-	heap.Fix(pq, item.index)
-}
--- a/regex/range2regex.go
+++ b/regex/range2regex.go
@@ -109,7 +109,7 @@ func range2regex(start int, end int) (string, error) {
 		startSlc := intToSlc(rg.start)
 		endSlc := intToSlc(rg.end)
 		if len(startSlc) != len(endSlc) {
-			return "", fmt.Errorf("Error parsing numeric range")
+			return "", fmt.Errorf("error parsing numeric range")
 		}
 		for i := range startSlc {
 			if startSlc[i] == endSlc[i] {
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -25,7 +25,9 @@ var reTests = []struct {
 	{"a*b", nil, "qwqw", []Group{}},
 	{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
 	{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
-	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	// This match will only happen with Longest()
+	// {"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}},
 	{"b*a*a", nil, "bba", []Group{{0, 3}}},
 	{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
 	{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
@@ -528,7 +530,7 @@ var groupTests = []struct {
 }{
 	{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
 	{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
-	{"(0)", nil, "ab", []Match{[]Group{}}},
+	{"(0)", nil, "ab", []Match{}},
 	{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
 	{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
 	{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
@@ -537,10 +539,11 @@ var groupTests = []struct {
 	{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
 	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	// This match will only happen with Longest()
+	//	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 3}, {0, 3}, {-1, -1}}}},
 	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
+	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
 	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
 	{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
 	{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
@@ -578,7 +581,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `bcdd`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, nil, `a`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, nil, `a`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\41`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},

@@ -633,7 +636,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
 	{`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}},
@@ -743,7 +746,7 @@ func TestFindString(t *testing.T) {
 				foundString := regComp.FindString(test.str)
 				if len(test.result) == 0 {
 					if foundString != "" {
-						t.Errorf("Expected no match got %v\n", foundString)
+						t.Errorf("Wanted no match got %v\n", foundString)
 					}
 				} else {
 					expectedString := test.str[test.result[0].StartIdx:test.result[0].EndIdx]
@@ -791,11 +794,68 @@ func TestFindSubmatch(t *testing.T) {
 				}
 			}
 			match, err := regComp.FindSubmatch(test.str)
+			if err != nil {
+				if len(test.result) != 0 {
+					t.Errorf("Wanted %v got no match\n", test.result[0])
+				}
+			} else if len(test.result) == 0 {
+				t.Errorf("Wanted no match got %v\n", match)
+			}
 			for i := range match {
 				if match[i].IsValid() {
 					if test.result[0][i] != match[i] {
 						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 					}
+				} else {
+					if i < len(test.result) && test.result[0][i].IsValid() {
+						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
+					}
+				}
+			}
+		})
+	}
+}
+func TestFindStringSubmatch(t *testing.T) {
+	for _, test := range groupTests {
+		t.Run(test.re+"	"+test.str, func(t *testing.T) {
+			regComp, err := Compile(test.re, test.flags...)
+			if err != nil {
+				if test.result != nil {
+					panic(err)
+				}
+			}
+			matchStr := regComp.FindStringSubmatch(test.str)
+			if matchStr == nil {
+				if len(test.result) != 0 {
+					expectedStr := funcMap(test.result[0], func(g Group) string {
+						if g.IsValid() {
+							return test.str[g.StartIdx:g.EndIdx]
+						} else {
+							return ""
+						}
+					})
+					t.Errorf("Wanted %v got no match\n", expectedStr)
+				}
+			} else if len(test.result) == 0 {
+				t.Errorf("Wanted no match got %v\n", matchStr)
+			} else {
+				expectedStr := funcMap(test.result[0], func(g Group) string {
+					if g.IsValid() {
+						return test.str[g.StartIdx:g.EndIdx]
+					} else {
+						return ""
+					}
+				})
+				for i, groupStr := range matchStr {
+					if groupStr == "" {
+						if i < len(expectedStr) && expectedStr[i] != "" {
+							t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
+						}
+					} else {
+						if expectedStr[i] != groupStr {
+							t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
+						}
+					}
 				}
 			}
 		})
@@ -817,6 +877,10 @@ func TestFindAllSubmatch(t *testing.T) {
 						if test.result[i][j] != matchIndices[i][j] {
 							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 						}
+					} else {
+						if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
+							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
+						}
 					}
 				}
 			}
Author	SHA1	Message	Date
Aadhavan Srinivasan	d1958f289c	Commented out tests that would only pass with Longest()	2025-02-09 16:08:16 -05:00
Aadhavan Srinivasan	15ee49f42e	Rename method receivers from 'regex' to 're' (it's shorter)	2025-02-09 15:51:46 -05:00
Aadhavan Srinivasan	b60ded4136	Don't break when a match is found, if we are looking for the longest match	2025-02-09 15:48:33 -05:00
Aadhavan Srinivasan	9fbb99f86c	Wrote example for Longest()	2025-02-09 15:47:57 -05:00
Aadhavan Srinivasan	af15904f3b	Updated documentation	2025-02-09 15:41:13 -05:00
Aadhavan Srinivasan	d522f50b50	Wrote new example functions	2025-02-09 15:40:59 -05:00
Aadhavan Srinivasan	fb47e082eb	Wrote new methods Expand() and preferLongest(); Use new function signatures (with preferLongest); only characters should be added to next state list	2025-02-09 15:40:39 -05:00
Aadhavan Srinivasan	1f5a363539	Use new function signatures (with preferLongest)	2025-02-09 15:39:09 -05:00
Aadhavan Srinivasan	9e12f9dcb3	Added field to Reg, denoting if we prefer longest match (POSIX style) or not (perl style)	2025-02-09 15:38:26 -05:00
Aadhavan Srinivasan	47f88c817f	Fixed typo	2025-02-09 15:14:17 -05:00
Aadhavan Srinivasan	835d495990	Removed capitalization for error message (staticcheck)	2025-02-09 09:14:45 -05:00
Aadhavan Srinivasan	76e0170cb9	Removed unused function	2025-02-09 09:13:52 -05:00
Aadhavan Srinivasan	d172a58258	Throw error if match isn't found but test.result has >0 elements	2025-02-09 09:13:29 -05:00
Aadhavan Srinivasan	7231169270	Removed unused functions	2025-02-09 09:13:03 -05:00
Aadhavan Srinivasan	e546f01c20	Removed redundant return (staticcheck)	2025-02-09 09:12:55 -05:00
Aadhavan Srinivasan	b7467a00f1	Removed priorityQueue (unused)	2025-02-09 09:07:43 -05:00
Aadhavan Srinivasan	c6ad4caa0d	Removed a bunch of unused code (let's go!!!)	2025-02-09 09:06:40 -05:00
Aadhavan Srinivasan	6334435b83	Updated tests since the engine uses Perl matching instead of POSIX matching; added tests for FindStringSubmatch	2025-02-09 09:01:42 -05:00
Aadhavan Srinivasan	78fb5606dd	Use new definition of Reg	2025-02-09 08:59:16 -05:00
Aadhavan Srinivasan	eddd2ae700	Updated documentation	2025-02-09 08:58:58 -05:00
Aadhavan Srinivasan	c577064977	Added string field to Reg, that contains the expression string; wrote method to return the string	2025-02-09 08:58:46 -05:00
Aadhavan Srinivasan	d4e3942d27	Added Match() and FindStringSubmatch(); removed old code; updated comments	2025-02-09 08:58:09 -05:00
Aadhavan Srinivasan	f15a5cae34	Store all states visited in a single run of 'addStateToList()' in a slice	2025-02-08 16:07:01 -05:00
Aadhavan Srinivasan	62ca1a872a	Made zeroLengthMatchState() return a pointer; reduced the number of comparisons performd by nfaState.equals	2025-02-08 16:06:14 -05:00
Aadhavan Srinivasan	99230b49de	Use new function signature for zeroLengthMatchState()	2025-02-08 16:05:35 -05:00
Aadhavan Srinivasan	22ead83625	Fixed assertion matching	2025-02-07 16:19:36 -05:00
Aadhavan Srinivasan	3604486a9b	Used Pike's algorithm (an extension to Thompson's algorithm) (see Russ Cox's 2nd article); I think I almost have a working PCRE-style engine	2025-02-07 16:06:45 -05:00
Aadhavan Srinivasan	052de55826	question() now returns 2 values	2025-02-07 16:04:46 -05:00
Aadhavan Srinivasan	d2ad0d95a8	Modified question operator so that it doesn't create an unnecessary zero-state	2025-02-07 16:04:26 -05:00
Aadhavan Srinivasan	ccf3b3b299	More progress on implementing PCRE matching	2025-02-06 22:08:56 -05:00
Aadhavan Srinivasan	1d4f695f8f	Wrote function to check if a state is in an nfaState, based on the Equals function	2025-02-06 22:06:51 -05:00
Aadhavan Srinivasan	8534174ea1	Use pointers instead of values	2025-02-06 22:06:22 -05:00