Merge pull request 'Implement PCRE Matching (prefer left-branch)' (#2) from implementPCREMatchingRules into master

Reviewed-on: #2
2025-02-09 15:24:26 -06:00
parent bc32e0cb76 d1958f289c
commit 662527c478
11 changed files with 578 additions and 602 deletions
--- a/4
+++ b/4
@@ -6,8 +6,8 @@ fmt:
 vet: fmt
 	go vet ./...
 buildLib: vet
-	go build -gcflags="-N -l" ./...
+	go build -gcflags="all=-N -l" ./...
 buildCmd: buildLib
-	go build -C cmd/ -gcflags="-N -l" -o re ./...
+	go build -C cmd/ -gcflags="all=-N -l" -o re ./...
 test: buildCmd
 	go test -v ./...
--- a/cmd/unique_array.go
+++ b/cmd/unique_array.go
@@ -16,7 +16,6 @@ func (s *uniq_arr[T]) add(vals ...T) {
 			s.backingMap[item] = struct{}{}
 		}
 	}
 	return
 }
 func (s uniq_arr[T]) contains(val T) bool {
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -12,16 +12,27 @@ var notDotChars []rune
 // A Reg represents the result of compiling a regular expression. It contains
 // the startState of the NFA representation of the regex, and the number of capturing
-// groups in the regex.
+// groups in the regex. It also contains the expression string.
 type Reg struct {
 	start         *nfaState // Start state of the NFA built for the expression
 	numGroups     int       // Number of capturing groups in the expression
 	str           string    // The expression string; assigned in Compile
 	preferLongest bool      // Set to true by Longest; passed to findAllSubmatchHelper when searching
 }
-// numSubexp eturns the number of sub-expressions in the given [Reg]. This is equivalent
+// NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
-func (r Reg) NumSubexp() int {
+func (re Reg) NumSubexp() int {
-	return r.numGroups
+	return re.numGroups
 }
 // String returns the string used to compile the expression.
 // Compile stores its input string in the Reg, and String returns that stored value unchanged.
 func (re Reg) String() string {
 	return re.str
 }
 // Longest sets the preferLongest flag of re. FindNthMatch and FindAllSubmatch pass this
 // flag on to the matcher. As an illustration: for the expression `x|xx` and the input "xx",
 // FindString returns "x" before Longest is called, and "xx" afterwards.
 func (re *Reg) Longest() {
 	re.preferLongest = true
 }
 const concatRune rune = 0xF0001
@@ -816,13 +827,12 @@ func thompson(re []postfixNode) (Reg, error) {
 	// In these cases, we will return an NFA with 1 state, with an assertion that is always true.
 	if len(re) == 0 {
 		start := zeroLengthMatchState()
-		nfa = append(nfa, &start)
+		nfa = append(nfa, start)
 	}
 	for _, c := range re {
 		if c.nodetype == characterNode || c.nodetype == assertionNode {
 			stateToAdd := nfaState{}
 			stateToAdd.transitions = make(map[int][]*nfaState)
 			if c.allChars {
 				stateToAdd.allChars = true
 				if len(c.except) != 0 {
@@ -934,7 +944,6 @@ func thompson(re []postfixNode) (Reg, error) {
 			s.isEmpty = true
 			s.output = make([]*nfaState, 0)
 			s.output = append(s.output, s)
 			s.transitions = make(map[int][]*nfaState)
 			// LPAREN nodes are just added normally
 			if c.nodetype == lparenNode {
 				numGroups++
@@ -966,7 +975,7 @@ func thompson(re []postfixNode) (Reg, error) {
 					s.groupNum = lparenNode.groupNum
 					to_add := concatenate(lparenNode, s)
 					nfa = append(nfa, to_add)
-				} else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
+				} else if middleNode.groupBegin && middleNode.numTransitions() == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
 					nfa = append(nfa, lparenNode)    // I shouldn't have popped this out, because it is not involved in the current capturing group
 					s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually an lparen
 					to_add := concatenate(middleNode, s)
@@ -989,7 +998,8 @@ func thompson(re []postfixNode) (Reg, error) {
 		if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
 			// Map the list of nodes to a list of states, each state containing the contents of a specific node
 			states := funcMap(c.nodeContents, func(node postfixNode) *nfaState {
-				s := newState()
+				s := &nfaState{}
 				s.output = append(s.output, s)
 				nodeContents := node.contents
 				if caseInsensitive {
 					nodeContents = slices.Concat(funcMap(nodeContents, func(r rune) []rune {
@@ -1003,7 +1013,7 @@ func thompson(re []postfixNode) (Reg, error) {
 						return n.contents
 					})...)
 				}
-				return &s
+				return s
 			})
 			// Reduce the list of states down to a single state by alternating them
 			toAdd := funcReduce(states, func(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -1030,14 +1040,14 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying kleene star")
 			}
-			stateToAdd, err := kleene(*s1)
+			stateToAdd, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
 			nfa = append(nfa, stateToAdd)
 		case plusNode: // a+ is equivalent to aa*
 			s1 := mustPop(&nfa)
-			s2, err := kleene(*s1)
+			s2, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
@@ -1048,7 +1058,10 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying question operator")
 			}
-			s2 := question(s1)
+			s2, err := question(s1)
 			if err != nil {
 				return Reg{}, err
 			}
 			nfa = append(nfa, s2)
 		case pipeNode:
 			// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
@@ -1059,21 +1072,21 @@ func thompson(re []postfixNode) (Reg, error) {
 			// 	'|a'
 			// 	'^a|'
 			// 	'^|a'
-			s1, err1 := pop(&nfa)
+			s2, err1 := pop(&nfa)
-			s2, err2 := pop(&nfa)
+			s1, err2 := pop(&nfa)
-			if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err2 != nil || (s2.groupBegin && s2.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
 					nfa = append(nfa, s2)
 				}
 				tmp := zeroLengthMatchState()
-				s2 = &tmp
+				s2 = tmp
 			}
-			if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err1 == nil { // See above for explanation
 					nfa = append(nfa, s1)
 				}
 				tmp := zeroLengthMatchState()
-				s1 = &tmp
+				s1 = tmp
 			}
 			s3 := alternate(s1, s2)
 			nfa = append(nfa, s3)
@@ -1100,14 +1113,18 @@ func thompson(re []postfixNode) (Reg, error) {
 				stateToAdd = concatenate(stateToAdd, cloneState(poppedState))
 			}
 			if c.endReps == infinite_reps { // Case 3
-				s2, err := kleene(*poppedState)
+				s2, err := kleene(poppedState)
 				if err != nil {
 					return Reg{}, err
 				}
 				stateToAdd = concatenate(stateToAdd, s2)
 			} else { // Case 2
 				for i := c.startReps; i < c.endReps; i++ {
-					stateToAdd = concatenate(stateToAdd, question(cloneState(poppedState)))
+					tmp, err := question(cloneState(poppedState))
 					if err != nil {
 						return Reg{}, fmt.Errorf("error processing bounded repetition")
 					}
 					stateToAdd = concatenate(stateToAdd, tmp)
 				}
 			}
 			nfa = append(nfa, stateToAdd)
@@ -1117,9 +1134,13 @@ func thompson(re []postfixNode) (Reg, error) {
 		return Reg{}, fmt.Errorf("invalid regex")
 	}
-	verifyLastStates(nfa)
+	lastState := newState()
 	lastState.isLast = true
-	return Reg{nfa[0], numGroups}, nil
+	concatenate(nfa[0], &lastState)
 	// The string is empty here, because we add it in Compile()
 	return Reg{nfa[0], numGroups, "", false}, nil
 }
@@ -1137,10 +1158,11 @@ func Compile(re string, flags ...ReFlag) (Reg, error) {
 	if err != nil {
 		return Reg{}, fmt.Errorf("error compiling regex: %w", err)
 	}
 	reg.str = re
 	return reg, nil
 }
-// MustCompile panicks if Compile returns an error. They are identical in all other respects.
+// MustCompile panics if Compile returns an error. They are identical in all other respects.
 func MustCompile(re string, flags ...ReFlag) Reg {
 	reg, err := Compile(re, flags...)
 	if err != nil {
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -4,6 +4,8 @@ Package regex implements regular expression search, using a custom non-bracktrac
 The engine relies completely on UTF-8 codepoints. As such, it is capable of matching characters
 from other languages, emojis and symbols.
 The API and regex syntax are largely compatible with that of the stdlib's [regexp], with a few key differences (see 'Key Differences with regexp').
 The full syntax is specified below.
 # Syntax
@@ -55,8 +57,8 @@ POSIX classes (inside normal character classes):
 Composition:
 	def				Match d, followed by e, followed by f
-	x|y				Match x or y (prefer longer one)
+	x|y				Match x or y (prefer x)
-	xy|z			Match xy or z
+	xy|z			Match xy or z (prefer xy)
 Repetition (always greedy, preferring more):
@@ -94,10 +96,11 @@ Lookarounds:
 Numeric ranges:
 	<x-y>			Match any number from x to y (inclusive) (x and y must be positive numbers)
 	\<x				Match a literal '<' followed by x
 # Key Differences with regexp
-The engine and the API differ from [regexp] in a number of ways, some of them very subtle.
+The engine and the API differ from [regexp] in a few ways, some of them very subtle.
 The key differences are mentioned below.
 1. Greediness:
@@ -132,7 +135,7 @@ Rather than using primitives for return values, my engine defines two types that
 values: a [Group] represents a capturing group, and a [Match] represents a list of groups.
 [regexp] specifies a regular expression that gives a list of all the matching functions that it supports. The
-equivalent expression for this engine is:
+equivalent expression for this engine is shown below. Note that 'Index' is the default.
 	Find(All)?(String)?(Submatch)?
@@ -140,7 +143,7 @@ equivalent expression for this engine is:
 If a function contains 'All' it returns all matches instead of just the leftmost one.
-If a function contains 'String' it returns the matched text, rather than the indices.
+If a function contains 'String' it returns the matched text, rather than the index in the string.
 If a function contains 'Submatch' it returns the match, including all submatches found by
 capturing groups.
@@ -156,5 +159,20 @@ and the input string:
 The 0th group would contain 'xy' and the 1st group would contain 'y'. Any matching function without 'Submatch' in its name
 returns the 0-group.
 # Feature Differences
 The following features from [regexp] are (currently) NOT supported:
 1. Named capturing groups
 2. Non-greedy operators
 3. Unicode character classes
 4. Embedded flags (flags are passed as arguments to [Compile])
 5. Literal text with \Q ... \E
 The following features are not available in [regexp], but are supported in my engine:
 1. Lookarounds
 2. Numeric ranges
 I hope to shorten the first list, and expand the second.
 */
 package regex
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -52,3 +52,40 @@ func ExampleReg_FindSubmatch() {
 	// 0	1
 	// 2	3
 }
 // ExampleReg_Expand rewrites every "key: value" pair of the input as "key = value",
 // by expanding the same template once for each match.
 func ExampleReg_Expand() {
 	const pattern = `(\w+): (\w+)`
 	const tmpl = "$1 = $2\n"
 	src := `option1: value1
 	option2: value2`
 	re := regex.MustCompile(pattern, regex.RE_MULTILINE)
 	var out string
 	matches := re.FindAllSubmatch(src)
 	for i := range matches {
 		// Each call appends one expanded copy of the template to what we have so far
 		out = re.Expand(out, tmpl, src, matches[i])
 	}
 	fmt.Println(out)
 	// Output: option1 = value1
 	// option2 = value2
 }
 // ExampleReg_LiteralPrefix prints the literal prefix of `a(b|c)d*`, followed by
 // whether that prefix makes up the entire expression.
 func ExampleReg_LiteralPrefix() {
 	re := regex.MustCompile(`a(b|c)d*`)
 	lit, whole := re.LiteralPrefix()
 	fmt.Printf("%s\n%t\n", lit, whole)
 	// Output: a
 	// false
 }
 // ExampleReg_Longest runs the same search twice: once with the default behavior,
 // and once after calling Longest.
 func ExampleReg_Longest() {
 	const input = "xx"
 	re := regex.MustCompile(`x|xx`)
 	before := re.FindString(input)
 	fmt.Println(before)
 	re.Longest()
 	after := re.FindString(input)
 	fmt.Println(after)
 	// Output: x
 	// xx
 }
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -2,8 +2,8 @@ package regex
 import (
 	"fmt"
-	"slices"
+	"strconv"
-	"sort"
+	"unicode"
 )
 // A Match represents a match found by the regex in a given string.
@@ -15,7 +15,7 @@ import (
 // See [Reg.FindSubmatch] for an example.
 type Match []Group
-// a Group represents a group. It contains the start index and end index of the match
+// A Group represents a capturing group. It contains the start and end index of the group.
 type Group struct {
 	StartIdx int
 	EndIdx   int
@@ -30,17 +30,6 @@ func newMatch(size int) Match {
 	return toRet
 }
 // numValidGroups counts the groups in the match whose start and end index are both non-negative.
 func (m Match) numValidGroups() int {
 	count := 0
 	for i := range m {
 		// A negative index on either side makes the group invalid, so it is not counted
 		if m[i].StartIdx < 0 || m[i].EndIdx < 0 {
 			continue
 		}
 		count++
 	}
 	return count
 }
 // Returns a string containing the indices of all (valid) groups in the match
 func (m Match) String() string {
 	var toRet string
@@ -59,7 +48,7 @@ func (idx Group) String() string {
 	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }
-// Returns whether a group is valid (ie. whether it matched any text). It
+// IsValid returns whether a group is valid (ie. whether it matched any text). It
 // simply ensures that both indices of the group are >= 0.
 func (g Group) IsValid() bool {
 	return g.StartIdx >= 0 && g.EndIdx >= 0
@@ -70,101 +59,42 @@ func getZeroGroup(m Match) Group {
 	return m[0]
 }
-// takeZeroState takes the 0-state (if such a transition exists) for all states in the
+func copyThread(to *nfaState, from nfaState) {
-// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
+	to.threadGroups = append([]Group{}, from.threadGroups...)
 // the second ret val is true.
 // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
 func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
 	// First pass: collect the epsilon-transition targets of every state in 'states'.
 	for _, state := range states {
 		// States without an epsilon transition contribute nothing to the result
 		if len(state.transitions[epsilon]) > 0 {
 			for _, s := range state.transitions[epsilon] {
 				// Allocate the target's group list on first use (numGroups + 1 entries)
 				if s.threadGroups == nil {
 					s.threadGroups = newMatch(numGroups + 1)
 				}
 				// The target takes over the group indices recorded by the state we came from
 				copy(s.threadGroups, state.threadGroups)
 				if s.groupBegin {
 					// The target opens a group: record idx as the start of that group
 					s.threadGroups[s.groupNum].StartIdx = idx
 					//					openParenGroups = append(openParenGroups, s.groupNum)
 				}
 				if s.groupEnd {
 					// The target closes a group: record idx as the end of that group
 					s.threadGroups[s.groupNum].EndIdx = idx
 					//					closeParenGroups = append(closeParenGroups, s.groupNum)
 				}
 			}
 			// Note that rtv is not de-duplicated: a target reachable from two states is added twice
 			rtv = append(rtv, state.transitions[epsilon]...)
 		}
 	}
 	// Second pass: the second return value is true if at least one of the collected
 	// states has an epsilon transition of its own.
 	for _, state := range rtv {
 		if len(state.transitions[epsilon]) > 0 {
 			return rtv, true
 		}
 	}
 	return rtv, false
 }
 // zeroMatchPossible reports whether a zero-length match can be reached from any of
 // the given states, at position idx of str.
 // It keeps following 0-state transitions (via takeZeroState) until no new unique state
 // turns up, and then looks for an empty last-state whose assertion (if any) holds.
 func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
 	reached, more := takeZeroState(states, numGroups, idx)
 	frontier := make([]*nfaState, 0, len(reached)+len(states))
 	frontier = append(append(frontier, states...), reached...)
 	for more {
 		var added int // Number of unique states added to the frontier in this round
 		reached, more = takeZeroState(frontier, numGroups, idx)
 		frontier, added = uniqueAppend(frontier, reached...)
 		if added == 0 { // Nothing new was found, so another round can't find anything either
 			break
 		}
 	}
 	for _, st := range frontier {
 		if !st.isEmpty {
 			continue
 		}
 		// The assertion is only evaluated for empty states that actually assert something
 		if st.assert != noneAssert && !st.checkAssertion(str, idx) {
 			continue
 		}
 		if st.isLast {
 			return true
 		}
 	}
 	return false
 }
 // pruneIndices removes overlapping matches from the given slice, and returns the result.
 //
 // The slice is first sorted in place by the start index of each match's 0-group, so the
 // caller's slice is reordered. A match that starts before the current one ends is treated
 // as overlapping; of two overlapping matches, the one whose 0-group ends later is kept.
 // An empty slice is returned as-is.
 func pruneIndices(indices []Match) []Match {
 	// There is no 'first match' to start from in an empty slice
 	if len(indices) == 0 {
 		return indices
 	}
 	// First, sort the slice by the start indices
 	sort.Slice(indices, func(i, j int) bool {
 		return indices[i][0].StartIdx < indices[j][0].StartIdx
 	})
 	toRet := make([]Match, 0, len(indices))
 	current := indices[0]
 	for _, idx := range indices[1:] {
 		// idx doesn't overlap with current (starts after current ends), so add current to result
 		// and update the current.
 		if idx[0].StartIdx >= current[0].EndIdx {
 			toRet = append(toRet, current)
 			current = idx
 		} else if idx[0].EndIdx > current[0].EndIdx {
 			// idx overlaps, but it ends later, so update current
 			current = idx
 		}
 	}
 	// Add the match that was still pending when the loop ended
 	toRet = append(toRet, current)
 	return toRet
 }
 // Find returns the 0-group of the leftmost match of the regex in the given string.
 // An error value != nil indicates that no match was found.
-func (regex Reg) Find(str string) (Group, error) {
+func (re Reg) Find(str string) (Group, error) {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Group{}, fmt.Errorf("no matches found")
 	}
 	return getZeroGroup(match), nil
 }
 // Match reports whether the regex finds a match anywhere in the given string.
 func (re Reg) Match(str string) bool {
 	// Find signals 'no match' through its error value; the group itself isn't needed here
 	if _, err := re.Find(str); err != nil {
 		return false
 	}
 	return true
 }
 // CompileMatch compiles expr, and reports whether str contains a match of it.
 // It is equivalent to [regexp.Match].
 // Flags may optionally be supplied (see [ReFlag]).
 // If expr fails to compile, the result is false along with the compilation error.
 func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
 	compiled, err := Compile(expr, flags...)
 	if err == nil {
 		return compiled.Match(str), nil
 	}
 	return false, err
 }
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
-func (regex Reg) FindAll(str string) []Group {
+func (re Reg) FindAll(str string) []Group {
-	indices := regex.FindAllSubmatch(str)
+	indices := re.FindAllSubmatch(str)
 	zeroGroups := funcMap(indices, getZeroGroup)
 	return zeroGroups
 }
@@ -173,8 +103,8 @@ func (regex Reg) FindAll(str string) []Group {
 // The return value will be an empty string in two situations:
 //  1. No match was found
 //  2. The match was an empty string
-func (regex Reg) FindString(str string) string {
+func (re Reg) FindString(str string) string {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return ""
 	}
@@ -187,8 +117,8 @@ func (regex Reg) FindString(str string) string {
 // number of groups. The validity of a group (whether or not it matched anything) can be determined with
 // [Group.IsValid], or by checking that both indices of the group are >= 0.
 // The second return value is non-nil if no match was found.
-func (regex Reg) FindSubmatch(str string) (Match, error) {
+func (re Reg) FindSubmatch(str string) (Match, error) {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Match{}, fmt.Errorf("no match found")
 	} else {
@@ -196,11 +126,41 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 	}
 }
-// FindAllString is the 'all' version of FindString.
+// FindStringSubmatch is the 'string' version of [FindSubmatch]. It returns a slice of strings,
 // where the string at index i contains the text matched by the i-th capturing group.
 // The 0-th index represents the entire match.
 // An empty string at index n could mean:
 //
 //  1. Group n did not find a match
 //  2. Group n found a zero-length match
 //
 // A return value of nil indicates no match.
 func (re Reg) FindStringSubmatch(str string) []string {
 	match, err := re.FindSubmatch(str)
 	if err != nil {
 		return nil
 	}
 	// The indices in a Group count runes, not bytes, because the matcher works on []rune(str).
 	// The text of each group must therefore be cut out of the rune slice: slicing str itself
 	// gives the wrong text as soon as a multi-byte character comes before (or inside) a group.
 	strRunes := []rune(str)
 	matchStr := make([]string, re.numGroups+1)
 	validGroupFound := false
 	for i := range match {
 		// Groups that didn't match keep the empty string that make() put there
 		if !match[i].IsValid() {
 			continue
 		}
 		matchStr[i] = string(strRunes[match[i].StartIdx:match[i].EndIdx])
 		validGroupFound = true
 	}
 	// Without a single valid group, there is nothing to report
 	if !validGroupFound {
 		return nil
 	}
 	return matchStr
 }
 // FindAllString is the 'all' version of [FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
-func (regex Reg) FindAllString(str string) []string {
+func (re Reg) FindAllString(str string) []string {
-	zerogroups := regex.FindAll(str)
+	zerogroups := re.FindAll(str)
 	matchStrs := funcMap(zerogroups, func(g Group) string {
 		return str[g.StartIdx:g.EndIdx]
 	})
@@ -209,14 +169,14 @@ func (regex Reg) FindAllString(str string) []string {
 // FindNthMatch return the 'n'th match of the regex in the given string.
 // It returns an error (!= nil) if there are fewer than 'n' matches in the string.
-func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
+func (re Reg) FindNthMatch(str string, n int) (Match, error) {
 	idx := 0
 	matchNum := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			matchNum++
 		}
@@ -229,31 +189,65 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
 }
 // FindAllSubmatch returns a slice of matches in the given string.
-func (regex Reg) FindAllSubmatch(str string) []Match {
+func (re Reg) FindAllSubmatch(str string) []Match {
 	idx := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	indices := make([]Match, 0)
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			indices = append(indices, matchIdx)
 		}
 	}
 	if len(indices) > 0 {
 		return pruneIndices(indices)
 	}
 	return indices
 }
 // addStateToList adds 'state' to 'list' and returns the resulting list. States that only
 // route to other states (kleene, question, alternation, group boundaries, and assertions that
 // hold) are not stored themselves; instead, the function recurses into the states they lead to.
 //
 // str and idx are the input and the position in it; they are used to check assertions and to
 // record group boundaries. threadGroups holds the group indices recorded so far. visited holds
 // the states already seen during this call chain. preferLongest is handed to checkAssertion.
 // Note that state is passed by value, so assignments to its fields affect only the local copy.
 func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
 	// A state that is already in the list, or that we have already been through, is not added again
 	if stateExists(list, state) || stateExists(visited, state) {
 		return list
 	}
 	visited = append(visited, state)

 	if state.isKleene || state.isQuestion {
 		// For kleene and question states, the split state is tried before the next state.
 		// NOTE(review): the order presumably decides which branch is preferred when matching - confirm.
 		copyThread(state.splitState, state)
 		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		copyThread(state.next, state)
 		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		return list
 	}
 	if state.isAlternation {
 		// For alternation states the order is the other way round: next state first, then the split state
 		copyThread(state.next, state)
 		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		copyThread(state.splitState, state)
 		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		return list
 	}
 	// From here on, the state works with its own copy of the group indices
 	state.threadGroups = append([]Group{}, threadGroups...)
 	if state.assert != noneAssert {
 		// If the assertion holds, we move on to the next state. If it doesn't, execution
 		// falls through to the checks below.
 		if state.checkAssertion(str, idx, preferLongest) {
 			copyThread(state.next, state)
 			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 		}
 	}
 	if state.groupBegin {
 		// Record idx as the start of this state's group, and continue with the next state
 		state.threadGroups[state.groupNum].StartIdx = idx
 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	if state.groupEnd {
 		// Record idx as the end of this state's group, and continue with the next state
 		state.threadGroups[state.groupNum].EndIdx = idx
 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	// None of the cases above returned, so the state itself goes into the list
 	return append(list, state)
 }
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
-//
+func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) {
 //	Might return duplicates or overlapping indices, so care must be taken to prune the resulting array.
 func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
 	// Base case - exit if offset exceeds string's length
 	if offset > len(str) {
 		// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
@@ -261,214 +255,120 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	}
 	resetThreads(start)
-	// Hold a list of match indices for the current run. When we
+	currentStates := make([]nfaState, 0)
-	// can no longer find a match, the match with the largest range is
+	nextStates := make([]nfaState, 0)
 	// chosen as the match for the entire string.
 	// This allows us to pick the longest possible match (which is how greedy matching works).
 	// COMMENT ABOVE IS CURRENTLY NOT UP-TO-DATE
 	tempIndices := newMatch(numGroups + 1)
 	foundPath := false
 	startIdx := offset
 	endIdx := offset
 	currentStates := make([]*nfaState, 0)
 	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
 	i := offset // Index in string
 	startingFrom := i                  // Store starting index
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
 	if start.assert != noneAssert {
-		if start.checkAssertion(str, offset) == false {
+		if start.checkAssertion(str, offset, preferLongest) == false {
 			i++
 			return false, []Group{}, i
 		}
 	}
 	// Increment until we hit a character matching the start state (assuming not 0-state)
 	if start.isEmpty == false {
 		for i < len(str) && !start.contentContains(str, i) {
 			i++
 		}
 		startIdx = i
 		startingFrom = i
 		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
 	}
 	start.threadGroups = newMatch(numGroups + 1)
-	// Check if the start state begins a group - if so, add the start index to our list
+	start.threadGroups[0].StartIdx = i
-	if start.groupBegin {
+	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest)
-		start.threadGroups[start.groupNum].StartIdx = i
+	var match Match = nil
-		//		tempIndices[start.groupNum].startIdx = i
+	for idx := i; idx <= len(str); idx++ {
 	}
 	currentStates = append(currentStates, start)
 	// Main loop
 	for i < len(str) {
 		foundPath = false
 		zeroStates := make([]*nfaState, 0)
 		// Keep taking zero-states, until there are no more left to take
 		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
 		zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
 		tempStates = append(tempStates, zeroStates...)
 		num_appended := 0
 		for isZero == true {
 			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 			if num_appended == 0 { // Break if we haven't appended any more unique values
 				break
 			}
 		}
 		currentStates = slices.Concat(currentStates, tempStates)
 		tempStates = nil
 		// Take any transitions corresponding to current character
 		numStatesMatched := 0            // The number of states which had at least 1 match for this round
 		assertionFailed := false         // Whether or not an assertion failed for this round
 		lastStateInList := false         // Whether or not a last state was in our list of states
 		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
 		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
 		for numStatesMatched == 0 && lastStateInList == false {
 		if len(currentStates) == 0 {
 			break
 		}
-			state, _ := pop(&currentStates)
+		for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
-			matches, numMatches := state.matchesFor(str, i)
+			currentState := currentStates[currentStateIdx]
-			if numMatches > 0 {
+
-				numStatesMatched++
+			if currentState.threadGroups == nil {
-				tempStates = append([]*nfaState(nil), matches...)
+				currentState.threadGroups = newMatch(numGroups + 1)
-				foundPath = true
+				currentState.threadGroups[0].StartIdx = idx
 				for _, m := range matches {
 					if m.threadGroups == nil {
 						m.threadGroups = newMatch(numGroups + 1)
 			}
-					copy(m.threadGroups, state.threadGroups)
+
 			if currentState.isLast {
 				currentState.threadGroups[0].EndIdx = idx
 				match = append([]Group{}, currentState.threadGroups...)
 				if !preferLongest {
 					break
 				}
 			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
 				if currentState.contentContains(str, idx, preferLongest) {
 					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
 				}
 			}
-			if numMatches < 0 {
+		}
-				assertionFailed = true
+		currentStates = append([]nfaState{}, nextStates...)
 		nextStates = nil
 	}
 	if match != nil {
 		if offset == match[0].EndIdx {
 			return true, match, match[0].EndIdx + 1
 		}
 		return true, match, match[0].EndIdx
 	}
 	return false, []Group{}, i + 1
 }
 // Expand appends template to dst, expanding any variables in template to the relevant capturing group.
 //
 // A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group.
 // If the n-th group did not take part in the match, the variable is replaced by nothing. If match has no
 // n-th group, or 'n' cannot be parsed as a number, the variable is copied to the output unchanged.
 // To insert a literal $, do not put a number after it. Alternatively, you can use $$.
 // src is the input string, and match must be the result of [Reg.FindSubmatch].
 func (re Reg) Expand(dst string, template string, src string, match Match) string {
 	templateRuneSlc := []rune(template)
 	srcRuneSlc := []rune(src)
 	// The expansion is gathered in a rune buffer and joined to dst once at the end,
 	// rather than growing dst one character at a time.
 	expanded := make([]rune, 0, len(templateRuneSlc))
 	i := 0
 	for i < len(templateRuneSlc) {
 		c := templateRuneSlc[i]
 		if c != '$' {
 			expanded = append(expanded, c)
 			i++
 			continue
 		}
 		i++ // Step over the dollar sign
 		// The dollar sign is the last character of the template, or it is followed by another dollar sign
 		if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' {
 			expanded = append(expanded, '$')
 			i++
 			continue
 		}
 		// Collect the digits that follow. The bounds check is needed for a template that
 		// ends in a variable (eg. "$1"), where the scan would otherwise run off the end.
 		digitsStart := i
 		for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
 			i++
 		}
 		numStr := string(templateRuneSlc[digitsStart:i])
 		if numStr == "" {
 			// No number after the dollar sign, so it is literal. The rune after it is handled by the next iteration.
 			expanded = append(expanded, '$')
 			continue
 		}
 		// unicode.IsDigit accepts digits that Atoi doesn't (non-ASCII ones), and a long run of
 		// digits can overflow. In both cases the variable is left as it was written, instead of
 		// silently turning into group 0.
 		num, err := strconv.Atoi(numStr)
 		if err != nil || num >= len(match) {
 			expanded = append(expanded, '$')
 			expanded = append(expanded, templateRuneSlc[digitsStart:i]...)
 			continue
 		}
 		// A group that didn't match has negative indices; it expands to nothing. The upper bound
 		// protects against a match that doesn't belong to src.
 		g := match[num]
 		if g.StartIdx >= 0 && g.StartIdx <= g.EndIdx && g.EndIdx <= len(srcRuneSlc) {
 			expanded = append(expanded, srcRuneSlc[g.StartIdx:g.EndIdx]...)
 		}
 	}
 	return dst + string(expanded)
 }
 // LiteralPrefix returns a string that must begin any match of the given regular expression.
 // The second return value is true if the string comprises the entire expression.
 func (re Reg) LiteralPrefix() (prefix string, complete bool) {
 	state := re.start
 	if state.assert != noneAssert {
 		state = state.next
 	}
 	for !(state.isLast) && (!state.isAlternation) && len(state.content) == 1 && state.assert == noneAssert {
 		if state.groupBegin || state.groupEnd {
 			state = state.next
 			continue
 		}
 		prefix += string(rune(state.content[0]))
 		state = state.next
 	}
 	if state.isLast {
-				if state.isLookaround() {
+		complete = true
 					lastLookaroundInList = true
 				}
 				lastStateInList = true
 				lastStatePtr = state
 			}
 		}
 		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
 			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
 			// state. The explanation below is my attempt to explain this behavior.
 			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
 			//
 			// One of the states in our list was a last state and a lookaround. In this case, we
 			// don't abort upon failure of the assertion, because we have found
 			// another path to a final state.
 			// Even if the last state _was_ an assertion, we can use the previously
 			// saved indices to find a match.
 			if lastLookaroundInList {
 				break
 	} else {
-				if i == startingFrom {
+		complete = false
 					i++
 	}
-				return false, []Group{}, i
+	return prefix, complete
 			}
 		}
 		// Check if we can find a state in our list that is:
 		// 	a. A last-state
 		// 	b. Empty
 		// 	c. Doesn't assert anything
 		for _, s := range currentStates {
 			if s.isLast && s.isEmpty && s.assert == noneAssert {
 				lastStatePtr = s
 				lastStateInList = true
 			}
 		}
 		if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
 			for j := 1; j < numGroups+1; j++ {
 				tempIndices[j] = lastStatePtr.threadGroups[j]
 			}
 			endIdx = i
 			tempIndices[0] = Group{startIdx, endIdx}
 			if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
 				return true, tempIndices, tempIndices[0].EndIdx + 1
 			} else {
 				return true, tempIndices, tempIndices[0].EndIdx
 			}
 		}
 		// Check if we can find a zero-length match
 		if foundPath == false {
 			if ok := zeroMatchPossible(str, i, numGroups, currentStates...); ok {
 				if tempIndices[0].IsValid() == false {
 					tempIndices[0] = Group{startIdx, startIdx}
 				}
 			}
 			// If we haven't moved in the string, increment the counter by 1
 			// to ensure we don't keep trying the same string over and over.
 			//			if i == startingFrom {
 			startIdx++
 			//	i++
 			//			}
 			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
 				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 					return true, tempIndices, tempIndices[0].EndIdx + 1
 				} else {
 					return true, tempIndices, tempIndices[0].EndIdx
 				}
 			}
 			return false, []Group{}, startIdx
 		}
 		currentStates = make([]*nfaState, len(tempStates))
 		copy(currentStates, tempStates)
 		tempStates = nil
 		i++
 	}
 	// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
 	// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
 	zeroStates, isZero := takeZeroState(currentStates, numGroups, i)
 	tempStates = append(tempStates, zeroStates...)
 	num_appended := 0 // Number of unique states addded to tempStates
 	for isZero == true {
 		zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 		tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 		if num_appended == 0 { // Break if we haven't appended any more unique values
 			break
 		}
 	}
 	currentStates = append(currentStates, tempStates...)
 	tempStates = nil
 	for _, state := range currentStates {
 		// Only add the match if the start index is in bounds. If the state has an assertion,
 		// make sure the assertion checks out.
 		if state.isLast && i <= len(str) {
 			if state.assert == noneAssert || state.checkAssertion(str, i) {
 				for j := 1; j < numGroups+1; j++ {
 					tempIndices[j] = state.threadGroups[j]
 				}
 				endIdx = i
 				tempIndices[0] = Group{startIdx, endIdx}
 			}
 		}
 	}
 	if tempIndices.numValidGroups() > 0 {
 		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 			return true, tempIndices, tempIndices[0].EndIdx + 1
 		} else {
 			return true, tempIndices, tempIndices[0].EndIdx
 		}
 	}
 	if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
 		startIdx++
 	}
 	return false, []Group{}, startIdx
 }
--- a/regex/misc.go
+++ b/regex/misc.go
@@ -48,49 +48,6 @@ func isNormalChar(c rune) bool {
 	return !slices.Contains(specialChars, c)
 }
 // uniqueAppend adds each of the given items to slc, skipping any item that is
 // already present (including one appended earlier in the same call). It
 // returns the resulting slice and the number of items that were appended.
 func uniqueAppend[T comparable](slc []T, items ...T) ([]T, int) {
 	added := 0
 	for i := range items {
 		if slices.Index(slc, items[i]) >= 0 {
 			continue // already present, nothing to do
 		}
 		slc = append(slc, items[i])
 		added++
 	}
 	return slc, added
 }
 // uniqueAppendFunc is the predicate-based counterpart of uniqueAppend: it
 // appends each of the given items to a copy of slc unless fn reports that an
 // equal element already exists. fn is called as fn(item, existing).
 //
 // Existence is checked against the result built so far, so a value repeated
 // within items is appended only once, matching uniqueAppend. The input slice
 // is never modified. It returns the new slice and the number of items
 // appended.
 func uniqueAppendFunc[T any](slc []T, fn func(T, T) bool, items ...T) ([]T, int) {
 	// Copy up front so that appends never write into the caller's backing array.
 	toRet := make([]T, len(slc), len(slc)+len(items))
 	copy(toRet, slc)
 	numAppended := 0
 	for _, item := range items {
 		exists := slices.ContainsFunc(toRet, func(val T) bool {
 			return fn(item, val)
 		})
 		if exists {
 			continue
 		}
 		toRet = append(toRet, item)
 		numAppended++
 	}
 	return toRet, numAppended
 }
 // allEqual reports whether all the given elements are equal to one another.
 // When called with no elements there is nothing that could differ, so it
 // reports true instead of panicking on an out-of-range index.
 func allEqual[T comparable](items ...T) bool {
 	if len(items) == 0 {
 		return true
 	}
 	first := items[0]
 	for _, item := range items {
 		if item != first {
 			return false
 		}
 	}
 	return true
 }
 // Map function - convert a slice of T to a slice of V, based on a function
 // that maps a T to a V
 func funcMap[T, V any](slc []T, fn func(T) V) []V {
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -29,10 +29,12 @@ type nfaState struct {
 	isEmpty bool          // If it is empty - Union operator and Kleene star states will be empty
 	isLast  bool          // If it is the last state (accept state)
 	output  []*nfaState   // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
-	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
+	//	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
 	next                       *nfaState  // The next state (not for alternation or kleene states)
 	isKleene                   bool       // Identifies whether current node is a 0-state representing Kleene star
 	isQuestion                 bool       // Identifies whether current node is a 0-state representing the question operator
 	isAlternation              bool       // Identifies whether current node is a 0-state representing an alternation
 	splitState                 *nfaState  // Only for alternation states - the 'other' branch of the alternation ('next' is the first)
 	assert                     assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
 	allChars                   bool       // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
 	except                     []rune     // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
@@ -70,7 +72,6 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 		isEmpty:         stateToClone.isEmpty,
 		isLast:          stateToClone.isLast,
 		output:          make([]*nfaState, len(stateToClone.output)),
 		transitions:     make(map[int][]*nfaState),
 		isKleene:        stateToClone.isKleene,
 		isQuestion:      stateToClone.isQuestion,
 		isAlternation:   stateToClone.isAlternation,
@@ -91,20 +92,18 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 			clone.output[i] = cloneStateHelper(s, cloneMap)
 		}
 	}
 	for k, v := range stateToClone.transitions {
 		clone.transitions[k] = make([]*nfaState, len(v))
 		for i, s := range v {
 			if s == stateToClone {
 				clone.transitions[k][i] = clone
 			} else {
 				clone.transitions[k][i] = cloneStateHelper(s, cloneMap)
 			}
 		}
 	}
 	if stateToClone.lookaroundNFA == stateToClone {
 		clone.lookaroundNFA = clone
 	}
 	clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
 	if stateToClone.splitState == stateToClone {
 		clone.splitState = clone
 	}
 	clone.splitState = cloneStateHelper(stateToClone.splitState, cloneMap)
 	if stateToClone.next == stateToClone {
 		clone.next = clone
 	}
 	clone.next = cloneStateHelper(stateToClone.next, cloneMap)
 	return clone
 }
@@ -115,22 +114,26 @@ func resetThreads(start *nfaState) {
 }
 // resetThreadsHelper clears threadGroups on the given state and then recurses
 // into the states it points to. visitedMap records the states that have
 // already been handled, so each state is processed once and the recursion
 // stops when it reaches a state it has seen before. A nil state is ignored.
 func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	if state == nil {
 		return
 	}
 	if _, ok := visitedMap[state]; ok {
 		return
 	}
 	// Assuming it hasn't been visited
 	state.threadGroups = nil
 	visitedMap[state] = true
-	for _, v := range state.transitions {
+	if state.isAlternation {
-		for _, nextState := range v {
+		resetThreadsHelper(state.next, visitedMap)
-			resetThreadsHelper(nextState, visitedMap)
+		resetThreadsHelper(state.splitState, visitedMap)
-		}
+	} else {
 		resetThreadsHelper(state.next, visitedMap)
 	}
 }
 // Checks if the given state's assertion is true. Returns true if the given
 // state doesn't have an assertion.
-func (s nfaState) checkAssertion(str []rune, idx int) bool {
+func (s nfaState) checkAssertion(str []rune, idx int, preferLongest bool) bool {
 	if s.assert == alwaysTrueAssert {
 		return true
 	}
@@ -180,7 +183,7 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 			strToMatch = string(runesToMatch)
 		}
-		regComp := Reg{startState, s.lookaroundNumCaptureGroups}
+		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex, preferLongest}
 		matchIndices := regComp.FindAll(strToMatch)
 		numMatchesFound := 0
@@ -207,9 +210,12 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 }
 // Returns true if the contents of 's' contain the value at the given index of the given string
-func (s nfaState) contentContains(str []rune, idx int) bool {
+func (s nfaState) contentContains(str []rune, idx int, preferLongest bool) bool {
 	if s.assert != noneAssert {
-		return s.checkAssertion(str, idx)
+		return s.checkAssertion(str, idx, preferLongest)
 	}
 	if idx >= len(str) {
 		return false
 	}
 	if s.allChars {
 		return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the index isn't a 'notDotChar', or isn't one of the exception characters for the current node.
@@ -222,74 +228,84 @@ func (s nfaState) isLookaround() bool {
 	return s.assert == plaAssert || s.assert == plbAssert || s.assert == nlaAssert || s.assert == nlbAssert
 }
 // numTransitions reports how many of the state's two outgoing pointers
 // ('next' and 'splitState') are set: 0, 1 or 2.
 func (s nfaState) numTransitions() int {
 	count := 0
 	if s.next != nil {
 		count++
 	}
 	if s.splitState != nil {
 		count++
 	}
 	return count
 }
 // Returns the matches for the character at the given index of the given string.
 // Also returns the number of matches. Returns -1 if an assertion failed.
-func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
+//func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
-	// Assertions can be viewed as 'checks'. If the check fails, we return
+//	// Assertions can be viewed as 'checks'. If the check fails, we return
-	// an empty array and 0.
+//	// an empty array and 0.
-	// If it passes, we treat it like any other state, and return all the transitions.
+//	// If it passes, we treat it like any other state, and return all the transitions.
-	if s.assert != noneAssert {
+//	if s.assert != noneAssert {
-		if s.checkAssertion(str, idx) == false {
+//		if s.checkAssertion(str, idx) == false {
-			return make([]*nfaState, 0), -1
+//			return make([]*nfaState, 0), -1
-		}
+//		}
-	}
+//	}
-	listTransitions := s.transitions[int(str[idx])]
+//	listTransitions := s.transitions[int(str[idx])]
-	for _, dest := range s.transitions[int(anyCharRune)] {
+//	for _, dest := range s.transitions[int(anyCharRune)] {
-		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
+//		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
-			// Add an allChar state to the list of matches if:
+//			// Add an allChar state to the list of matches if:
-			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
+//			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
-			// 		b. The current character isn't the state's exception list.
+//			// 		b. The current character isn't the state's exception list.
-			listTransitions = append(listTransitions, dest)
+//			listTransitions = append(listTransitions, dest)
-		}
+//		}
-	}
+//	}
-	numTransitions := len(listTransitions)
+//	numTransitions := len(listTransitions)
-	return listTransitions, numTransitions
+//	return listTransitions, numTransitions
-}
+//}
 // verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
-func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
+//func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
-	if len(st.transitions) == 0 {
+//	if st.numTransitions() == 0 {
-		st.isLast = true
+//		st.isLast = true
-		return
+//		return
-	}
+//	}
-	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
+//	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
-	if len(st.transitions) == 1 { // Eg. a*
+//	if st.numTransitions() == 1 { // Eg. a*
-		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
+//		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
-		for _, c := range st.content {
+//		for _, c := range st.content {
-			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
+//			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
-				moreThanOneTrans = true
+//				moreThanOneTrans = true
-			}
+//			}
-		}
+//		}
-		st.isLast = !moreThanOneTrans
+//		st.isLast = !moreThanOneTrans
-	}
+//	}
-
+//
-	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
+//	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
-		transitionDests := make([]*nfaState, 0)
+//		transitionDests := make([]*nfaState, 0)
-		for _, v := range st.transitions {
+//		for _, v := range st.transitions {
-			transitionDests = append(transitionDests, v...)
+//			transitionDests = append(transitionDests, v...)
-		}
+//		}
-		if allEqual(transitionDests...) {
+//		if allEqual(transitionDests...) {
-			st.isLast = true
+//			st.isLast = true
-			return
+//			return
-		}
+//		}
-	}
+//	}
-	if visited[st] == true {
+//	if visited[st] == true {
-		return
+//		return
-	}
+//	}
-	visited[st] = true
+//	visited[st] = true
-	for _, states := range st.transitions {
+//	for _, states := range st.transitions {
-		for i := range states {
+//		for i := range states {
-			if states[i] != st {
+//			if states[i] != st {
-				verifyLastStatesHelper(states[i], visited)
+//				verifyLastStatesHelper(states[i], visited)
-			}
+//			}
-		}
+//		}
-	}
+//	}
-}
+//}
 // verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
-func verifyLastStates(start []*nfaState) {
+//func verifyLastStates(start []*nfaState) {
-	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
+//	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
-}
+//}
 // Concatenates s1 and s2, returns the start of the concatenation.
 func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -297,75 +313,84 @@ func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
 		return s2
 	}
 	for i := range s1.output {
-		for _, c := range s2.content { // Create transitions for every element in s1's content to s2'
+		s1.output[i].next = s2
 			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], s2)
 		}
 	}
 	s1.output = s2.output
 	return s1
 }
 // kleene builds the state representing the Kleene star applied to s1. It
 // returns an error when s1 is an empty state that carries an assertion, since
 // such a token cannot be quantified. Otherwise it returns a new empty state,
 // flagged as both an alternation and a Kleene state, whose splitState is s1
 // and whose only output is itself.
-func kleene(s1 nfaState) (*nfaState, error) {
+func kleene(s1 *nfaState) (*nfaState, error) {
 	if s1.isEmpty && s1.assert != noneAssert {
 		return nil, fmt.Errorf("previous token is not quantifiable")
 	}
 	toReturn := &nfaState{}
 	toReturn.transitions = make(map[int][]*nfaState)
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isAlternation = true
 	toReturn.content = newContents(epsilon)
 	toReturn.splitState = s1
 	//	toReturn := &nfaState{}
 	//	toReturn.transitions = make(map[int][]*nfaState)
 	//	toReturn.content = newContents(epsilon)
 	toReturn.isKleene = true
-	toReturn.output = append(toReturn.output, toReturn)
+	toReturn.output = append([]*nfaState{}, toReturn)
 	for i := range s1.output {
-		for _, c := range toReturn.content {
+		s1.output[i].next = toReturn
 			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn)
 		}
 	}
 	for _, c := range s1.content {
 		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
 	}
 	//	for _, c := range s1.content {
 	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
 	//	}
 	//toReturn.kleeneState = &s1
 	return toReturn, nil
 }
 // alternate builds the state representing the alternation of s1 and s2. The
 // returned state is a new empty state flagged as an alternation, whose 'next'
 // branch is s1 and whose 'splitState' branch is s2. Its outputs are the
 // outputs of s1 followed by the outputs of s2.
 func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
 	toReturn := &nfaState{}
-	toReturn.transitions = make(map[int][]*nfaState)
+	//	toReturn.transitions = make(map[int][]*nfaState)
 	toReturn.output = append(toReturn.output, s1.output...)
 	toReturn.output = append(toReturn.output, s2.output...)
-	// Unique append is used here (and elsewhere) to ensure that,
+	//	// Unique append is used here (and elsewhere) to ensure that,
-	// for any given transition, a state can only be mentioned once.
+	//	// for any given transition, a state can only be mentioned once.
-	// For example, given the transition 'a', the state 's1' can only be mentioned once.
+	//	// For example, given the transition 'a', the state 's1' can only be mentioned once.
-	// This would lead to multiple instances of the same set of match indices, since both
+	//	// This would lead to multiple instances of the same set of match indices, since both
-	// 's1' states would be considered to match.
+	//	// 's1' states would be considered to match.
-	for _, c := range s1.content {
+	//	for _, c := range s1.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
-	}
+	//	}
-	for _, c := range s2.content {
+	//	for _, c := range s2.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
-	}
+	//	}
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isAlternation = true
 	toReturn.next = s1
 	toReturn.splitState = s2
 	return toReturn
 }
 // question builds the state representing the '?' operator applied to s1. In
 // the added ('+') version it returns an error when s1 is an empty state that
 // carries an assertion; otherwise it returns a new empty state flagged as both
 // an alternation and a question state, whose splitState is s1 and whose
 // outputs are the new state itself followed by the outputs of s1.
-func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
+func question(s1 *nfaState) (*nfaState, error) { // Use the fact that ab? == a(b|)
-	s2 := &nfaState{}
+	if s1.isEmpty && s1.assert != noneAssert {
-	s2.transitions = make(map[int][]*nfaState)
+		return nil, fmt.Errorf("previous token is not quantifiable")
-	s2.content = newContents(epsilon)
+	}
-	s2.output = append(s2.output, s2)
+	toReturn := &nfaState{}
-	s2.isEmpty = true
+	toReturn.isEmpty = true
-	s2.isQuestion = true
+	toReturn.isAlternation = true
-	s3 := alternate(s1, s2)
+	toReturn.isQuestion = true
-	return s3
+	toReturn.content = newContents(epsilon)
 	toReturn.splitState = s1
 	toReturn.output = append([]*nfaState{}, toReturn)
 	toReturn.output = append(toReturn.output, s1.output...)
 	//	s2.transitions = make(map[int][]*nfaState)
 	return toReturn, nil
 }
 // Creates and returns a new state with the 'default' values.
 func newState() nfaState {
 	ret := nfaState{
 		output: make([]*nfaState, 0),
-		transitions:     make(map[int][]*nfaState),
+		//		transitions:     make(map[int][]*nfaState),
 		assert:          noneAssert,
 		except:          append([]rune{}, 0),
 		lookaroundRegex: "",
@@ -377,10 +402,40 @@ func newState() nfaState {
 }
 // zeroLengthMatchState creates and returns a state that _always_ has a
 // zero-length match: an empty state with epsilon content whose assertion is
 // alwaysTrueAssert and whose only output is the state itself.
-func zeroLengthMatchState() nfaState {
+func zeroLengthMatchState() *nfaState {
-	start := newState()
+	start := &nfaState{}
 	start.content = newContents(epsilon)
 	start.isEmpty = true
 	start.assert = alwaysTrueAssert
 	start.output = append([]*nfaState{}, start)
 	return start
 }
 // equals reports whether s and other agree on every field compared here.
 // Pointer fields (next, splitState, lookaroundNFA and the elements of output)
 // are compared by identity; slice fields are compared element by element.
 func (s nfaState) equals(other nfaState) bool {
 	// Plain flags and scalar fields.
 	if s.isEmpty != other.isEmpty || s.isLast != other.isLast {
 		return false
 	}
 	if s.isKleene != other.isKleene || s.isQuestion != other.isQuestion || s.isAlternation != other.isAlternation {
 		return false
 	}
 	if s.assert != other.assert || s.allChars != other.allChars {
 		return false
 	}
 	if s.groupBegin != other.groupBegin || s.groupEnd != other.groupEnd || s.groupNum != other.groupNum {
 		return false
 	}
 	// Links to other states.
 	if s.next != other.next || s.splitState != other.splitState || s.lookaroundNFA != other.lookaroundNFA {
 		return false
 	}
 	// Slice-valued fields.
 	if !slices.Equal(s.output, other.output) || !slices.Equal(s.content, other.content) {
 		return false
 	}
 	if !slices.Equal(s.except, other.except) {
 		return false
 	}
 	return slices.Equal(s.threadGroups, other.threadGroups)
 }
 // stateExists reports whether list contains a state for which equals(s)
 // holds.
 func stateExists(list []nfaState, s nfaState) bool {
 	return slices.ContainsFunc(list, func(candidate nfaState) bool {
 		return candidate.equals(s)
 	})
 }
--- a/regex/priorityQueue.go
+++ b/regex/priorityQueue.go
@@ -1,76 +0,0 @@
 package regex
 import "container/heap"
 // Implement a priority queue using container/heap
 // Priorities for NFA states, in increasing order (iota assigns 0 through 5).
 // priorityQueue.Less orders larger priorities first. getPriority returns one
 // of the four middle values; min_priority and max_priority are the lowest and
 // highest values of the range.
 const (
 	min_priority int = iota
 	zerostate_priority
 	alternation_priority
 	kleene_priority
 	char_priority
 	max_priority
 )
 // getPriority maps a state to its queue priority. Kleene states are checked
 // first, then question/alternation states; any other empty state gets
 // zerostate_priority, and everything else gets char_priority.
 func getPriority(state *nfaState) int {
 	switch {
 	case state.isKleene:
 		return kleene_priority
 	case state.isQuestion, state.isAlternation:
 		return alternation_priority
 	case state.isEmpty:
 		return zerostate_priority
 	default:
 		return char_priority
 	}
 }
 // priorQueueItem is a single entry of a priorityQueue.
 type priorQueueItem struct {
 	// state is the NFA state carried by this entry.
 	state    *nfaState
 	// priority is the value the queue is ordered by (larger comes first).
 	priority int
 	// index is the entry's position in the queue's slice. Push and Swap keep
 	// it up to date, and Pop sets it to -1 on the entry it removes.
 	index    int
 }
 // priorityQueue is a slice of queue entries. Its Len, Less, Swap, Push and
 // Pop methods let it be used with the container/heap functions.
 type priorityQueue []*priorQueueItem
 // Len returns the number of entries currently in the queue.
 func (pq priorityQueue) Len() int {
 	return len(pq)
 }
 // Less reports whether entry i must be ordered before entry j. Entries with
 // a larger priority come first (max-heap ordering); when priorities tie, the
 // entry with the larger index comes first.
 func (pq priorityQueue) Less(i, j int) bool {
 	a, b := pq[i], pq[j]
 	if a.priority != b.priority {
 		return a.priority > b.priority // We want max-heap, so we use greater-than
 	}
 	return a.index > b.index
 }
 // Swap exchanges the entries at positions i and j and updates the index
 // stored in each entry to match its new position.
 func (pq priorityQueue) Swap(i, j int) {
 	pq[i], pq[j] = pq[j], pq[i]
 	pq[i].index, pq[j].index = i, j
 }
 // Push appends x to the end of the queue and records that position in the
 // entry's index. x must be a *priorQueueItem; any other type makes the type
 // assertion panic.
 func (pq *priorityQueue) Push(x any) {
 	entry := x.(*priorQueueItem)
 	entry.index = len(*pq)
 	*pq = append(*pq, entry)
 }
 // Pop removes the last entry of the underlying slice and returns it, after
 // setting the entry's index to -1.
 func (pq *priorityQueue) Pop() any {
 	queue := *pq
 	last := len(queue) - 1
 	popped := queue[last]
 	queue[last] = nil // clear the slot so the slice no longer references the entry
 	popped.index = -1
 	*pq = queue[:last]
 	return popped
 }
 // update replaces the state and priority of an entry already in the queue,
 // then calls heap.Fix at the entry's index to re-establish the heap ordering.
 func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
 	item.state, item.priority = value, priority
 	heap.Fix(pq, item.index)
 }
--- a/regex/range2regex.go
+++ b/regex/range2regex.go
@@ -109,7 +109,7 @@ func range2regex(start int, end int) (string, error) {
 		startSlc := intToSlc(rg.start)
 		endSlc := intToSlc(rg.end)
 		if len(startSlc) != len(endSlc) {
-			return "", fmt.Errorf("Error parsing numeric range")
+			return "", fmt.Errorf("error parsing numeric range")
 		}
 		for i := range startSlc {
 			if startSlc[i] == endSlc[i] {
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -25,7 +25,9 @@ var reTests = []struct {
 	{"a*b", nil, "qwqw", []Group{}},
 	{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
 	{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
-	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	// This match will only happen with Longest()
 	// {"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
 	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}},
 	{"b*a*a", nil, "bba", []Group{{0, 3}}},
 	{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
 	{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
@@ -528,7 +530,7 @@ var groupTests = []struct {
 }{
 	{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
 	{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
-	{"(0)", nil, "ab", []Match{[]Group{}}},
+	{"(0)", nil, "ab", []Match{}},
 	{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
 	{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
 	{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
@@ -537,10 +539,11 @@ var groupTests = []struct {
 	{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
 	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	// This match will only happen with Longest()
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	//	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
 	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 3}, {0, 3}, {-1, -1}}}},
 	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
+	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
 	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
 	{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
 	{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
@@ -578,7 +581,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `bcdd`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, nil, `a`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, nil, `a`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\41`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
@@ -633,7 +636,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
 	{`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}},
@@ -701,7 +704,7 @@ func TestFind(t *testing.T) {
 					if len(test.result) == 0 {
 						return // Manually pass the test, because this is the expected behavior
 					} else {
-						t.Errorf("Wanted no match Got %v\n", groupIndex)
+						t.Errorf("Wanted %v Got no matches\n", test.result)
 					}
 				} else {
 					if groupIndex != test.result[0] {
@@ -743,7 +746,7 @@ func TestFindString(t *testing.T) {
 				foundString := regComp.FindString(test.str)
 				if len(test.result) == 0 {
 					if foundString != "" {
-						t.Errorf("Expected no match got %v\n", foundString)
+						t.Errorf("Wanted no match got %v\n", foundString)
 					}
 				} else {
 					expectedString := test.str[test.result[0].StartIdx:test.result[0].EndIdx]
@@ -791,11 +794,68 @@ func TestFindSubmatch(t *testing.T) {
 				}
 			}
 			match, err := regComp.FindSubmatch(test.str)
 			if err != nil {
 				if len(test.result) != 0 {
 					t.Errorf("Wanted %v got no match\n", test.result[0])
 				}
 			} else if len(test.result) == 0 {
 				t.Errorf("Wanted no match got %v\n", match)
 			}
 			for i := range match {
 				if match[i].IsValid() {
 					if test.result[0][i] != match[i] {
 						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 					}
 				} else {
 					if i < len(test.result) && test.result[0][i].IsValid() {
 						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 					}
 				}
 			}
 		})
 	}
 }
 func TestFindStringSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
 			regComp, err := Compile(test.re, test.flags...)
 			if err != nil {
 				if test.result != nil {
 					panic(err)
 				}
 			}
 			matchStr := regComp.FindStringSubmatch(test.str)
 			if matchStr == nil {
 				if len(test.result) != 0 {
 					expectedStr := funcMap(test.result[0], func(g Group) string {
 						if g.IsValid() {
 							return test.str[g.StartIdx:g.EndIdx]
 						} else {
 							return ""
 						}
 					})
 					t.Errorf("Wanted %v got no match\n", expectedStr)
 				}
 			} else if len(test.result) == 0 {
 				t.Errorf("Wanted no match got %v\n", matchStr)
 			} else {
 				expectedStr := funcMap(test.result[0], func(g Group) string {
 					if g.IsValid() {
 						return test.str[g.StartIdx:g.EndIdx]
 					} else {
 						return ""
 					}
 				})
 				for i, groupStr := range matchStr {
 					if groupStr == "" {
 						if i < len(expectedStr) && expectedStr[i] != "" {
 							t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
 						}
 					} else {
 						if expectedStr[i] != groupStr {
 							t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
 						}
 					}
 				}
 			}
 		})
@@ -817,6 +877,10 @@ func TestFindAllSubmatch(t *testing.T) {
 						if test.result[i][j] != matchIndices[i][j] {
 							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 						}
 					} else {
 						if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
 							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 						}
 					}
 				}
 			}