Updated documentation

Updated README
Added README
2025-02-14 12:07:43 -05:00 · 2025-02-14 12:00:33 -05:00 · 2025-02-14 11:59:43 -05:00 · 2025-02-14 11:37:48 -05:00 · 2025-02-14 11:37:28 -05:00 · 2025-02-14 11:37:14 -05:00
13 changed files with 1267 additions and 731 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,17 @@
 ## Kleingrep
 Kleingrep is a regular expression engine, providing a library and command-line tool written in Go.
 It aims to provide a more featureful engine, compared to the one in
 [Go's standard library](https://pkg.go.dev/regexp), while retaining some semblance of efficiency.
 The engine does __not__ use backtracking, relying on the NFA-based method described in
 [Russ Cox's articles](https://swtch.com/~rsc/regexp). As such, it is immune to catastrophic backtracking.
 It also includes features not present in regexp, such as lookarounds and backreferences.
 ### Syntax
 The syntax is, for the most part, a superset of Go's regexp. A full overview of the syntax can be found [here](https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex#hdr-Syntax).
 __For more information, see https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex__.
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -129,6 +129,8 @@ func main() {
 			matchIndices = regComp.FindAllSubmatch(test_str)
 		}
 		test_str_runes := []rune(test_str) // Converting to runes preserves unicode characters
 		if *printMatchesFlag {
 			// if we are in single line mode, print the line on which
 			// the matches occur
@@ -158,10 +160,10 @@ func main() {
 			oldIndices := indicesToPrint.values()
 			indicesToPrint = new_uniq_arr[int]()
 			// Explanation:
-			// Find all numbers from 0 to len(test_str) that are NOT in oldIndices.
+			// Find all numbers from 0 to len(test_str_runes) that are NOT in oldIndices.
 			// These are the values we want to print, now that we have inverted the match.
 			// Re-initialize indicesToPrint and add all of these values to it.
-			indicesToPrint.add(setDifference(genRange(0, len(test_str)), oldIndices)...)
+			indicesToPrint.add(setDifference(genRange(0, len(test_str_runes)), oldIndices)...)
 		}
 		// If lineFlag is enabled, we should only print something if:
@@ -182,7 +184,7 @@ func main() {
 		//			the corresponding end index.
 		// 		3. If not, just print the character.
 		if substituteFlagEnabled {
-			for i := range test_str {
+			for i := range test_str_runes {
 				inMatchIndex := false
 				for _, m := range matchIndices {
 					if i == m[0].StartIdx {
@@ -193,11 +195,11 @@ func main() {
 					}
 				}
 				if !inMatchIndex {
-					fmt.Fprintf(out, "%c", test_str[i])
+					fmt.Fprintf(out, "%c", test_str_runes[i])
 				}
 			}
 		} else {
-			for i, c := range test_str {
+			for i, c := range test_str_runes {
 				if indicesToPrint.contains(i) {
 					color.New(color.FgRed).Fprintf(out, "%c", c)
 					// Newline after every match - only if -o is enabled and -v is disabled.
--- a/cmd/unique_array.go
+++ b/cmd/unique_array.go
@@ -16,7 +16,6 @@ func (s *uniq_arr[T]) add(vals ...T) {
 			s.backingMap[item] = struct{}{}
 		}
 	}
 	return
 }
 func (s uniq_arr[T]) contains(val T) bool {
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -12,16 +12,43 @@ var notDotChars []rune
 // A Reg represents the result of compiling a regular expression. It contains
 // the startState of the NFA representation of the regex, and the number of capturing
-// groups in the regex.
+// groups in the regex. It also contains the expression string.
 type Reg struct {
-	start     *nfaState
+	start         *nfaState
-	numGroups int
+	numGroups     int
 	str           string
 	preferLongest bool
 }
-// numSubexp eturns the number of sub-expressions in the given [Reg]. This is equivalent
+// NumSubexp returns the number of sub-expressions in the given [Reg]. This is equivalent
 // to the number of capturing groups.
-func (r Reg) NumSubexp() int {
+func (re Reg) NumSubexp() int {
-	return r.numGroups
+	return re.numGroups
 }
 // String returns the pattern text stored in re.str, i.e. the string the
 // expression was compiled from.
 func (re Reg) String() string {
 	return re.str
 }
 // MarshalText implements [encoding.TextMarshaler]. The output is equivalent to that of [Reg.String].
 // Any flags passed as arguments (including calling [Reg.Longest]) are lost.
 //
 // The receiver is a value (matching [Reg.String] and [Reg.NumSubexp]) so that both Reg and *Reg
 // satisfy the interface. With a pointer receiver, a Reg that is not addressable (for example a
 // map value, or a struct passed by value to an encoder) would not be marshaled through this method.
 // The returned error is always nil.
 func (re Reg) MarshalText() ([]byte, error) {
 	return []byte(re.String()), nil
 }
 // UnmarshalText implements [encoding.TextUnmarshaler]. The given bytes are treated as an
 // expression string and handed to [Compile] (without any flags). On success the compiled
 // expression replaces the value re points to; on failure re is left untouched and the
 // error from [Compile] is returned as-is.
 func (re *Reg) UnmarshalText(text []byte) error {
 	compiled, err := Compile(string(text))
 	if err != nil {
 		return err
 	}
 	*re = compiled
 	return nil
 }
 // Longest sets the preferLongest flag on re. It needs a pointer receiver because it
 // mutates the expression in place.
 // NOTE(review): presumably the matching functions consult this flag to prefer the
 // longest match (mirroring regexp.Regexp.Longest) - confirm against the matcher.
 func (re *Reg) Longest() {
 	re.preferLongest = true
 }
 const concatRune rune = 0xF0001
@@ -37,7 +64,7 @@ const (
 )
 func isOperator(c rune) bool {
-	if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune {
+	if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune || c == lazyPlusRune || c == lazyKleeneRune || c == lazyQuestionRune {
 		return true
 	}
 	return false
@@ -45,7 +72,7 @@ func isOperator(c rune) bool {
 /* priority returns the priority of the given operator */
 func priority(op rune) int {
-	precedence := []rune{'|', concatRune, '+', '*', '?'}
+	precedence := []rune{'|', concatRune, '+', lazyPlusRune, '*', lazyKleeneRune, '?', lazyQuestionRune}
 	return slices.Index(precedence, op)
 }
@@ -81,6 +108,48 @@ func getPOSIXClass(str []rune) (bool, string) {
 	return true, rtv
 }
 // isUnicodeCharClassLetter reports whether c is one of the single-letter names
 // accepted as a unicode character class: L, M, S, N, P, C or Z (upper-case only).
 func isUnicodeCharClassLetter(c rune) bool {
 	classLetters := []rune("LMSNPCZ")
 	return slices.Index(classLetters, c) >= 0
 }
 // rangeTableToRuneSlice expands the given range table into a slice holding every
 // code point it describes and returns it. The 16-bit ranges are emitted first, then
 // the 32-bit ranges, each in table order; within a range the code points run from Lo
 // to Hi (inclusive) in steps of Stride. A table with no ranges yields a nil slice.
 func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune {
 	var rtv []rune
 	for _, r := range rangetable.R16 {
 		// Step in a type wider than uint16: with a uint16 counter, 'c += Stride' wraps
 		// around past 0xFFFF, so a range ending at 0xFFFF would never terminate.
 		lo, hi, stride := rune(r.Lo), rune(r.Hi), rune(r.Stride)
 		for c := lo; c <= hi; c += stride {
 			rtv = append(rtv, c)
 		}
 	}
 	for _, r := range rangetable.R32 {
 		// Same reasoning as above, for a uint32 counter near its maximum value.
 		lo, hi, stride := int64(r.Lo), int64(r.Hi), int64(r.Stride)
 		for c := lo; c <= hi; c += stride {
 			rtv = append(rtv, rune(c))
 		}
 	}
 	return rtv
 }
 // unicodeCharClassToRange converts the given unicode character class name into a list of characters in that class.
 // The name may be a one- or two-letter general category (eg. 'C' or 'Lu'), looked up in [unicode.Categories],
 // or a script name (eg. 'Greek'), looked up in [unicode.Scripts].
 //
 // Short names (one or two bytes) are tried as a category first and then as a script, because some
 // script names are only two letters long (eg. 'Yi'). Longer names are only looked up as scripts.
 // An error is returned if the name is empty or matches neither table.
 func unicodeCharClassToRange(class string) ([]rune, error) {
 	if len(class) == 0 {
 		return nil, fmt.Errorf("empty unicode character class")
 	}
 	isShort := len(class) <= 2
 	if isShort {
 		if rangeTable, ok := unicode.Categories[class]; ok {
 			return rangeTableToRuneSlice(rangeTable), nil
 		}
 	}
 	if rangeTable, ok := unicode.Scripts[class]; ok {
 		return rangeTableToRuneSlice(rangeTable), nil
 	}
 	if isShort {
 		return nil, fmt.Errorf("invalid short unicode character class")
 	}
 	return nil, fmt.Errorf("invalid long unicode character class")
 }
 // Stores whether the case-insensitive flag has been enabled.
 var caseInsensitive bool
@@ -139,9 +208,6 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 	//		metacharacter. Later, in thompson(), these will be converted back. This avoids
 	//		confusion in detecting whether a character is escaped eg. detecting
 	// 		whether '\\[a]' has an escaped opening bracket (it doesn't).
 	//
 	// 	5. 	Check for non-greedy operators. These are not supported at the moment, so an error
 	// 		must be thrown if the user attempts to use a non-greedy operator.
 	for i := 0; i < len(re_runes_orig); i++ {
 		c := re_runes_orig[i]
 		if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
@@ -188,8 +254,16 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 		} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
 			re_runes = append(re_runes, rbracketRune)
 			continue
-		} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
+		} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i > 0 && re_runes_orig[i-1] != '\\') && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
-			return nil, fmt.Errorf("non-greedy operators are not supported")
+			switch c {
 			case '+':
 				re_runes = append(re_runes, lazyPlusRune)
 			case '*':
 				re_runes = append(re_runes, lazyKleeneRune)
 			case '?':
 				re_runes = append(re_runes, lazyQuestionRune)
 			}
 			i++
 		} else {
 			re_runes = append(re_runes, c)
 		}
@@ -282,17 +356,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 					}
 				} else if isHex(re_runes[i]) {
 					re_postfix = append(re_postfix, re_runes[i:i+2]...)
-					i += 2
+					i += 1 // I don't skip forward 2 steps, because the second step will happen with the loop increment
 				} else {
 					return nil, fmt.Errorf("invalid hex value in expression")
 				}
-			} else if isOctal(re_runes[i]) {
+			} else if re_runes[i] == 'p' || re_runes[i] == 'P' { // Unicode character class (P is negated unicode charclass)
 				re_postfix = append(re_postfix, re_runes[i])
 				i++
 				if i >= len(re_runes) {
 					return nil, fmt.Errorf("error parsing unicode character class in expression")
 				}
 				if re_runes[i] == '{' { // Full name charclass
 					for re_runes[i] != '}' {
 						re_postfix = append(re_postfix, re_runes[i])
 						i++
 					}
 					re_postfix = append(re_postfix, re_runes[i])
 					i++
 				} else if isUnicodeCharClassLetter(re_runes[i]) {
 					re_postfix = append(re_postfix, re_runes[i])
 					i++
 				} else {
 					return nil, fmt.Errorf("error parsing unicode character class in expression")
 				}
 				i-- // The loop increment at the top will move us forward
 			} else if re_runes[i] == '0' { // Start of octal value
 				numDigits := 1
-				for i+numDigits < len(re_runes) && numDigits < 3 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 3)
+				for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0)
 					numDigits++
 				}
 				re_postfix = append(re_postfix, re_runes[i:i+numDigits]...)
 				i += (numDigits - 1) // I have to move back a step, so that I can add a concatenation operator if necessary, and so that the increment at the bottom of the loop works as intended
 			} else if unicode.IsDigit(re_runes[i]) { // Any other number - backreference
 				numDigits := 1
 				for i+numDigits < len(re_runes) && unicode.IsDigit(re_runes[i+numDigits]) { // Skip while we see a digit
 					numDigits++
 				}
 				re_postfix = append(re_postfix, re_runes[i:i+numDigits]...)
 				i += (numDigits - 1) // Move back a step to add concatenation operator
 			} else {
 				re_postfix = append(re_postfix, re_runes[i])
 			}
@@ -325,7 +426,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 		}
 		if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != nonCapLparenRune && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
 			if i < len(re_runes)-1 {
-				if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
+				if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != lazyKleeneRune && re_runes[i+1] != '+' && re_runes[i+1] != lazyPlusRune && re_runes[i+1] != '?' && re_runes[i+1] != lazyQuestionRune && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
 					re_postfix = append(re_postfix, concatRune)
 				}
 			}
@@ -337,7 +438,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 	outQueue := make([]postfixNode, 0) // Output queue
 	// Actual algorithm
-	numOpenParens := 0 // Number of open parentheses
+	numOpenParens := 0                               // Number of open parentheses
 	parenIndices := make([]Group, 0)                 // I really shouldn't be using Group here, because that's strictly for matching purposes, but its a convenient way to store the indices of the opening and closing parens.
 	parenIndices = append(parenIndices, Group{0, 0}) // I append a weird value here, because the 0-th group doesn't have any parens. This way, the 1st group will be at index 1, 2nd at 2 ...
 	for i := 0; i < len(re_postfix); i++ {
 		/* Two cases:
 		1. Current character is alphanumeric - send to output queue
@@ -393,11 +496,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 				} else {
 					return nil, fmt.Errorf("not enough hex characters found in expression")
 				}
-			} else if isOctal(re_postfix[i]) { // Octal value
+			} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
 				charClassInverted := (re_postfix[i] == 'P')
 				charsInClass := []rune{}
 				i++
 				if isUnicodeCharClassLetter(re_postfix[i]) {
 					var err error
 					charsInClass, err = unicodeCharClassToRange(string(re_postfix[i]))
 					if err != nil {
 						return nil, err
 					}
 				} else if re_postfix[i] == '{' {
 					i++ // Skip opening bracket
 					unicodeCharClassStr := ""
 					for re_postfix[i] != '}' {
 						unicodeCharClassStr += string(re_postfix[i])
 						i++
 					}
 					var err error
 					charsInClass, err = unicodeCharClassToRange(unicodeCharClassStr)
 					if err != nil {
 						return nil, err
 					}
 				} else {
 					return nil, fmt.Errorf("error parsing unicode character class in expression")
 				}
 				var toAppend postfixNode
 				if !charClassInverted { // \p
 					toAppend = newPostfixNode(charsInClass...)
 				} else { // \P
 					toAppend = newPostfixDotNode()
 					toAppend.except = append([]postfixNode{}, newPostfixNode(charsInClass...))
 				}
 				outQueue = append(outQueue, toAppend)
 			} else if re_postfix[i] == '0' { // Octal value
 				var octVal int64
 				var octValStr string
 				numDigitsParsed := 0
-				for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 {
+				for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 4 {
 					octValStr += string(re_postfix[i+numDigitsParsed])
 					numDigitsParsed++
 				}
@@ -410,6 +546,20 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 				}
 				i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
 				outQueue = append(outQueue, newPostfixCharNode(rune(octVal)))
 			} else if unicode.IsDigit(re_postfix[i]) { // Backreference
 				var num int64
 				var numStr string
 				numDigitsParsed := 0
 				for (i+numDigitsParsed) < len(re_postfix) && unicode.IsDigit(re_postfix[i+numDigitsParsed]) {
 					numStr += string(re_postfix[i+numDigitsParsed])
 					numDigitsParsed++
 				}
 				num, err := strconv.ParseInt(numStr, 10, 32)
 				if err != nil {
 					return nil, fmt.Errorf("error parsing backreference in expresion")
 				}
 				i += numDigitsParsed - 1
 				outQueue = append(outQueue, newPostfixBackreferenceNode(int(num)))
 			} else {
 				escapedNode, err := newEscapedNode(re_postfix[i], false)
 				if err != nil {
@@ -561,11 +711,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 						} else {
 							return nil, fmt.Errorf("not enough hex characters found in character class")
 						}
-					} else if isOctal(re_postfix[i]) { // Octal value
+					} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
 						charClassInverted := (re_postfix[i] == 'P')
 						charsInList := []rune{}
 						i++
 						if isUnicodeCharClassLetter(re_postfix[i]) {
 							var err error
 							charsInList, err = unicodeCharClassToRange(string(re_postfix[i]))
 							if err != nil {
 								return nil, err
 							}
 						} else if re_postfix[i] == '{' {
 							i++ // Skip opening bracket
 							unicodeCharClassStr := ""
 							for re_postfix[i] != '}' {
 								unicodeCharClassStr += string(re_postfix[i])
 								i++
 							}
 							var err error
 							charsInList, err = unicodeCharClassToRange(unicodeCharClassStr)
 							if err != nil {
 								return nil, err
 							}
 						} else {
 							return nil, fmt.Errorf("error parsing unicode character class in expression")
 						}
 						if !charClassInverted {
 							chars = append(chars, newPostfixNode(charsInList...))
 						} else {
 							toAppend := newPostfixDotNode()
 							toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...))
 							chars = append(chars, toAppend)
 						}
 					} else if re_postfix[i] == '0' { // Octal value
 						var octVal int64
 						var octValStr string
 						numDigitsParsed := 0
-						for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
+						for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 4 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
 							octValStr += string(re_postfix[i+numDigitsParsed])
 							numDigitsParsed++
 						}
@@ -762,6 +945,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 			}
 			outQueue[idx].startReps = startRangeNum
 			outQueue[idx].endReps = endRangeNum
 			if i < len(re_postfix)-1 && re_postfix[i+1] == '?' { // lazy repitition
 				outQueue[idx].isLazy = true
 				i++
 			}
 		}
 		if c == '(' || c == nonCapLparenRune {
 			opStack = append(opStack, c)
@@ -769,6 +956,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 				outQueue = append(outQueue, newPostfixNode(c))
 			}
 			numOpenParens++
 			parenIndices = append(parenIndices, Group{StartIdx: len(outQueue) - 1}) // Push the index of the lparen into parenIndices
 		}
 		if c == ')' {
 			// Keep popping from opStack until we encounter an opening parantheses or a NONCAPLPAREN_CHAR. Throw error if we reach the end of the stack.
@@ -785,6 +973,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 			if val == '(' {       // Whatever was inside the parentheses was a _capturing_ group, so we append the closing parentheses as well
 				outQueue = append(outQueue, newPostfixNode(')')) // Add closing parentheses
 			}
 			parenIndices[numOpenParens].EndIdx = len(outQueue) - 1
 			numOpenParens--
 		}
 	}
@@ -799,6 +988,11 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 		return nil, fmt.Errorf("imbalanced parantheses")
 	}
 	//	outQueue, _, err := rewriteBackreferences(outQueue, parenIndices)
 	//	if err != nil {
 	//		return nil, err
 	//	}
 	return outQueue, nil
 }
@@ -816,13 +1010,12 @@ func thompson(re []postfixNode) (Reg, error) {
 	// In these cases, we will return an NFA with 1 state, with an assertion that is always true.
 	if len(re) == 0 {
 		start := zeroLengthMatchState()
-		nfa = append(nfa, &start)
+		nfa = append(nfa, start)
 	}
 	for _, c := range re {
 		if c.nodetype == characterNode || c.nodetype == assertionNode {
 			stateToAdd := nfaState{}
 			stateToAdd.transitions = make(map[int][]*nfaState)
 			if c.allChars {
 				stateToAdd.allChars = true
 				if len(c.except) != 0 {
@@ -934,7 +1127,6 @@ func thompson(re []postfixNode) (Reg, error) {
 			s.isEmpty = true
 			s.output = make([]*nfaState, 0)
 			s.output = append(s.output, s)
 			s.transitions = make(map[int][]*nfaState)
 			// LPAREN nodes are just added normally
 			if c.nodetype == lparenNode {
 				numGroups++
@@ -966,7 +1158,7 @@ func thompson(re []postfixNode) (Reg, error) {
 					s.groupNum = lparenNode.groupNum
 					to_add := concatenate(lparenNode, s)
 					nfa = append(nfa, to_add)
-				} else if middleNode.groupBegin && len(middleNode.transitions) == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
+				} else if middleNode.groupBegin && middleNode.numTransitions() == 0 { // The middle node is a lone lparen - something like '(())', and I'm looking at the first rparen
 					nfa = append(nfa, lparenNode)    // I shouldn't have popped this out, because it is not involved in the current capturing group
 					s.groupNum = middleNode.groupNum // In this case, the 'middle' node is actually an lparen
 					to_add := concatenate(middleNode, s)
@@ -989,7 +1181,8 @@ func thompson(re []postfixNode) (Reg, error) {
 		if c.nodetype == charclassNode { // A Character class consists of all the nodes in it, alternated
 			// Map the list of nodes to a list of states, each state containing the contents of a specific node
 			states := funcMap(c.nodeContents, func(node postfixNode) *nfaState {
-				s := newState()
+				s := &nfaState{}
 				s.output = append(s.output, s)
 				nodeContents := node.contents
 				if caseInsensitive {
 					nodeContents = slices.Concat(funcMap(nodeContents, func(r rune) []rune {
@@ -1003,7 +1196,7 @@ func thompson(re []postfixNode) (Reg, error) {
 						return n.contents
 					})...)
 				}
-				return &s
+				return s
 			})
 			// Reduce the list of states down to a single state by alternating them
 			toAdd := funcReduce(states, func(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -1011,6 +1204,21 @@ func thompson(re []postfixNode) (Reg, error) {
 			})
 			nfa = append(nfa, toAdd)
 		}
 		if c.nodetype == backreferenceNode {
 			if c.referencedGroup > numGroups {
 				return Reg{}, fmt.Errorf("invalid backreference")
 			}
 			stateToAdd := &nfaState{}
 			stateToAdd.assert = noneAssert
 			stateToAdd.content = newContents(epsilon)
 			stateToAdd.isEmpty = true
 			stateToAdd.isBackreference = true
 			stateToAdd.output = make([]*nfaState, 0)
 			stateToAdd.output = append(stateToAdd.output, stateToAdd)
 			stateToAdd.referredGroup = c.referencedGroup
 			stateToAdd.threadBackref = 0
 			nfa = append(nfa, stateToAdd)
 		}
 		// Must be an operator if it isn't a character
 		switch c.nodetype {
 		case concatenateNode:
@@ -1030,17 +1238,23 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying kleene star")
 			}
-			stateToAdd, err := kleene(*s1)
+			stateToAdd, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
 			if c.isLazy {
 				stateToAdd.isLazy = true
 			}
 			nfa = append(nfa, stateToAdd)
 		case plusNode: // a+ is equivalent to aa*
 			s1 := mustPop(&nfa)
-			s2, err := kleene(*s1)
+			s2, err := kleene(s1)
 			if err != nil {
 				return Reg{}, err
 			}
 			if c.isLazy {
 				s2.isLazy = true
 			}
 			s1 = concatenate(s1, s2)
 			nfa = append(nfa, s1)
 		case questionNode: // ab? is equivalent to a(b|)
@@ -1048,7 +1262,13 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, fmt.Errorf("error applying question operator")
 			}
-			s2 := question(s1)
+			s2, err := question(s1)
 			if err != nil {
 				return Reg{}, err
 			}
 			if c.isLazy {
 				s2.isLazy = true
 			}
 			nfa = append(nfa, s2)
 		case pipeNode:
 			// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
@@ -1059,21 +1279,21 @@ func thompson(re []postfixNode) (Reg, error) {
 			// 	'|a'
 			// 	'^a|'
 			// 	'^|a'
-			s1, err1 := pop(&nfa)
+			s2, err1 := pop(&nfa)
-			s2, err2 := pop(&nfa)
+			s1, err2 := pop(&nfa)
-			if err2 != nil || (s2.groupBegin && len(s2.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err2 != nil || (s2.groupBegin && s2.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err2 == nil { // Roundabout way of saying that this node existed, but it was an LPAREN, so we append it back
 					nfa = append(nfa, s2)
 				}
 				tmp := zeroLengthMatchState()
-				s2 = &tmp
+				s2 = tmp
 			}
-			if err1 != nil || (s1.groupBegin && len(s1.transitions) == 0) { // Doesn't exist, or its just an LPAREN
+			if err1 != nil || (s1.groupBegin && s1.numTransitions() == 0) { // Doesn't exist, or its just an LPAREN
 				if err1 == nil { // See above for explanation
 					nfa = append(nfa, s1)
 				}
 				tmp := zeroLengthMatchState()
-				s1 = &tmp
+				s1 = tmp
 			}
 			s3 := alternate(s1, s2)
 			nfa = append(nfa, s3)
@@ -1100,14 +1320,24 @@ func thompson(re []postfixNode) (Reg, error) {
 				stateToAdd = concatenate(stateToAdd, cloneState(poppedState))
 			}
 			if c.endReps == infinite_reps { // Case 3
-				s2, err := kleene(*poppedState)
+				s2, err := kleene(poppedState)
 				if err != nil {
 					return Reg{}, err
 				}
 				if c.isLazy {
 					s2.isLazy = true
 				}
 				stateToAdd = concatenate(stateToAdd, s2)
 			} else { // Case 2
 				for i := c.startReps; i < c.endReps; i++ {
-					stateToAdd = concatenate(stateToAdd, question(cloneState(poppedState)))
+					tmp, err := question(cloneState(poppedState))
 					if err != nil {
 						return Reg{}, fmt.Errorf("error processing bounded repetition")
 					}
 					if c.isLazy {
 						tmp.isLazy = true
 					}
 					stateToAdd = concatenate(stateToAdd, tmp)
 				}
 			}
 			nfa = append(nfa, stateToAdd)
@@ -1117,9 +1347,13 @@ func thompson(re []postfixNode) (Reg, error) {
 		return Reg{}, fmt.Errorf("invalid regex")
 	}
-	verifyLastStates(nfa)
+	lastState := newState()
 	lastState.isLast = true
-	return Reg{nfa[0], numGroups}, nil
+	concatenate(nfa[0], &lastState)
 	// The string is empty here, because we add it in Compile()
 	return Reg{nfa[0], numGroups, "", false}, nil
 }
@@ -1137,10 +1371,11 @@ func Compile(re string, flags ...ReFlag) (Reg, error) {
 	if err != nil {
 		return Reg{}, fmt.Errorf("error compiling regex: %w", err)
 	}
 	reg.str = re
 	return reg, nil
 }
-// MustCompile panicks if Compile returns an error. They are identical in all other respects.
+// MustCompile panics if Compile returns an error. They are identical in all other respects.
 func MustCompile(re string, flags ...ReFlag) Reg {
 	reg, err := Compile(re, flags...)
 	if err != nil {
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -4,6 +4,8 @@ Package regex implements regular expression search, using a custom non-bracktrac
 The engine relies completely on UTF-8 codepoints. As such, it is capable of matching characters
 from other languages, emojis and symbols.
 The API and regex syntax are largely compatible with that of the stdlib's [regexp], with a few key differences (see 'Key Differences with regexp').
 The full syntax is specified below.
 # Syntax
@@ -16,7 +18,7 @@ Single characters:
 	[^abc]			Negated character class - match any character except a, b and c
 	[^a-z]			Negated character range - do not match any character from a to z
 	\[				Match a literal '['. Backslashes can escape any character with special meaning, including another backslash.
-	\452			Match the character with the octal value 452 (up to 3 digits)
+	\0452			Match the character with the octal value 452 (up to 4 digits, first digit must be 0)
 	\xFF			Match the character with the hex value FF (exactly 2 characters)
 	\x{0000FF}		Match the character with the hex value 0000FF (exactly 6 characters)
 	\n				Newline
@@ -31,7 +33,7 @@ Perl classes:
 	\d				Match any digit character ([0-9])
 	\D				Match any non-digit character ([^0-9])
 	\w				Match any word character ([a-zA-Z0-9_])
-	\W				Match any word character ([^a-zA-Z0-9_])
+	\W				Match any non-word character ([^a-zA-Z0-9_])
 	\s				Match any whitespace character ([ \t\n])
 	\S				Match any non-whitespace character ([^ \t\n])
@@ -55,17 +57,27 @@ POSIX classes (inside normal character classes):
 Composition:
 	def				Match d, followed by e, followed by f
-	x|y				Match x or y (prefer longer one)
+	x|y				Match x or y (prefer x)
-	xy|z			Match xy or z
+	xy|z			Match xy or z (prefer xy)
-Repitition (always greedy, preferring more):
+Repetition:
-	x*				Match x zero or more times
+	Greedy:
-	x+				Match x one or more times
+	x*				Match x zero or more times, prefer more
-	x?				Match x zero or one time
+	x+				Match x one or more times, prefer more
-	x{m,n}			Match x between m and n times (inclusive)
+	x?				Match x zero or one time, prefer one
-	x{m,}			Match x atleast m times
+	x{m,n}			Match x between m and n times (inclusive), prefer more
-	x{,n}			Match x between 0 and n times (inclusive)
+	x{m,}			Match x at least m times, prefer more
 	x{,n}			Match x between 0 and n times (inclusive), prefer more
 	x{m}			Match x exactly m times
 	Lazy:
 	x*?				Match x zero or more times, prefer fewer
 	x+?				Match x one or more times, prefer fewer
 	x??				Match x zero or one time, prefer zero
 	x{m,n}?			Match x between m and n times (inclusive), prefer fewer
 	x{m,}?			Match x at least m times, prefer fewer
 	x{,n}?			Match x between 0 and n times (inclusive), prefer fewer
 	x{m}			Match x exactly m times
 Grouping:
@@ -91,48 +103,33 @@ Lookarounds:
 	(?<=x)y			Positive lookbehind - Match y if preceded by x
 	(?<!x)y			Negative lookbehind - Match y if NOT preceded by x
 Backreferences:
 	(xy)\1			Match 'xy' followed by the text most recently captured by group 1 (in this case, 'xy')
 Numeric ranges:
 	<x-y>			Match any number from x to y (inclusive) (x and y must be positive numbers)
 	\<x				Match a literal '<' followed by x
 # Key Differences with regexp
-The engine and the API differ from [regexp] in a number of ways, some of them very subtle.
+The engine and the API differ from [regexp] in a few ways, some of them very subtle.
 The key differences are mentioned below.
-1. Greediness:
+1. Byte-slices and runes:
 This engine does not support non-greedy operators. All operators are always greedy in nature, and will try
 to match as much as they can, while still allowing for a successful match. For example, given the regex:
 	y*y
 The engine will match as many 'y's as it can, while still allowing the trailing 'y' to be matched.
 Another, more subtle example is the following regex:
 	x|xx
 While the stdlib implementation (and most other engines) will prefer matching the first item of the alternation,
 this engine will go for the longest possible match, regardless of the order of the alternation. Although this
 strays from the convention, it results in a nice rule-of-thumb - the engine is ALWAYS greedy.
 The stdlib implementation has a function [regexp.Regexp.Longest] which makes future searches prefer the longest match.
 That is the default (and unchangable) behavior in this engine.
 2. Byte-slices and runes:
 My engine does not support byte-slices. When a matching function receives a string, it converts it into a
 rune-slice to iterate through it. While this has some space overhead, the convenience of built-in unicode
 support made the tradeoff worth it.
-3. Return values
+2. Return values
 Rather than using primitives for return values, my engine defines two types that are used as return
 values: a [Group] represents a capturing group, and a [Match] represents a list of groups.
 [regexp] specifies a regular expression that gives a list of all the matching functions that it supports. The
-equivalent expression for this engine is:
+equivalent expression for this engine is shown below. Note that 'Index' is the default.
 	Find(All)?(String)?(Submatch)?
@@ -140,7 +137,7 @@ equivalent expression for this engine is:
 If a function contains 'All' it returns all matches instead of just the leftmost one.
-If a function contains 'String' it returns the matched text, rather than the indices.
+If a function contains 'String' it returns the matched text, rather than the index in the string.
 If a function contains 'Submatch' it returns the match, including all submatches found by
 capturing groups.
@@ -156,5 +153,20 @@ and the input string:
 The 0th group would contain 'xy' and the 1st group would contain 'y'. Any matching function without 'Submatch' in its name
 returns the 0-group.
 # Feature Differences
 The following features from [regexp] are (currently) NOT supported:
 1. Named capturing groups
 2. Negated POSIX classes
 3. Embedded flags (flags are instead passed as arguments to [Compile])
 4. Literal text with \Q ... \E
 The following features are not available in [regexp], but are supported in my engine:
 1. Lookarounds
 2. Numeric ranges
 3. Backreferences
 I hope to shorten the first list, and expand the second.
 */
 package regex
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -2,6 +2,7 @@ package regex_test
 import (
 	"fmt"
 	"strings"
 	"gitea.twomorecents.org/Rockingcool/kleingrep/regex"
 )
@@ -32,12 +33,12 @@ func ExampleReg_FindAll() {
 }
 func ExampleReg_FindString() {
-	regexStr := `\d+`
+	regexStr := `\w+\s+(?=sheep)`
 	regexComp := regex.MustCompile(regexStr)
-	matchStr := regexComp.FindString("The year of our lord, 2025")
+	matchStr := regexComp.FindString("pink cows and yellow sheep")
 	fmt.Println(matchStr)
-	// Output: 2025
+	// Output: yellow
 }
 func ExampleReg_FindSubmatch() {
@@ -52,3 +53,129 @@ func ExampleReg_FindSubmatch() {
 	// 0	1
 	// 2	3
 }
 // ExampleReg_FindStringSubmatch prints the text of the first and third capturing groups of a date match.
 func ExampleReg_FindStringSubmatch() {
 	// Three capturing groups: four digits, two digits, two digits.
 	datePattern := regex.MustCompile(`(\d{4})-(\d{2})-(\d{2})`)
 	groups := datePattern.FindStringSubmatch(`The date is 2025-02-10`)
 	fmt.Println(groups[1])
 	fmt.Println(groups[3])
 	// Output: 2025
 	// 10
 }
 // ExampleReg_FindAllSubmatch prints the 0-group and the 1st group of the first two matches.
 func ExampleReg_FindAllSubmatch() {
 	decimalRe := regex.MustCompile(`(\d)\.(\d)(\d)`)
 	found := decimalRe.FindAllSubmatch("3.14+8.97")
 	for matchNum := 0; matchNum < 2; matchNum++ {
 		fmt.Println(found[matchNum][0]) // 0-group (entire match) of this match
 		fmt.Println(found[matchNum][1]) // 1st group of this match
 	}
 	// Output: 0	4
 	// 0	1
 	// 5	9
 	// 5	6
 }
 // ExampleReg_FindAllString uses numeric ranges to print the two dotted-quad addresses in the input.
 func ExampleReg_FindAllString() {
 	addrRe := regex.MustCompile(`<0-255>\.<0-255>\.<0-255>\.<0-255>`)
 	addrs := addrRe.FindAllString(`192.168.220.7 pings 9.9.9.9`)
 	for n := 0; n < 2; n++ {
 		fmt.Println(addrs[n])
 	}
 	// Output: 192.168.220.7
 	// 9.9.9.9
 }
 // ExampleReg_FindAllStringSubmatch prints the text of both capturing groups for the first two matches.
 func ExampleReg_FindAllStringSubmatch() {
 	// The pattern reads as:
 	//   'https://' ...
 	//   one or more of a-z, 0-9 or period (1st group) ...
 	//   a forward slash ...
 	//   one or more of: word character, period, question mark, equals sign (2nd group).
 	urlRe := regex.MustCompile(`https://([a-z0-9\.]+)/([\w.?=]+)`, regex.RE_CASE_INSENSITIVE)
 	text := `You can find me at https://twomorecents.org/index.html and https://news.ycombinator.com/user?id=aadhavans`
 	found := urlRe.FindAllStringSubmatch(text)
 	// Matches are 0-indexed; group 0 (the whole match) is skipped.
 	for matchNum := 0; matchNum < 2; matchNum++ {
 		for groupNum := 1; groupNum <= 2; groupNum++ {
 			fmt.Println(found[matchNum][groupNum])
 		}
 	}
 	// Output: twomorecents.org
 	// index.html
 	// news.ycombinator.com
 	// user?id=aadhavans
 }
 // ExampleReg_Expand expands a "$1 = $2" template once per match and prints the accumulated text.
 func ExampleReg_Expand() {
 	src := `option1: value1
 	option2: value2`
 	pairRe := regex.MustCompile(`(\w+): (\w+)`, regex.RE_MULTILINE)
 	const tmpl = "$1 = $2\n"
 	var out string
 	// Each call appends the expanded template for one match to out.
 	for _, m := range pairRe.FindAllSubmatch(src) {
 		out = pairRe.Expand(out, tmpl, src, m)
 	}
 	fmt.Println(out)
 	// Output: option1 = value1
 	// option2 = value2
 }
 // ExampleReg_LiteralPrefix prints the literal prefix of an expression and whether it is the whole expression.
 func ExampleReg_LiteralPrefix() {
 	prefix, complete := regex.MustCompile(`a(b|c)d*`).LiteralPrefix()
 	fmt.Println(prefix)
 	fmt.Println(complete)
 	// Output: a
 	// false
 }
 // ExampleReg_Longest prints the match for the same input before and after calling Longest.
 func ExampleReg_Longest() {
 	const input = "xx"
 	re := regex.MustCompile(`x|xx`)
 	before := re.FindString(input)
 	fmt.Println(before)
 	re.Longest()
 	after := re.FindString(input)
 	fmt.Println(after)
 	// Output: x
 	// xx
 }
 // ExampleReg_ReplaceAll swaps the two capturing groups of every match.
 func ExampleReg_ReplaceAll() {
 	swapped := regex.MustCompile(`(\d)(\w)`).ReplaceAll("5d9t", `$2$1`)
 	fmt.Println(swapped)
 	// Output: d5t9
 }
 // ExampleReg_ReplaceAllLiteral replaces every match with a fixed word.
 func ExampleReg_ReplaceAllLiteral() {
 	animalRe := regex.MustCompile(`fox|dog`)
 	sentence := "the quick brown fox jumped over the lazy dog"
 	replaced := animalRe.ReplaceAllLiteral(sentence, `duck`)
 	fmt.Println(replaced)
 	// Output: the quick brown duck jumped over the lazy duck
 }
 // ExampleReg_ReplaceAllFunc passes every match through strings.ToUpper.
 func ExampleReg_ReplaceAllFunc() {
 	longWordRe := regex.MustCompile(`\w{5,}`)
 	sentence := `all five or more letter words in this string are capitalized`
 	shouted := longWordRe.ReplaceAllFunc(sentence, strings.ToUpper)
 	fmt.Println(shouted)
 	// Output: all five or more LETTER WORDS in this STRING are CAPITALIZED
 }
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -1,10 +1,9 @@
 package regex
 import (
 	"container/heap"
 	"fmt"
-	"slices"
+	"strconv"
-	"sort"
+	"unicode"
 )
 // A Match represents a match found by the regex in a given string.
@@ -16,7 +15,7 @@ import (
 // See [Reg.FindSubmatch] for an example.
 type Match []Group
-// a Group represents a group. It contains the start index and end index of the match
+// a Group represents a capturing group. It contains the start and end index of the group.
 type Group struct {
 	StartIdx int
 	EndIdx   int
@@ -31,17 +30,6 @@ func newMatch(size int) Match {
 	return toRet
 }
 // numValidGroups counts the groups in the match whose start and end indices are both non-negative.
 func (m Match) numValidGroups() int {
 	count := 0
 	for i := range m {
 		// A negative index on either side marks the group as not valid.
 		if m[i].StartIdx < 0 || m[i].EndIdx < 0 {
 			continue
 		}
 		count++
 	}
 	return count
 }
 // Returns a string containing the indices of all (valid) groups in the match
 func (m Match) String() string {
 	var toRet string
@@ -60,7 +48,7 @@ func (idx Group) String() string {
 	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }
-// Returns whether a group is valid (ie. whether it matched any text). It
+// IsValid returns whether a group is valid (ie. whether it matched any text). It
 // simply ensures that both indices of the group are >= 0.
 func (g Group) IsValid() bool {
 	return g.StartIdx >= 0 && g.EndIdx >= 0
@@ -71,101 +59,42 @@ func getZeroGroup(m Match) Group {
 	return m[0]
 }
-// takeZeroState takes the 0-state (if such a transition exists) for all states in the
+func copyThread(to *nfaState, from nfaState) {
-// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
+	to.threadGroups = append([]Group{}, from.threadGroups...)
 // the second ret val is true.
 // If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
 func takeZeroState(states []*nfaState, numGroups int, idx int) (rtv []*nfaState, isZero bool) {
 	for _, state := range states {
 		if len(state.transitions[epsilon]) > 0 {
 			for _, s := range state.transitions[epsilon] {
 				if s.threadGroups == nil {
 					s.threadGroups = newMatch(numGroups + 1)
 				}
 				copy(s.threadGroups, state.threadGroups)
 				if s.groupBegin {
 					s.threadGroups[s.groupNum].StartIdx = idx
 					//					openParenGroups = append(openParenGroups, s.groupNum)
 				}
 				if s.groupEnd {
 					s.threadGroups[s.groupNum].EndIdx = idx
 					//					closeParenGroups = append(closeParenGroups, s.groupNum)
 				}
 			}
 			rtv = append(rtv, state.transitions[epsilon]...)
 		}
 	}
 	for _, state := range rtv {
 		if len(state.transitions[epsilon]) > 0 {
 			return rtv, true
 		}
 	}
 	return rtv, false
 }
 // zeroMatchPossible reports whether a zero-length match is possible from any
 // of the given states at position idx of str.
 // It repeatedly calls takeZeroState, collecting every state reached, and then
 // looks for a collected state that is empty, is a last state, and whose
 // assertion (if it has one) holds at idx.
 func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*nfaState) bool {
 	reached, more := takeZeroState(states, numGroups, idx)
 	// candidates holds the starting states followed by everything reached so far.
 	candidates := make([]*nfaState, 0, len(reached)+len(states))
 	candidates = append(candidates, states...)
 	candidates = append(candidates, reached...)
 	for more {
 		var added int
 		reached, more = takeZeroState(candidates, numGroups, idx)
 		candidates, added = uniqueAppend(candidates, reached...)
 		if added == 0 {
 			// No new unique states were found, so another round would find nothing new.
 			break
 		}
 	}
 	for _, s := range candidates {
 		if !s.isEmpty {
 			continue
 		}
 		if s.assert != noneAssert && !s.checkAssertion(str, idx) {
 			continue
 		}
 		if s.isLast {
 			return true
 		}
 	}
 	return false
 }
 // pruneIndices removes overlapping matches from indices and returns the survivors
 // ordered by the start index of their 0-group.
 //
 // When two matches overlap, the one whose 0-group ends later is kept. The input
 // slice is sorted in place as a side effect. An empty (or nil) input is returned
 // unchanged.
 func pruneIndices(indices []Match) []Match {
 	// Nothing to prune; this also avoids indexing indices[0] on an empty slice.
 	if len(indices) == 0 {
 		return indices
 	}
 	// First, sort the slice by the start indices
 	sort.Slice(indices, func(i, j int) bool {
 		return indices[i][0].StartIdx < indices[j][0].StartIdx
 	})
 	toRet := make([]Match, 0, len(indices))
 	current := indices[0]
 	for _, idx := range indices[1:] {
 		// idx doesn't overlap with current (starts after current ends), so add current to result
 		// and update the current.
 		if idx[0].StartIdx >= current[0].EndIdx {
 			toRet = append(toRet, current)
 			current = idx
 		} else if idx[0].EndIdx > current[0].EndIdx {
 			// idx overlaps, but it ends later, so it replaces current
 			current = idx
 		}
 	}
 	// The last surviving match has not been added yet.
 	toRet = append(toRet, current)
 	return toRet
 }
 // Find returns the 0-group of the leftmost match of the regex in the given string.
 // An error value != nil indicates that no match was found.
-func (regex Reg) Find(str string) (Group, error) {
+func (re Reg) Find(str string) (Group, error) {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Group{}, fmt.Errorf("no matches found")
 	}
 	return getZeroGroup(match), nil
 }
 // Match reports whether the regex finds a match anywhere in the given string.
 func (re Reg) Match(str string) bool {
 	if _, err := re.Find(str); err != nil {
 		// Find reports "no match" through a non-nil error.
 		return false
 	}
 	return true
 }
 // CompileMatch compiles expr, then reports whether str contains a match of it.
 // It is equivalent to [regexp.Match].
 // Any flags given (see [ReFlag]) are passed on to [Compile].
 // If compilation fails, the result is false together with the compile error.
 func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
 	compiled, compileErr := Compile(expr, flags...)
 	if compileErr == nil {
 		return compiled.Match(str), nil
 	}
 	return false, compileErr
 }
 // FindAll returns a slice containing all the 0-groups of the regex in the given string.
 // A 0-group represents the match without any submatches.
-func (regex Reg) FindAll(str string) []Group {
+func (re Reg) FindAll(str string) []Group {
-	indices := regex.FindAllSubmatch(str)
+	indices := re.FindAllSubmatch(str)
 	zeroGroups := funcMap(indices, getZeroGroup)
 	return zeroGroups
 }
@@ -174,8 +103,8 @@ func (regex Reg) FindAll(str string) []Group {
 // The return value will be an empty string in two situations:
 //  1. No match was found
 //  2. The match was an empty string
-func (regex Reg) FindString(str string) string {
+func (re Reg) FindString(str string) string {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return ""
 	}
@@ -188,8 +117,8 @@ func (regex Reg) FindString(str string) string {
 // number of groups. The validity of a group (whether or not it matched anything) can be determined with
 // [Group.IsValid], or by checking that both indices of the group are >= 0.
 // The second-return value is nil if no match was found.
-func (regex Reg) FindSubmatch(str string) (Match, error) {
+func (re Reg) FindSubmatch(str string) (Match, error) {
-	match, err := regex.FindNthMatch(str, 1)
+	match, err := re.FindNthMatch(str, 1)
 	if err != nil {
 		return Match{}, fmt.Errorf("no match found")
 	} else {
@@ -197,11 +126,41 @@ func (regex Reg) FindSubmatch(str string) (Match, error) {
 	}
 }
-// FindAllString is the 'all' version of FindString.
+// FindStringSubmatch is the 'string' version of [FindSubmatch]. It returns a slice of strings,
 // where the string at index i contains the text matched by the i-th capturing group.
 // The 0-th index represents the entire match.
 // An empty string at index n could mean:
 //
 //  1. Group n did not find a match
 //  2. Group n found a zero-length match
 //
 // A return value of nil indicates no match.
 func (re Reg) FindStringSubmatch(str string) []string {
 	match, err := re.FindSubmatch(str)
 	if err != nil {
 		return nil
 	}
 	// Group indices count runes, not bytes (the matcher walks []rune(str)), so the
 	// text must be sliced from the rune form of str. Slicing str directly would
 	// return the wrong text for any input containing multi-byte characters.
 	strRunes := []rune(str)
 	matchStr := make([]string, re.numGroups+1)
 	validGroupFound := false
 	for i, g := range match {
 		if !g.IsValid() {
 			continue // groups that are not valid keep the zero value ""
 		}
 		matchStr[i] = string(strRunes[g.StartIdx:g.EndIdx])
 		validGroupFound = true
 	}
 	// A match with no valid group at all is reported as "no match".
 	if !validGroupFound {
 		return nil
 	}
 	return matchStr
 }
 // FindAllString is the 'all' version of [FindString].
 // It returns a slice of strings containing the text of all matches of
 // the regex in the given string.
-func (regex Reg) FindAllString(str string) []string {
+func (re Reg) FindAllString(str string) []string {
-	zerogroups := regex.FindAll(str)
+	zerogroups := re.FindAll(str)
 	matchStrs := funcMap(zerogroups, func(g Group) string {
 		return str[g.StartIdx:g.EndIdx]
 	})
@@ -210,14 +169,14 @@ func (regex Reg) FindAllString(str string) []string {
 // FindNthMatch return the 'n'th match of the regex in the given string.
 // It returns an error (!= nil) if there are fewer than 'n' matches in the string.
-func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
+func (re Reg) FindNthMatch(str string, n int) (Match, error) {
 	idx := 0
 	matchNum := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			matchNum++
 		}
@@ -230,31 +189,90 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
 }
 // FindAllSubmatch returns a slice of matches in the given string.
-func (regex Reg) FindAllSubmatch(str string) []Match {
+func (re Reg) FindAllSubmatch(str string) []Match {
 	idx := 0
 	str_runes := []rune(str)
 	var matchFound bool
 	var matchIdx Match
 	indices := make([]Match, 0)
 	for idx <= len(str_runes) {
-		matchFound, matchIdx, idx = findAllSubmatchHelper(regex.start, str_runes, idx, regex.numGroups)
+		matchFound, matchIdx, idx = findAllSubmatchHelper(re.start, str_runes, idx, re.numGroups, re.preferLongest)
 		if matchFound {
 			indices = append(indices, matchIdx)
 		}
 	}
 	if len(indices) > 0 {
 		return pruneIndices(indices)
 	}
 	return indices
 }
 // FindAllStringSubmatch returns a double-slice of strings. Each inner slice contains the text of one match,
 // including all submatches: index 0 holds the entire match and index i the text of the i-th capturing group.
 // A group that is not valid is represented by an empty string.
 // A return value of nil indicates no match.
 func (re Reg) FindAllStringSubmatch(str string) [][]string {
 	matches := re.FindAllSubmatch(str)
 	if len(matches) == 0 {
 		return nil
 	}
 	// Group indices count runes, not bytes (the matcher walks []rune(str)), so the
 	// text is sliced from the rune form of str rather than from str itself.
 	strRunes := []rune(str)
 	rtv := make([][]string, len(matches))
 	for i, m := range matches {
 		groupStrs := make([]string, len(m))
 		for j, g := range m {
 			if g.IsValid() {
 				groupStrs[j] = string(strRunes[g.StartIdx:g.EndIdx])
 			}
 		}
 		rtv[i] = groupStrs
 	}
 	return rtv
 }
 // addStateToList adds state to list for position idx of str and returns the
 // resulting list. States that split (quantifiers, alternation), assert, or
 // open/close a capturing group are not appended themselves; the function
 // recurses into their successor(s) instead. Any other state is appended.
 //
 // threadGroups is the capture-group record carried by the current thread.
 // visited lists states already expanded during this call chain; a state found
 // in either list or visited is skipped, and list is returned unchanged.
 // preferLongest is passed through to checkAssertion and the recursive calls.
 func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
 	if stateExists(list, state) || stateExists(visited, state) {
 		return list
 	}
 	visited = append(visited, state)
 	// The order of the two recursive calls below decides the order in which
 	// successor states land in list: splitState first for greedy quantifiers,
 	// next first for alternation and lazy quantifiers.
 	if (state.isKleene || state.isQuestion) && (state.isLazy == false) { // Greedy quantifiers
 		copyThread(state.splitState, state)
 		// NOTE(review): ':=' declares a new 'list' that shadows the parameter inside this block.
 		list := addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		copyThread(state.next, state)
 		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		return list
 	}
 	if state.isAlternation || ((state.isKleene || state.isQuestion) && state.isLazy) { // Alternation or lazy quantifier
 		copyThread(state.next, state)
 		// NOTE(review): same shadowing of 'list' as in the greedy branch above.
 		list := addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		copyThread(state.splitState, state)
 		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		return list
 	}
 	// Give this state its own copy of the thread's groups so that the writes
 	// below do not modify the caller's slice.
 	state.threadGroups = append([]Group{}, threadGroups...)
 	if state.assert != noneAssert {
 		if state.checkAssertion(str, idx, preferLongest) {
 			copyThread(state.next, state)
 			return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 		}
 		// NOTE(review): a failed assertion falls through to the checks below, so the
 		// state can still be appended to list - confirm this is intended.
 	}
 	if state.groupBegin {
 		// Record where this capturing group starts, then continue with the next state.
 		state.threadGroups[state.groupNum].StartIdx = idx
 		copyThread(state.next, state)
 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	if state.groupEnd {
 		// Record where this capturing group ends, then continue with the next state.
 		state.threadGroups[state.groupNum].EndIdx = idx
 		copyThread(state.next, state)
 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	return append(list, state)
 }
 // Helper for FindAllMatches. Returns whether it found a match, the
 // first Match it finds, and how far it got into the string ie. where
 // the next search should start from.
-//
+func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int, preferLongest bool) (bool, Match, int) {
 //	Might return duplicates or overlapping indices, so care must be taken to prune the resulting array.
 func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups int) (bool, Match, int) {
 	// Base case - exit if offset exceeds string's length
 	if offset > len(str) {
 		// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
@@ -262,239 +280,197 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 	}
 	resetThreads(start)
-	// Hold a list of match indices for the current run. When we
+	currentStates := make([]nfaState, 0)
-	// can no longer find a match, the match with the largest range is
+	nextStates := make([]nfaState, 0)
-	// chosen as the match for the entire string.
+	i := offset // Index in string
 	// This allows us to pick the longest possible match (which is how greedy matching works).
 	// COMMENT ABOVE IS CURRENTLY NOT UP-TO-DATE
 	tempIndices := newMatch(numGroups + 1)
 	foundPath := false
 	startIdx := offset
 	endIdx := offset
 	currentStates := &priorityQueue{}
 	heap.Init(currentStates)
 	tempStates := make([]*nfaState, 0) // Used to store states that should be used in next loop iteration
 	i := offset                        // Index in string
 	startingFrom := i                  // Store starting index
 	// If the first state is an assertion, makes sure the assertion
 	// is true before we do _anything_ else.
 	if start.assert != noneAssert {
-		if start.checkAssertion(str, offset) == false {
+		if start.checkAssertion(str, offset, preferLongest) == false {
 			i++
 			return false, []Group{}, i
 		}
 	}
 	// Increment until we hit a character matching the start state (assuming not 0-state)
 	if start.isEmpty == false {
 		for i < len(str) && !start.contentContains(str, i) {
 			i++
 		}
 		startIdx = i
 		startingFrom = i
 		i++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
 	}
 	start.threadGroups = newMatch(numGroups + 1)
-	// Check if the start state begins a group - if so, add the start index to our list
+	start.threadGroups[0].StartIdx = i
-	if start.groupBegin {
+	currentStates = addStateToList(str, i, currentStates, *start, start.threadGroups, nil, preferLongest)
-		start.threadGroups[start.groupNum].StartIdx = i
+	var match Match = nil
-		//		tempIndices[start.groupNum].startIdx = i
+	for idx := i; idx <= len(str); idx++ {
-	}
+		if len(currentStates) == 0 {
 			break
 		}
 		for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ {
 			currentState := currentStates[currentStateIdx]
-	start.threadSP = i
+			if currentState.threadGroups == nil {
-	heap.Push(currentStates, newPriorQueueItem(start))
+				currentState.threadGroups = newMatch(numGroups + 1)
-	// Main loop
+				currentState.threadGroups[0].StartIdx = idx
 	for currentStates.Len() > 0 {
 		currentState := heap.Pop(currentStates)
 		foundPath = false
 		zeroStates := make([]*nfaState, 0)
 		// Keep taking zero-states, until there are no more left to take
 		// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
 		topStateItem := currentStates.peek()
 		topState := topStateItem.(*priorQueueItem).state
 		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
 		tempStates = append(tempStates, zeroStates...)
 		num_appended := 0
 		for isZero == true {
 			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 			if num_appended == 0 { // Break if we haven't appended any more unique values
 				break
 			}
 		}
 		if isZero == true {
 			currentStates.Pop()
 		}
-		for _, state := range tempStates {
+			if currentState.isLast {
-			heap.Push(currentStates, newPriorQueueItem(state))
+				currentState.threadGroups[0].EndIdx = idx
-		}
+				match = append([]Group{}, currentState.threadGroups...)
-		tempStates = nil
+				if !preferLongest {
-
+					break
-		// Take any transitions corresponding to current character
+				}
-		numStatesMatched := 0            // The number of states which had at least 1 match for this round
+			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.isBackreference && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
-		assertionFailed := false         // Whether or not an assertion failed for this round
+				if currentState.contentContains(str, idx, preferLongest) {
-		lastStateInList := false         // Whether or not a last state was in our list of states
+					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
-		var lastStatePtr *nfaState = nil // Pointer to the last-state, if it was found
+				}
-		lastLookaroundInList := false    // Whether or not a last state (that is a lookaround) was in our list of states
+			} else if currentState.isBackreference && currentState.threadGroups[currentState.referredGroup].IsValid() {
-		for numStatesMatched == 0 && lastStateInList == false {
+				groupLength := currentState.threadGroups[currentState.referredGroup].EndIdx - currentState.threadGroups[currentState.referredGroup].StartIdx
-			if currentStates.Len() == 0 {
+				if currentState.threadBackref == groupLength {
-				break
+					currentState.threadBackref = 0
-			}
+					copyThread(currentState.next, currentState)
-			stateItem := heap.Pop(currentStates)
+					currentStates = addStateToList(str, idx, currentStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
-			state := stateItem.(*priorQueueItem).state
+				} else {
-			matches, numMatches := state.matchesFor(str, i)
+					idxInReferredGroup := currentState.threadGroups[currentState.referredGroup].StartIdx + currentState.threadBackref
-			if numMatches > 0 {
+					if idxInReferredGroup < len(str) && idx < len(str) && str[idxInReferredGroup] == str[idx] {
-				numStatesMatched++
+						currentState.threadBackref += 1
-				tempStates = append([]*nfaState(nil), matches...)
+						nextStates = append(nextStates, currentState)
 				foundPath = true
 				for _, m := range matches {
 					if m.threadGroups == nil {
 						m.threadGroups = newMatch(numGroups + 1)
 					}
 					m.threadSP = state.threadSP + 1
 					copy(m.threadGroups, state.threadGroups)
 				}
 			}
 			if numMatches < 0 {
 				assertionFailed = true
 			}
 			if state.isLast {
 				if state.isLookaround() {
 					lastLookaroundInList = true
 				}
 				lastStateInList = true
 				lastStatePtr = state
 			}
 		}
-		if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
+		}
-			// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
+		currentStates = append([]nfaState{}, nextStates...)
-			// state. The explanation below is my attempt to explain this behavior.
+		nextStates = nil
-			// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
+	}
-			//
+	if match != nil {
-			// One of the states in our list was a last state and a lookaround. In this case, we
+		if offset == match[0].EndIdx {
-			// don't abort upon failure of the assertion, because we have found
+			return true, match, match[0].EndIdx + 1
-			// another path to a final state.
+		}
-			// Even if the last state _was_ an assertion, we can use the previously
+		return true, match, match[0].EndIdx
-			// saved indices to find a match.
+	}
-			if lastLookaroundInList {
+	return false, []Group{}, i + 1
-				break
+}
 // Expand appends template to dst, expanding any variables in template to the relevant capturing group.
 //
 // A variable is of the form '$n', where 'n' is a number. It will be replaced by the contents of the n-th capturing group.
 // To insert a literal $, do not put a number after it. Alternatively, you can use $$.
 // src is the input string, and match must be the result of [Reg.FindSubmatch].
 func (re Reg) Expand(dst string, template string, src string, match Match) string {
 	templateRuneSlc := []rune(template)
 	srcRuneSlc := []rune(src)
 	i := 0
 	for i < len(templateRuneSlc) {
 		c := templateRuneSlc[i]
 		if c == '$' {
 			i += 1
 			// The dollar sign is the last character of the string, or it is followed by another dollar sign
 			if i >= len(templateRuneSlc) || templateRuneSlc[i] == '$' {
 				dst += "$"
 				i++
 			} else {
-				if i == startingFrom {
+				numStr := ""
 				for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
 					numStr += string(templateRuneSlc[i])
 					i++
 				}
-				return false, []Group{}, i
+				if numStr == "" {
-			}
+					dst += "$"
 		}
 		// Check if we can find a state in our list that is:
 		// 	a. A last-state
 		// 	b. Empty
 		// 	c. Doesn't assert anything
 		for _, stateItem := range *currentStates {
 			s := stateItem.state
 			if s.isLast && s.isEmpty && s.assert == noneAssert {
 				lastStatePtr = s
 				lastStateInList = true
 			}
 		}
 		if lastStateInList && numStatesMatched == 0 { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
 			for j := 1; j < numGroups+1; j++ {
 				tempIndices[j] = lastStatePtr.threadGroups[j]
 			}
 			endIdx = i
 			tempIndices[0] = Group{startIdx, endIdx}
 			if tempIndices[0].StartIdx == tempIndices[0].EndIdx {
 				return true, tempIndices, tempIndices[0].EndIdx + 1
 			} else {
 				return true, tempIndices, tempIndices[0].EndIdx
 			}
 		}
 		// Check if we can find a zero-length match
 		if foundPath == false {
 			currentStatesList := funcMap(*currentStates, func(item *priorQueueItem) *nfaState {
 				return item.state
 			})
 			if ok := zeroMatchPossible(str, i, numGroups, currentStatesList...); ok {
 				if tempIndices[0].IsValid() == false {
 					tempIndices[0] = Group{startIdx, startIdx}
 				}
 			}
 			// If we haven't moved in the string, increment the counter by 1
 			// to ensure we don't keep trying the same string over and over.
 			//			if i == startingFrom {
 			startIdx++
 			//	i++
 			//			}
 			if tempIndices.numValidGroups() > 0 && tempIndices[0].IsValid() {
 				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 					return true, tempIndices, tempIndices[0].EndIdx + 1
 				} else {
-					return true, tempIndices, tempIndices[0].EndIdx
+					num, _ := strconv.Atoi(numStr)
 					if num < len(match) {
 						dst += string(srcRuneSlc[match[num].StartIdx:match[num].EndIdx])
 					} else {
 						dst += "$" + numStr
 					}
 				}
 			}
 			return false, []Group{}, startIdx
 		}
 		currentStates = &priorityQueue{}
 		slices.Reverse(tempStates)
 		for _, state := range tempStates {
 			heap.Push(currentStates, newPriorQueueItem(state))
 		}
 		tempStates = nil
 		i++
 	}
 	// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
 	// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
 	if currentStates.Len() > 0 {
 		topStateItem := currentStates.peek()
 		topState := topStateItem.(*priorQueueItem).state
 		zeroStates, isZero := takeZeroState([]*nfaState{topState}, numGroups, i)
 		tempStates = append(tempStates, zeroStates...)
 		num_appended := 0 // Number of unique states addded to tempStates
 		for isZero == true {
 			zeroStates, isZero = takeZeroState(tempStates, numGroups, i)
 			tempStates, num_appended = uniqueAppend(tempStates, zeroStates...)
 			if num_appended == 0 { // Break if we haven't appended any more unique values
 				break
 			}
 		}
 	}
 	for _, state := range tempStates {
 		heap.Push(currentStates, newPriorQueueItem(state))
 	}
 	tempStates = nil
 	for _, stateItem := range *currentStates {
 		state := stateItem.state
 		// Only add the match if the start index is in bounds. If the state has an assertion,
 		// make sure the assertion checks out.
 		if state.isLast && i <= len(str) {
 			if state.assert == noneAssert || state.checkAssertion(str, i) {
 				for j := 1; j < numGroups+1; j++ {
 					tempIndices[j] = state.threadGroups[j]
 				}
 				endIdx = i
 				tempIndices[0] = Group{startIdx, endIdx}
 			}
 		}
 	}
 	if tempIndices.numValidGroups() > 0 {
 		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
 			return true, tempIndices, tempIndices[0].EndIdx + 1
 		} else {
-			return true, tempIndices, tempIndices[0].EndIdx
+			dst += string(c)
 			i++
 		}
 	}
-	if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
+	return dst
-		startIdx++
+}
-	}
+
-	return false, []Group{}, startIdx
+// LiteralPrefix returns a string that must begin any match of the given regular expression.
 // The second return value is true if the string comprises the entire expression.
 func (re Reg) LiteralPrefix() (prefix string, complete bool) {
 	cur := re.start
 	// If the expression begins with an assertion state, start from the state after it.
 	if cur.assert != noneAssert {
 		cur = cur.next
 	}
 	for {
 		// Stop at the last state, an alternation, an assertion, or any state
 		// whose content is not exactly one element.
 		if cur.isLast || cur.isAlternation || len(cur.content) != 1 || cur.assert != noneAssert {
 			break
 		}
 		// Group-begin and group-end states are stepped over without adding to the prefix.
 		if !cur.groupBegin && !cur.groupEnd {
 			prefix += string(rune(cur.content[0]))
 		}
 		cur = cur.next
 	}
 	// The prefix is the whole expression only if the walk ended on the last state.
 	return prefix, cur.isLast
 }
 // ReplaceAll replaces all matches of the expression in src, with the text in repl. In repl, variables are interpreted
 // as they are in [Reg.Expand]. The resulting string is returned.
 //
 // Text outside the matches is copied through unchanged. Match indices count runes, so src is walked
 // rune by rune rather than byte by byte; this keeps multi-byte characters intact.
 func (re Reg) ReplaceAll(src string, repl string) string {
 	matches := re.FindAllSubmatch(src)
 	srcRunes := []rune(src)
 	var dst []byte
 	i := 0 // rune index into src
 	currentMatch := 0
 	for i < len(srcRunes) {
 		if currentMatch < len(matches) && matches[currentMatch][0].IsValid() && i == matches[currentMatch][0].StartIdx {
 			// At the start of the next match: emit the expanded replacement and jump past the match.
 			dst = append(dst, re.Expand("", repl, src, matches[currentMatch])...)
 			i = matches[currentMatch][0].EndIdx
 			currentMatch++
 		} else {
 			dst = append(dst, string(srcRunes[i])...)
 			i++
 		}
 	}
 	return string(dst)
 }
 // ReplaceAllLiteral replaces all matches of the expression in src, with the text in repl. The text is replaced directly,
 // without any expansion.
 //
 // Text outside the matches is copied through unchanged. Match indices count runes, so src is walked
 // rune by rune rather than byte by byte; this keeps multi-byte characters intact.
 func (re Reg) ReplaceAllLiteral(src string, repl string) string {
 	zerogroups := re.FindAll(src)
 	srcRunes := []rune(src)
 	var dst []byte
 	currentMatch := 0
 	i := 0 // rune index into src
 	for i < len(srcRunes) {
 		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
 			// At the start of the next match: emit repl verbatim and jump past the match.
 			dst = append(dst, repl...)
 			i = zerogroups[currentMatch].EndIdx
 			currentMatch++
 		} else {
 			dst = append(dst, string(srcRunes[i])...)
 			i++
 		}
 	}
 	return string(dst)
 }
 // ReplaceAllFunc replaces every match of the expression in src, with the return value of the function replFunc.
 // replFunc takes in the matched string. The return value is substituted in directly without expansion.
 //
 // Text outside the matches is copied through unchanged. Match indices count runes, so src is walked
 // rune by rune rather than byte by byte; this keeps multi-byte characters intact.
 func (re Reg) ReplaceAllFunc(src string, replFunc func(string) string) string {
 	zerogroups := re.FindAll(src)
 	srcRunes := []rune(src)
 	var dst []byte
 	currentMatch := 0
 	i := 0 // rune index into src
 	for i < len(srcRunes) {
 		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
 			// At the start of the next match: hand the matched text to replFunc and jump past the match.
 			g := zerogroups[currentMatch]
 			dst = append(dst, replFunc(string(srcRunes[g.StartIdx:g.EndIdx]))...)
 			i = g.EndIdx
 			currentMatch++
 		} else {
 			dst = append(dst, string(srcRunes[i])...)
 			i++
 		}
 	}
 	return string(dst)
 }
--- a/regex/misc.go
+++ b/regex/misc.go
@@ -16,8 +16,11 @@ var rparenRune rune = 0xF0006
 var nonCapLparenRune rune = 0xF0007 // Represents a non-capturing group's LPAREN
 var escBackslashRune rune = 0xF0008 // Represents an escaped backslash
 var charRangeRune rune = 0xF0009    // Represents a character range
 var lazyKleeneRune rune = 0xF000A   // Represents a lazy kleene star
 var lazyPlusRune rune = 0xF000B     // Represents a lazy plus operator
 var lazyQuestionRune rune = 0xF000C // Represents a lazy question operator
-var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
+var specialChars = []rune{'?', lazyQuestionRune, '*', lazyKleeneRune, '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', lazyPlusRune, '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
 // An interface for int and rune, which are identical
 type character interface {
@@ -48,49 +51,6 @@ func isNormalChar(c rune) bool {
 	return !slices.Contains(specialChars, c)
 }
 // uniqueAppend appends each of items to slc unless an equal element is already
 // present (including one appended earlier in the same call). It returns the
 // resulting slice and how many items were actually appended.
 func uniqueAppend[T comparable](slc []T, items ...T) ([]T, int) {
 	lenBefore := len(slc)
 	for i := range items {
 		if slices.Contains(slc, items[i]) {
 			continue
 		}
 		slc = append(slc, items[i])
 	}
 	// Every append grows the slice by exactly one, so the growth is the count.
 	return slc, len(slc) - lenBefore
 }
 // uniqueAppendFunc is the counterpart of uniqueAppend for types that need a
 // custom equality test. It copies slc, then appends each of items for which
 // fn(item, existing) is false for every element already in the result -
 // including elements appended earlier in the same call, so duplicates within
 // items are only added once. slc itself is not modified.
 //
 // It returns the new slice and the number of items appended.
 func uniqueAppendFunc[T any](slc []T, fn func(T, T) bool, items ...T) ([]T, int) {
 	toRet := make([]T, len(slc), len(slc)+len(items))
 	copy(toRet, slc)
 	numAppended := 0
 	for _, item := range items {
 		itemExists := false
 		// Search the result built so far, not just the input, so that repeated
 		// items are deduplicated the same way uniqueAppend does.
 		for _, val := range toRet {
 			if fn(item, val) {
 				itemExists = true
 				break
 			}
 		}
 		if !itemExists {
 			toRet = append(toRet, item)
 			numAppended++
 		}
 	}
 	return toRet, numAppended
 }
 // allEqual reports whether all the given elements are equal to each other.
 // It returns true when called with zero or one element, since there is no
 // pair of elements that differ.
 func allEqual[T comparable](items ...T) bool {
 	// Guard the empty call: indexing items[0] would panic.
 	if len(items) == 0 {
 		return true
 	}
 	first := items[0]
 	for _, item := range items[1:] {
 		if item != first {
 			return false
 		}
 	}
 	return true
 }
 // Map function - convert a slice of T to a slice of V, based on a function
 // that maps a T to a V
 func funcMap[T, V any](slc []T, fn func(T) V) []V {
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -25,27 +25,31 @@ const (
 )
 type nfaState struct {
-	content                    stateContents       // Contents of current state
+	content stateContents // Contents of current state
-	isEmpty                    bool                // If it is empty - Union operator and Kleene star states will be empty
+	isEmpty bool          // If it is empty - Union operator and Kleene star states will be empty
-	isLast                     bool                // If it is the last state (acept state)
+	isLast  bool          // If it is the last state (acept state)
-	output                     []*nfaState         // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
+	output  []*nfaState   // The outputs of the current state ie. the 'outward arrows'. A union operator state will have more than one of these.
-	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
+	//	transitions                map[int][]*nfaState // Transitions to different states (maps a character (int representation) to a _list of states. This is useful if one character can lead multiple states eg. ab|aa)
-	isKleene                   bool                // Identifies whether current node is a 0-state representing Kleene star
+	next                       *nfaState  // The next state (not for alternation or kleene states)
-	isQuestion                 bool                // Identifies whether current node is a 0-state representing the question operator
+	isKleene                   bool       // Identifies whether current node is a 0-state representing Kleene star
-	isAlternation              bool                // Identifies whether current node is a 0-state representing an alternation
+	isQuestion                 bool       // Identifies whether current node is a 0-state representing the question operator
-	assert                     assertType          // Type of assertion of current node - NONE means that the node doesn't assert anything
+	isAlternation              bool       // Identifies whether current node is a 0-state representing an alternation
-	allChars                   bool                // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
+	isLazy                     bool       // Only for split states - Identifies whether or not to flip the order of branches (try one branch before the other)
-	except                     []rune              // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
+	splitState                 *nfaState  // Only for alternation states - the 'other' branch of the alternation ('next' is the first)
-	lookaroundRegex            string              // Only for lookaround states - Contents of the regex that the lookaround state holds
+	assert                     assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
-	lookaroundNFA              *nfaState           // Holds the NFA of the lookaroundRegex - if it exists
+	allChars                   bool       // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
-	lookaroundNumCaptureGroups int                 // Number of capturing groups in lookaround regex if current node is a lookaround
+	except                     []rune     // Only valid if allChars is true - match all characters _except_ the ones in this block. Useful for inverting character classes.
-	groupBegin                 bool                // Whether or not the node starts a capturing group
+	lookaroundRegex            string     // Only for lookaround states - Contents of the regex that the lookaround state holds
-	groupEnd                   bool                // Whether or not the node ends a capturing group
+	lookaroundNFA              *nfaState  // Holds the NFA of the lookaroundRegex - if it exists
-	groupNum                   int                 // Which capturing group the node starts / ends
+	lookaroundNumCaptureGroups int        // Number of capturing groups in lookaround regex if current node is a lookaround
 	groupBegin                 bool       // Whether or not the node starts a capturing group
 	groupEnd                   bool       // Whether or not the node ends a capturing group
 	groupNum                   int        // Which capturing group the node starts / ends
 	// The following properties depend on the current match - I should think about resetting them for every match.
-	zeroMatchFound bool    // Whether or not the state has been used for a zero-length match - only relevant for zero states
+	threadGroups    []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
-	threadGroups   []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
+	isBackreference bool    // Whether or not current node is backreference
-	threadSP       int     // The string pointer of the thread - where it is in the input string
+	referredGroup   int     // If current node is a backreference, the node that it points to
 	threadBackref   int     // If current node is a backreference, how many characters to look forward into the referred group
 }
 // Clones the NFA starting from the given state.
@@ -71,12 +75,11 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 		isEmpty:         stateToClone.isEmpty,
 		isLast:          stateToClone.isLast,
 		output:          make([]*nfaState, len(stateToClone.output)),
 		transitions:     make(map[int][]*nfaState),
 		isKleene:        stateToClone.isKleene,
 		isQuestion:      stateToClone.isQuestion,
 		isAlternation:   stateToClone.isAlternation,
 		isLazy:          stateToClone.isLazy,
 		assert:          stateToClone.assert,
 		zeroMatchFound:  stateToClone.zeroMatchFound,
 		allChars:        stateToClone.allChars,
 		except:          append([]rune{}, stateToClone.except...),
 		lookaroundRegex: stateToClone.lookaroundRegex,
@@ -92,20 +95,18 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 			clone.output[i] = cloneStateHelper(s, cloneMap)
 		}
 	}
 	for k, v := range stateToClone.transitions {
 		clone.transitions[k] = make([]*nfaState, len(v))
 		for i, s := range v {
 			if s == stateToClone {
 				clone.transitions[k][i] = clone
 			} else {
 				clone.transitions[k][i] = cloneStateHelper(s, cloneMap)
 			}
 		}
 	}
 	if stateToClone.lookaroundNFA == stateToClone {
 		clone.lookaroundNFA = clone
 	}
 	clone.lookaroundNFA = cloneStateHelper(stateToClone.lookaroundNFA, cloneMap)
 	if stateToClone.splitState == stateToClone {
 		clone.splitState = clone
 	}
 	clone.splitState = cloneStateHelper(stateToClone.splitState, cloneMap)
 	if stateToClone.next == stateToClone {
 		clone.next = clone
 	}
 	clone.next = cloneStateHelper(stateToClone.next, cloneMap)
 	return clone
 }
@@ -116,23 +117,27 @@ func resetThreads(start *nfaState) {
 }
 func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	if state == nil {
 		return
 	}
 	if _, ok := visitedMap[state]; ok {
 		return
 	}
 	// Assuming it hasn't been visited
 	state.threadGroups = nil
-	state.threadSP = 0
+	state.threadBackref = 0
 	visitedMap[state] = true
-	for _, v := range state.transitions {
+	if state.isAlternation {
-		for _, nextState := range v {
+		resetThreadsHelper(state.next, visitedMap)
-			resetThreadsHelper(nextState, visitedMap)
+		resetThreadsHelper(state.splitState, visitedMap)
-		}
+	} else {
 		resetThreadsHelper(state.next, visitedMap)
 	}
 }
 // Checks if the given state's assertion is true. Returns true if the given
 // state doesn't have an assertion.
-func (s nfaState) checkAssertion(str []rune, idx int) bool {
+func (s nfaState) checkAssertion(str []rune, idx int, preferLongest bool) bool {
 	if s.assert == alwaysTrueAssert {
 		return true
 	}
@@ -182,7 +187,7 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 			strToMatch = string(runesToMatch)
 		}
-		regComp := Reg{startState, s.lookaroundNumCaptureGroups}
+		regComp := Reg{startState, s.lookaroundNumCaptureGroups, s.lookaroundRegex, preferLongest}
 		matchIndices := regComp.FindAll(strToMatch)
 		numMatchesFound := 0
@@ -209,9 +214,12 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 }
 // Returns true if the contents of 's' contain the value at the given index of the given string
-func (s nfaState) contentContains(str []rune, idx int) bool {
+func (s nfaState) contentContains(str []rune, idx int, preferLongest bool) bool {
 	if s.assert != noneAssert {
-		return s.checkAssertion(str, idx)
+		return s.checkAssertion(str, idx, preferLongest)
 	}
 	if idx >= len(str) {
 		return false
 	}
 	if s.allChars {
 		return !slices.Contains(slices.Concat(notDotChars, s.except), str[idx]) // Return true only if the index isn't a 'notDotChar', or isn't one of the exception characters for the current node.
@@ -224,74 +232,84 @@ func (s nfaState) isLookaround() bool {
 	return s.assert == plaAssert || s.assert == plbAssert || s.assert == nlaAssert || s.assert == nlbAssert
 }
 // numTransitions returns how many of the state's two outgoing pointers
 // (next and splitState) are set: 0, 1 or 2.
 func (s nfaState) numTransitions() int {
 	count := 0
 	if s.next != nil {
 		count++
 	}
 	if s.splitState != nil {
 		count++
 	}
 	return count
 }
 // Returns the matches for the character at the given index of the given string.
 // Also returns the number of matches. Returns -1 if an assertion failed.
-func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
+//func (s nfaState) matchesFor(str []rune, idx int) ([]*nfaState, int) {
-	// Assertions can be viewed as 'checks'. If the check fails, we return
+//	// Assertions can be viewed as 'checks'. If the check fails, we return
-	// an empty array and 0.
+//	// an empty array and 0.
-	// If it passes, we treat it like any other state, and return all the transitions.
+//	// If it passes, we treat it like any other state, and return all the transitions.
-	if s.assert != noneAssert {
+//	if s.assert != noneAssert {
-		if s.checkAssertion(str, idx) == false {
+//		if s.checkAssertion(str, idx) == false {
-			return make([]*nfaState, 0), -1
+//			return make([]*nfaState, 0), -1
-		}
+//		}
-	}
+//	}
-	listTransitions := s.transitions[int(str[idx])]
+//	listTransitions := s.transitions[int(str[idx])]
-	for _, dest := range s.transitions[int(anyCharRune)] {
+//	for _, dest := range s.transitions[int(anyCharRune)] {
-		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
+//		if !slices.Contains(slices.Concat(notDotChars, dest.except), str[idx]) {
-			// Add an allChar state to the list of matches if:
+//			// Add an allChar state to the list of matches if:
-			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
+//			// 		a. The current character isn't a 'notDotChars' character. In single line mode, this includes newline. In multiline mode, it doesn't.
-			// 		b. The current character isn't the state's exception list.
+//			// 		b. The current character isn't the state's exception list.
-			listTransitions = append(listTransitions, dest)
+//			listTransitions = append(listTransitions, dest)
-		}
+//		}
-	}
+//	}
-	numTransitions := len(listTransitions)
+//	numTransitions := len(listTransitions)
-	return listTransitions, numTransitions
+//	return listTransitions, numTransitions
-}
+//}
 // verifyLastStatesHelper performs the depth-first recursion needed for verifyLastStates
-func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
+//func verifyLastStatesHelper(st *nfaState, visited map[*nfaState]bool) {
-	if len(st.transitions) == 0 {
+//	if st.numTransitions() == 0 {
-		st.isLast = true
+//		st.isLast = true
-		return
+//		return
-	}
+//	}
-	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
+//	//	if len(state.transitions) == 1 && len(state.transitions[state.content]) == 1 && state.transitions[state.content][0] == state { // Eg. a*
-	if len(st.transitions) == 1 { // Eg. a*
+//	if st.numTransitions() == 1 { // Eg. a*
-		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
+//		var moreThanOneTrans bool // Dummy variable, check if all the transitions for the current's state's contents have a length of one
-		for _, c := range st.content {
+//		for _, c := range st.content {
-			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
+//			if len(st.transitions[c]) != 1 || st.transitions[c][0] != st {
-				moreThanOneTrans = true
+//				moreThanOneTrans = true
-			}
+//			}
-		}
+//		}
-		st.isLast = !moreThanOneTrans
+//		st.isLast = !moreThanOneTrans
-	}
+//	}
-
+//
-	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
+//	if st.isKleene { // A State representing a Kleene Star has transitions going out, which loop back to it. If all those transitions point to the same (single) state, then it must be a last state
-		transitionDests := make([]*nfaState, 0)
+//		transitionDests := make([]*nfaState, 0)
-		for _, v := range st.transitions {
+//		for _, v := range st.transitions {
-			transitionDests = append(transitionDests, v...)
+//			transitionDests = append(transitionDests, v...)
-		}
+//		}
-		if allEqual(transitionDests...) {
+//		if allEqual(transitionDests...) {
-			st.isLast = true
+//			st.isLast = true
-			return
+//			return
-		}
+//		}
-	}
+//	}
-	if visited[st] == true {
+//	if visited[st] == true {
-		return
+//		return
-	}
+//	}
-	visited[st] = true
+//	visited[st] = true
-	for _, states := range st.transitions {
+//	for _, states := range st.transitions {
-		for i := range states {
+//		for i := range states {
-			if states[i] != st {
+//			if states[i] != st {
-				verifyLastStatesHelper(states[i], visited)
+//				verifyLastStatesHelper(states[i], visited)
-			}
+//			}
-		}
+//		}
-	}
+//	}
-}
+//}
 // verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
-func verifyLastStates(start []*nfaState) {
+//func verifyLastStates(start []*nfaState) {
-	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
+//	verifyLastStatesHelper(start[0], make(map[*nfaState]bool))
-}
+//}
 // Concatenates s1 and s2, returns the start of the concatenation.
 func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
@@ -299,75 +317,84 @@ func concatenate(s1 *nfaState, s2 *nfaState) *nfaState {
 		return s2
 	}
 	for i := range s1.output {
-		for _, c := range s2.content { // Create transitions for every element in s1's content to s2'
+		s1.output[i].next = s2
 			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], s2)
 		}
 	}
 	s1.output = s2.output
 	return s1
 }
-func kleene(s1 nfaState) (*nfaState, error) {
+func kleene(s1 *nfaState) (*nfaState, error) {
 	if s1.isEmpty && s1.assert != noneAssert {
 		return nil, fmt.Errorf("previous token is not quantifiable")
 	}
 	toReturn := &nfaState{}
 	toReturn.transitions = make(map[int][]*nfaState)
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isAlternation = true
 	toReturn.content = newContents(epsilon)
 	toReturn.splitState = s1
 	//	toReturn := &nfaState{}
 	//	toReturn.transitions = make(map[int][]*nfaState)
 	//	toReturn.content = newContents(epsilon)
 	toReturn.isKleene = true
-	toReturn.output = append(toReturn.output, toReturn)
+	toReturn.output = append([]*nfaState{}, toReturn)
 	for i := range s1.output {
-		for _, c := range toReturn.content {
+		s1.output[i].next = toReturn
 			s1.output[i].transitions[c], _ = uniqueAppend(s1.output[i].transitions[c], toReturn)
 		}
 	}
 	for _, c := range s1.content {
 		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
 	}
 	//	for _, c := range s1.content {
 	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], &s1)
 	//	}
 	//toReturn.kleeneState = &s1
 	return toReturn, nil
 }
 func alternate(s1 *nfaState, s2 *nfaState) *nfaState {
 	toReturn := &nfaState{}
-	toReturn.transitions = make(map[int][]*nfaState)
+	//	toReturn.transitions = make(map[int][]*nfaState)
 	toReturn.output = append(toReturn.output, s1.output...)
 	toReturn.output = append(toReturn.output, s2.output...)
-	// Unique append is used here (and elsewhere) to ensure that,
+	//	// Unique append is used here (and elsewhere) to ensure that,
-	// for any given transition, a state can only be mentioned once.
+	//	// for any given transition, a state can only be mentioned once.
-	// For example, given the transition 'a', the state 's1' can only be mentioned once.
+	//	// For example, given the transition 'a', the state 's1' can only be mentioned once.
-	// This would lead to multiple instances of the same set of match indices, since both
+	//	// This would lead to multiple instances of the same set of match indices, since both
-	// 's1' states would be considered to match.
+	//	// 's1' states would be considered to match.
-	for _, c := range s1.content {
+	//	for _, c := range s1.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s1)
-	}
+	//	}
-	for _, c := range s2.content {
+	//	for _, c := range s2.content {
-		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
+	//		toReturn.transitions[c], _ = uniqueAppend(toReturn.transitions[c], s2)
-	}
+	//	}
 	toReturn.content = newContents(epsilon)
 	toReturn.isEmpty = true
 	toReturn.isAlternation = true
 	toReturn.next = s1
 	toReturn.splitState = s2
 	return toReturn
 }
-func question(s1 *nfaState) *nfaState { // Use the fact that ab? == a(b|)
+func question(s1 *nfaState) (*nfaState, error) { // Use the fact that ab? == a(b|)
-	s2 := &nfaState{}
+	if s1.isEmpty && s1.assert != noneAssert {
-	s2.transitions = make(map[int][]*nfaState)
+		return nil, fmt.Errorf("previous token is not quantifiable")
-	s2.content = newContents(epsilon)
+	}
-	s2.output = append(s2.output, s2)
+	toReturn := &nfaState{}
-	s2.isEmpty = true
+	toReturn.isEmpty = true
-	s2.isQuestion = true
+	toReturn.isAlternation = true
-	s3 := alternate(s1, s2)
+	toReturn.isQuestion = true
-	return s3
+	toReturn.content = newContents(epsilon)
 	toReturn.splitState = s1
 	toReturn.output = append([]*nfaState{}, toReturn)
 	toReturn.output = append(toReturn.output, s1.output...)
 	//	s2.transitions = make(map[int][]*nfaState)
 	return toReturn, nil
 }
 // Creates and returns a new state with the 'default' values.
 func newState() nfaState {
 	ret := nfaState{
-		output:          make([]*nfaState, 0),
+		output: make([]*nfaState, 0),
-		transitions:     make(map[int][]*nfaState),
+		//		transitions:     make(map[int][]*nfaState),
 		assert:          noneAssert,
 		except:          append([]rune{}, 0),
 		lookaroundRegex: "",
@@ -379,10 +406,42 @@ func newState() nfaState {
 }
 // Creates and returns a state that _always_ has a zero-length match.
-func zeroLengthMatchState() nfaState {
+func zeroLengthMatchState() *nfaState {
-	start := newState()
+	start := &nfaState{}
 	start.content = newContents(epsilon)
 	start.isEmpty = true
 	start.assert = alwaysTrueAssert
 	start.output = append([]*nfaState{}, start)
 	return start
 }
 // equals reports whether s and other agree on every field compared below.
 // Pointer fields (next, splitState, lookaroundNFA and the entries of output)
 // are compared by identity, not by the contents of the states they point to.
 // lookaroundRegex, lookaroundNumCaptureGroups, isBackreference and
 // referredGroup are not part of the comparison.
 func (s nfaState) equals(other nfaState) bool {
 	// Boolean flags.
 	flagsDiffer := s.isEmpty != other.isEmpty ||
 		s.isLast != other.isLast ||
 		s.isKleene != other.isKleene ||
 		s.isQuestion != other.isQuestion ||
 		s.isLazy != other.isLazy ||
 		s.isAlternation != other.isAlternation ||
 		s.allChars != other.allChars ||
 		s.groupBegin != other.groupBegin ||
 		s.groupEnd != other.groupEnd
 	if flagsDiffer {
 		return false
 	}
 	// Scalar values.
 	if s.assert != other.assert || s.groupNum != other.groupNum || s.threadBackref != other.threadBackref {
 		return false
 	}
 	// Links to other states.
 	if s.next != other.next || s.splitState != other.splitState || s.lookaroundNFA != other.lookaroundNFA {
 		return false
 	}
 	// Slice-valued fields, compared element by element.
 	if !slices.Equal(s.output, other.output) {
 		return false
 	}
 	if !slices.Equal(s.content, other.content) {
 		return false
 	}
 	if !slices.Equal(s.except, other.except) {
 		return false
 	}
 	return slices.Equal(s.threadGroups, other.threadGroups)
 }
 // stateExists reports whether list contains a state for which equals(s)
 // returns true.
 func stateExists(list []nfaState, s nfaState) bool {
 	return slices.ContainsFunc(list, func(candidate nfaState) bool {
 		return candidate.equals(s)
 	})
 }
--- a/regex/postfixNode.go
+++ b/regex/postfixNode.go
@@ -1,6 +1,8 @@
 package regex
-import "fmt"
+import (
 	"fmt"
 )
 type nodeType int
@@ -20,6 +22,7 @@ const (
 	assertionNode
 	lparenNode
 	rparenNode
 	backreferenceNode
 )
 // Helper constants for lookarounds
@@ -31,15 +34,17 @@ const lookbehind = -1
 var infinite_reps int = -1 // Represents infinite reps eg. the end range in {5,}
 // This represents a node in the postfix representation of the expression
 type postfixNode struct {
-	nodetype       nodeType
+	nodetype        nodeType
-	contents       []rune        // Contents of the node
+	contents        []rune        // Contents of the node
-	startReps      int           // Minimum number of times the node should be repeated - used with numeric specifiers
+	startReps       int           // Minimum number of times the node should be repeated - used with numeric specifiers
-	endReps        int           // Maximum number of times the node should be repeated - used with numeric specifiers
+	endReps         int           // Maximum number of times the node should be repeated - used with numeric specifiers
-	allChars       bool          // Whether or not the current node represents all characters (eg. dot metacharacter)
+	allChars        bool          // Whether or not the current node represents all characters (eg. dot metacharacter)
-	except         []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
+	except          []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
-	lookaroundSign int           // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
+	lookaroundSign  int           // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
-	lookaroundDir  int           // Lookbehind or lookahead
+	lookaroundDir   int           // Lookbehind or lookahead
-	nodeContents   []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
+	nodeContents    []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
 	referencedGroup int           // ONLY USED WHEN nodetype == backreferenceNode. Holds the group which this one refers to. After parsing is done, the expression will be rewritten eg. (a)\1 will become (a)(a). So the return value of ShuntingYard() shouldn't contain a backreferenceNode.
 	isLazy          bool          // ONLY USED WHEN nodetype == kleene or question
 }
 // Converts the given list of postfixNodes to one node of type CHARCLASS.
@@ -158,10 +163,19 @@ func newPostfixNode(contents ...rune) postfixNode {
 		switch contents[0] {
 		case '+':
 			to_return.nodetype = plusNode
 		case lazyPlusRune:
 			to_return.nodetype = plusNode
 			to_return.isLazy = true
 		case '?':
 			to_return.nodetype = questionNode
 		case lazyQuestionRune:
 			to_return.nodetype = questionNode
 			to_return.isLazy = true
 		case '*':
 			to_return.nodetype = kleeneNode
 		case lazyKleeneRune:
 			to_return.nodetype = kleeneNode
 			to_return.isLazy = true
 		case '|':
 			to_return.nodetype = pipeNode
 		case concatRune:
@@ -208,3 +222,44 @@ func newPostfixCharNode(contents ...rune) postfixNode {
 	toReturn.contents = append(toReturn.contents, contents...)
 	return toReturn
 }
 // newPostfixBackreferenceNode builds a backreferenceNode that refers to the
 // capturing group numbered referred. The node is set to repeat exactly once
 // (startReps and endReps are both 1).
 func newPostfixBackreferenceNode(referred int) postfixNode {
 	return postfixNode{
 		nodetype:        backreferenceNode,
 		startReps:       1,
 		endReps:         1,
 		referencedGroup: referred,
 	}
 }
 // rewriteBackreferences rewrites any backreferences in the given postfixNode slice, into their respective groups.
 // It stores the relation in a map, and returns it as the second return value.
 // It uses parenIndices to determine where a group starts and ends in nodes.
 // For example, \1(a) will be rewritten into (a)(a), and 1 -> 2 will be the hashmap value.
 // It returns an error if a backreference points to an invalid group.
 // func rewriteBackreferences(nodes []postfixNode, parenIndices []Group) ([]postfixNode, map[int]int, error) {
 // 	rtv := make([]postfixNode, 0)
 // 	referMap := make(map[int]int)
 // 	numGroups := 0
 // 	groupIncrement := 0 // If we have a backreference before the group its referring to, then the group its referring to will have its group number incremented.
 // 	for i, node := range nodes {
 // 		if node.nodetype == backreferenceNode {
 // 			if node.referencedGroup >= len(parenIndices) {
 // 				return nil, nil, fmt.Errorf("invalid backreference")
 // 			}
 // 			rtv = slices.Concat(rtv, nodes[parenIndices[node.referencedGroup].StartIdx:parenIndices[node.referencedGroup].EndIdx+1]) // Add all the nodes in the group to rtv
 // 			numGroups += 1
 // 			if i < parenIndices[node.referencedGroup].StartIdx {
 // 				groupIncrement += 1
 // 			}
 // 			referMap[numGroups] = node.referencedGroup + groupIncrement
 // 		} else {
 // 			rtv = append(rtv, node)
 // 			if node.nodetype == lparenNode {
 // 				numGroups += 1
 // 			}
 // 		}
 // 	}
 // 	return rtv, referMap, nil
 // }
--- a/regex/priorityQueue.go
+++ b/regex/priorityQueue.go
@@ -1,89 +0,0 @@
 package regex
 import "container/heap"
 // Implement a priority queue using container/heap
 // Priority levels for queue items. The values increase in declaration order
 // (iota starts at 0 with min_priority), and priorityQueue.Less orders items
 // with a larger value ahead of items with a smaller one.
 const (
 	min_priority int = iota
 	zerostate_priority
 	alternation_priority
 	kleene_priority
 	char_priority
 	max_priority
 )
 // getPriority maps a state to its queue priority. Kleene states and other
 // empty states get zerostate_priority, alternation states get
 // alternation_priority, and everything else gets char_priority. The checks
 // are ordered: isKleene is tested before isAlternation, which is tested
 // before isEmpty.
 func getPriority(state *nfaState) int {
 	switch {
 	case state.isKleene:
 		return zerostate_priority
 	case state.isAlternation:
 		return alternation_priority
 	case state.isEmpty:
 		return zerostate_priority
 	default:
 		return char_priority
 	}
 }
 // priorQueueItem is a single entry of a priorityQueue.
 type priorQueueItem struct {
 	state    *nfaState // The NFA state carried by this entry
 	priority int       // Ordering key; a larger value is ordered first by Less
 	index    int       // Position of the entry in the queue's slice; -1 while it is not in a queue
 }
 // newPriorQueueItem wraps state in a queue item whose priority is derived
 // from the state via getPriority. The index starts at -1 because the item has
 // not been pushed onto a queue yet.
 func newPriorQueueItem(state *nfaState) *priorQueueItem {
 	item := new(priorQueueItem)
 	item.state = state
 	item.priority = getPriority(state)
 	item.index = -1
 	return item
 }
 // priorityQueue is a slice of queue items. Its Len, Less, Swap, Push and Pop
 // methods are the ones container/heap requires of a heap.
 type priorityQueue []*priorQueueItem
 // Len returns the number of items currently in the queue.
 func (pq priorityQueue) Len() int {
 	return len(pq)
 }
 // Less reports whether item i should be ordered before item j. An item with
 // a larger priority comes first (max-heap behaviour); items of equal priority
 // are ordered by ascending index.
 func (pq priorityQueue) Less(i, j int) bool {
 	left, right := pq[i], pq[j]
 	if left.priority != right.priority {
 		// Greater-than, because the highest priority must end up on top.
 		return left.priority > right.priority
 	}
 	return left.index < right.index
 }
 // Swap exchanges the items at positions i and j and updates each item's
 // index field to its new position.
 func (pq priorityQueue) Swap(i, j int) {
 	atI, atJ := pq[i], pq[j]
 	pq[i], pq[j] = atJ, atI
 	atJ.index = i
 	atI.index = j
 }
 // Push appends x, which must be a *priorQueueItem, to the end of the queue
 // and records its slice position in the item's index field.
 func (pq *priorityQueue) Push(x any) {
 	entry := x.(*priorQueueItem)
 	entry.index = len(*pq)
 	*pq = append(*pq, entry)
 }
 // Pop removes the last element of the underlying slice and returns it. The
 // vacated slot is set to nil and the returned item's index is reset to -1.
 func (pq *priorityQueue) Pop() any {
 	queue := *pq
 	last := len(queue) - 1
 	removed := queue[last]
 	queue[last] = nil // Drop the reference held by the backing array
 	removed.index = -1
 	*pq = queue[:last]
 	return removed
 }
 // peek returns the item at the top of the heap without removing it, or nil
 // if the queue is empty.
 //
 // container/heap keeps the first element according to Less at index 0 of the
 // slice; the last slice element is just an arbitrary leaf, so it is not what
 // heap.Pop would return next.
 func (pq *priorityQueue) peek() any {
 	queue := *pq
 	if len(queue) == 0 {
 		return nil
 	}
 	return queue[0]
 }
 // update replaces the state and priority of item, then calls heap.Fix at the
 // item's current index so the heap ordering is restored.
 func (pq *priorityQueue) update(item *priorQueueItem, value *nfaState, priority int) {
 	item.state, item.priority = value, priority
 	heap.Fix(pq, item.index)
 }
--- a/regex/range2regex.go
+++ b/regex/range2regex.go
@@ -109,7 +109,7 @@ func range2regex(start int, end int) (string, error) {
 		startSlc := intToSlc(rg.start)
 		endSlc := intToSlc(rg.end)
 		if len(startSlc) != len(endSlc) {
-			return "", fmt.Errorf("Error parsing numeric range")
+			return "", fmt.Errorf("error parsing numeric range")
 		}
 		for i := range startSlc {
 			if startSlc[i] == endSlc[i] {
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -25,7 +25,9 @@ var reTests = []struct {
 	{"a*b", nil, "qwqw", []Group{}},
 	{"(abc)*", nil, "abcabcabc", []Group{{0, 9}, {9, 9}}},
 	{"((abc)|(def))*", nil, "abcdef", []Group{{0, 6}, {6, 6}}},
-	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
+	// This match will only happen with Longest()
 	// {"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 6}, {6, 6}}},
 	{"(abc)*|(def)*", nil, "abcdef", []Group{{0, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}},
 	{"b*a*a", nil, "bba", []Group{{0, 3}}},
 	{"(ab)+", nil, "abcabddd", []Group{{0, 2}, {3, 5}}},
 	{"a(b(c|d)*)*", nil, "abccbd", []Group{{0, 6}}},
@@ -177,7 +179,7 @@ var reTests = []struct {
 	{"[[:graph:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{0, 70}}},
 	// Test cases from Python's RE test suite
-	{`[\1]`, nil, "\x01", []Group{{0, 1}}},
+	{`[\01]`, nil, "\x01", []Group{{0, 1}}},
 	{`\0`, nil, "\x00", []Group{{0, 1}}},
 	{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
@@ -192,7 +194,7 @@ var reTests = []struct {
 	{`\x00ffffffffffffff`, nil, "\xff", []Group{}},
 	{`\x00f`, nil, "\x0f", []Group{}},
 	{`\x00fe`, nil, "\xfe", []Group{}},
-	{`^\w+=(\\[\000-\277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
+	{`^\w+=(\\[\000-\0277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
 	{`a.b`, nil, `acb`, []Group{{0, 3}}},
 	{`a.b`, nil, "a\nb", []Group{}},
@@ -310,11 +312,7 @@ var reTests = []struct {
 	{`a[-]?c`, nil, `ac`, []Group{{0, 2}}},
 	{`^(.+)?B`, nil, `AB`, []Group{{0, 2}}},
 	{`\0009`, nil, "\x009", []Group{{0, 2}}},
-	{`\141`, nil, "a", []Group{{0, 1}}},
+	{`\0141`, nil, "a", []Group{{0, 1}}},
 	// At this point, the python test suite has a bunch
 	// of backreference tests. Since my engine doesn't
 	// implement backreferences, I've skipped those tests.
 	{`*a`, nil, ``, nil},
 	{`(*)b`, nil, ``, nil},
@@ -431,7 +429,8 @@ var reTests = []struct {
 	{`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}},
 	{`^(.+)?B`, []ReFlag{RE_CASE_INSENSITIVE}, `ab`, []Group{{0, 2}}},
 	{`\0009`, []ReFlag{RE_CASE_INSENSITIVE}, "\x009", []Group{{0, 2}}},
-	{`\141`, []ReFlag{RE_CASE_INSENSITIVE}, "A", []Group{{0, 1}}},
+	{`\0141`, []ReFlag{RE_CASE_INSENSITIVE}, "A", []Group{{0, 1}}},
 	{`\0141\0141`, []ReFlag{RE_CASE_INSENSITIVE}, "AA", []Group{{0, 2}}},
 	{`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}},
@@ -462,8 +461,10 @@ var reTests = []struct {
 	{`[\D5]+`, nil, `1234abc5678`, []Group{{4, 8}}},
 	{`[\da-fA-F]+`, nil, `123abc`, []Group{{0, 6}}},
 	{`\xff`, nil, "\u00ff", []Group{{0, 1}}},
 	{`\xff+`, nil, "\u00ff\u00ff", []Group{{0, 2}}},
 	{`\xFF`, nil, "\u00ff", []Group{{0, 1}}},
 	{`\x00ff`, nil, "\u00ff", []Group{}},
 	{`\x{0000ff}+`, nil, "\u00ff\u00ff", []Group{{0, 2}}},
 	{`\x{0000ff}`, nil, "\u00ff", []Group{{0, 1}}},
 	{`\x{0000FF}`, nil, "\u00ff", []Group{{0, 1}}},
 	{"\t\n\v\r\f\a", nil, "\t\n\v\r\f\a", []Group{{0, 6}}},
@@ -471,7 +472,7 @@ var reTests = []struct {
 	{`[\t][\n][\v][\r][\f][\b]`, nil, "\t\n\v\r\f\b", []Group{{0, 6}}},
 	{`.*d`, nil, "abc\nabd", []Group{{4, 7}}},
 	{`(`, nil, "-", nil},
-	{`[\41]`, nil, `!`, []Group{{0, 1}}},
+	{`[\041]`, nil, `!`, []Group{{0, 1}}},
 	{`(?<!abc)(d.f)`, nil, `abcdefdof`, []Group{{6, 9}}},
 	{`[\w-]+`, nil, `laser_beam`, []Group{{0, 10}}},
 	{`M+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
@@ -487,7 +488,25 @@ var reTests = []struct {
 	{`[b-e]`, nil, `f`, []Group{}},
 	{`*?`, nil, `-`, nil},
-	{`a*?`, nil, `-`, nil}, // non-greedy operators are not supported
+	{`a.+c`, nil, `abcabc`, []Group{{0, 6}}},
 	// Lazy quantifier tests
 	{`a.+?c`, nil, `abcabc`, []Group{{0, 3}, {3, 6}}},
 	{`ab*?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
 	{`ab+?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
 	{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
 	{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
 	{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{}},
 	{`ab??c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
 	{`a.*?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXYZC`, []Group{{0, 5}}},
 	{`a.+?c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCABC`, []Group{{0, 3}, {3, 6}}},
 	{`a.*?c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCABC`, []Group{{0, 3}, {3, 6}}},
 	{`.*?\S *:`, nil, `xx:`, []Group{{0, 3}}},
 	{`a[ ]*? (\d+).*`, nil, `a   10`, []Group{{0, 6}}},
 	{`a[ ]*? (\d+).*`, nil, `a    10`, []Group{{0, 7}}},
 	{`"(?:\\"|[^"])*?"`, nil, `"\""`, []Group{{0, 4}}},
 	{`^.*?$`, nil, "one\ntwo\nthree", []Group{}},
 	{`a[^>]*?b`, nil, `a>b`, []Group{}},
 	{`^a*?$`, nil, `foo`, []Group{}},
 	// Numeric range tests - this is a feature that I added, and doesn't exist
 	// in any other mainstream regex engine
@@ -518,6 +537,14 @@ var reTests = []struct {
 	{`<389-400`, nil, `-`, nil},
 	{`<389-400>`, nil, `391`, []Group{{0, 3}}},
 	{`\b<1-10000>\b`, nil, `America declared independence in 1776.`, []Group{{33, 37}}},
 	{`\p{Tamil}+`, nil, `உயிரெழுத்து`, []Group{{0, 11}}}, // Each letter and matra is counted as a separate rune, so 'u', 'ya', 'e (matra), 'ra', 'e (matra)', 'zha', (oo (matra), 'tha', 'ith', 'tha', 'oo (matra)'.
 	{`\P{Tamil}+`, nil, `vowel=உயிரெழுத்து`, []Group{{0, 6}}},
 	{`\P`, nil, `உயிரெழுத்து`, nil},
 	{`\PM\pM*`, nil, `உயிரெழுத்து`, []Group{{0, 1}, {1, 3}, {3, 5}, {5, 7}, {7, 9}, {9, 11}}},
 	{`\pN+`, nil, `123abc456def`, []Group{{0, 3}, {6, 9}}},
 	{`\PN+`, nil, `123abc456def`, []Group{{3, 6}, {9, 12}}},
 	{`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}},
 }
 var groupTests = []struct {
@@ -528,7 +555,7 @@ var groupTests = []struct {
 }{
 	{"(a)(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {1, 2}}}},
 	{"((a))(b)", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {1, 2}}}},
-	{"(0)", nil, "ab", []Match{[]Group{}}},
+	{"(0)", nil, "ab", []Match{}},
 	{"(a)b", nil, "ab", []Match{[]Group{{0, 2}, {0, 1}}}},
 	{"a(b)", nil, "ab", []Match{[]Group{{0, 2}, {1, 2}}}},
 	{"(a|b)", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
@@ -537,10 +564,11 @@ var groupTests = []struct {
 	{"(a+)|(a)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
 	{"(a+)(aa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 2}, {2, 4}}}},
 	{"(aaaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	// This match will only happen with Longest()
-	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
+	//	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {-1, -1}, {0, 4}}}},
 	{"(aaa)|(aaaa)", nil, "aaaa", []Match{[]Group{{0, 3}, {0, 3}, {-1, -1}}}},
 	{"(aaaa)|(aaa)", nil, "aaaa", []Match{[]Group{{0, 4}, {0, 4}, {-1, -1}}}},
-	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 2}, {-1, -1}, {0, 2}}}},
+	{"(a)|(aa)", nil, "aa", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 2}, {1, 2}}}},
 	{"(a?)a?", nil, "b", []Match{[]Group{{0, 0}, {0, 0}}, []Group{{1, 1}, {1, 1}}}},
 	{"(a?)a?", nil, "ab", []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}, []Group{{2, 2}, {2, 2}}}},
 	{"(a?)a?", nil, "aa", []Match{[]Group{{0, 2}, {0, 1}}, []Group{{2, 2}, {2, 2}}}},
@@ -578,13 +606,37 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `bcdd`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, nil, `a`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, nil, `a`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\041`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
-	// At this point, the python test suite has a bunch
+	// Backreference tests
-	// of backreference tests. Since my engine doesn't
+	{`(abc)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
-	// implement backreferences, I've skipped those tests.
+	{`([a-c]+)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
 	{`([a-c]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
 	{`^(.+)?B`, nil, `AB`, []Match{[]Group{{0, 2}, {0, 1}}}},
 	{`(a+).\1$`, nil, `aaaaa`, []Match{[]Group{{0, 5}, {0, 2}}}},
 	{`^(a+).\1$`, nil, `aaaa`, []Match{}},
 	{`(a)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
 	{`(a+)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
 	{`(a+)+\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
 	{`(a).+\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(a)ba*\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(aa|a)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(a|aa)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(a+)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`([abc]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
 	{`(a)(?:b)\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(a)(?:b)\1`, nil, `abb`, []Match{}},
 	{`(?:a)(b)\1`, nil, `aba`, []Match{}},
 	{`(?:a)(b)\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
 	{`(?:(cat)|(dog))\2`, nil, `catdog`, []Match{}},
 	{`(?:a)\1`, nil, `aa`, nil},
 	{`((cat)|(dog)|(cow)|(bat))\4`, nil, `cowcow`, []Match{[]Group{{0, 6}, {0, 3}, {-1, -1}, {-1, -1}, {0, 3}, {-1, -1}}}},
 	{`(a|b)*\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
 	{`(a|b)*\1`, nil, `aba`, []Match{}},
 	{`(a|b)*\1`, nil, `bab`, []Match{}},
 	{`(a|b)*\1`, nil, `baa`, []Match{[]Group{{0, 3}, {1, 2}}}},
 	{`(a)(b)c|ab`, nil, `ab`, []Match{[]Group{{0, 2}}}},
 	{`(a)+x`, nil, `aaax`, []Match{[]Group{{0, 4}, {2, 3}}}},
@@ -633,7 +685,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\041`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
 	{`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}},
@@ -685,6 +737,18 @@ var groupTests = []struct {
 	// {`(a|ab|c|bcd)*(d*)`, nil, `ababcd`, []Match{[]Group{{0, 6}, {3, 6}, {6, 6}}, []Group{{6, 6}, {6, 6}, {6, 6}}}},
 	// // Bug - this should give {0,3},{0,3},{0,0},{0,3},{3,3} but it gives {0,3},{0,2},{0,1},{1,2},{2,3}
 	// //	{`((a*)(b|abc))(c*)`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 3}, {0, 0}, {0, 3}, {3, 3}}}},
 	// Lazy quantifier tests
 	{`a(?:b|c|d)+?(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
 	{`a(?:b|(c|e){1,2}?|d)+?(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {1, 2}, {2, 3}}}},
 	{`(?<!-):(.*?)(?<!-):`, nil, `a:bc-:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
 	{`(?<!\\):(.*?)(?<!\\):`, nil, `a:bc\:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
 	{`(?<!\?)'(.*?)(?<!\?)'`, nil, `a'bc?'de'f`, []Match{[]Group{{1, 9}, {2, 8}}}},
 	{`.*?x\s*\z(.*)`, []ReFlag{RE_MULTILINE, RE_SINGLE_LINE}, "xx\nx\n", []Match{[]Group{{0, 5}, {5, 5}}}},
 	{`.*?x\s*\z(.*)`, []ReFlag{RE_MULTILINE}, "xx\nx\n", []Match{[]Group{{3, 5}, {5, 5}}}},
 	{`^([ab]*?)(?=(b)?)c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
 	{`^([ab]*?)(?!(b))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
 	{`^([ab]*?)(?<!(a))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
 }
 func TestFind(t *testing.T) {
@@ -743,7 +807,7 @@ func TestFindString(t *testing.T) {
 				foundString := regComp.FindString(test.str)
 				if len(test.result) == 0 {
 					if foundString != "" {
-						t.Errorf("Expected no match got %v\n", foundString)
+						t.Errorf("Wanted no match got %v\n", foundString)
 					}
 				} else {
 					expectedString := test.str[test.result[0].StartIdx:test.result[0].EndIdx]
@@ -789,18 +853,132 @@ func TestFindSubmatch(t *testing.T) {
 				if test.result != nil {
 					panic(err)
 				}
-			}
+			} else {
-			match, err := regComp.FindSubmatch(test.str)
+				match, err := regComp.FindSubmatch(test.str)
-			for i := range match {
+				if err != nil {
-				if match[i].IsValid() {
+					if len(test.result) != 0 {
-					if test.result[0][i] != match[i] {
+						t.Errorf("Wanted %v got no match\n", test.result[0])
-						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
+					}
 				} else if len(test.result) == 0 {
 					t.Errorf("Wanted no match got %v\n", match)
 				}
 				for i := range match {
 					if match[i].IsValid() {
 						if test.result[0][i] != match[i] {
 							t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 						}
 					} else {
 						if i < len(test.result) && test.result[0][i].IsValid() {
 							t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 						}
 					}
 				}
 			}
 		})
 	}
 }
 // TestFindStringSubmatch runs FindStringSubmatch over every groupTests entry
 // and compares the text of each returned group with the text spanned by the
 // first expected match. Invalid expected groups are compared as "".
 //
 // A pattern whose expected result is nil is allowed to fail compilation; a
 // compile error for any other entry panics, as in the sibling tests.
 func TestFindStringSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
 			regComp, err := Compile(test.re, test.flags...)
 			if err != nil {
 				if test.result != nil {
 					panic(err)
 				}
 				return
 			}

 			// groupText converts an expected group into the substring it covers.
 			groupText := func(g Group) string {
 				if g.IsValid() {
 					return test.str[g.StartIdx:g.EndIdx]
 				}
 				return ""
 			}

 			matchStr := regComp.FindStringSubmatch(test.str)
 			if matchStr == nil {
 				if len(test.result) != 0 {
 					t.Errorf("Wanted %v got no match\n", funcMap(test.result[0], groupText))
 				}
 				return
 			}
 			if len(test.result) == 0 {
 				t.Errorf("Wanted no match got %v\n", matchStr)
 				return
 			}

 			expectedStr := funcMap(test.result[0], groupText)
 			for i, groupStr := range matchStr {
 				switch {
 				case i >= len(expectedStr):
 					// The engine returned more groups than the table lists.
 					// Trailing empty groups are tolerated; a non-empty one is
 					// a mismatch and must be reported instead of indexing
 					// past the end of expectedStr.
 					if groupStr != "" {
 						t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
 					}
 				case expectedStr[i] != groupStr:
 					t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
 				}
 			}
 		})
 	}
 }
 // TestFindAllStringSubmatch runs FindAllStringSubmatch over every groupTests
 // entry and compares the text of every group of every returned match with
 // the text spanned by the corresponding expected group. Invalid expected
 // groups are compared as "".
 //
 // A pattern whose expected result is nil is allowed to fail compilation; a
 // compile error for any other entry panics, as in the sibling tests.
 func TestFindAllStringSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
 			regComp, err := Compile(test.re, test.flags...)
 			if err != nil {
 				if test.result != nil {
 					panic(err)
 				}
 				return
 			}

 			// groupText converts an expected group into the substring it covers;
 			// matchText does the same for every group of one expected match.
 			groupText := func(g Group) string {
 				if g.IsValid() {
 					return test.str[g.StartIdx:g.EndIdx]
 				}
 				return ""
 			}
 			matchText := func(m Match) []string {
 				return funcMap(m, groupText)
 			}

 			matchStrs := regComp.FindAllStringSubmatch(test.str)
 			if matchStrs == nil {
 				if len(test.result) != 0 {
 					t.Errorf("Wanted %v got no match\n", funcMap(test.result, matchText))
 				}
 				return
 			}
 			if len(test.result) == 0 {
 				t.Errorf("Wanted no match got %v\n", matchStrs)
 				return
 			}

 			expectedStrs := funcMap(test.result, matchText)
 			for i, matchStr := range matchStrs {
 				if i >= len(expectedStrs) {
 					// More matches than the table lists: report the mismatch
 					// once instead of indexing past the end of expectedStrs.
 					t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 					break
 				}
 				for j, groupStr := range matchStr {
 					switch {
 					case j >= len(expectedStrs[i]):
 						// Extra trailing groups are tolerated only when empty.
 						if groupStr != "" {
 							t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 						}
 					case expectedStrs[i][j] != groupStr:
 						t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 					}
 				}
 			}
 		})
 	}
 }
 func TestFindAllSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
@@ -809,13 +987,18 @@ func TestFindAllSubmatch(t *testing.T) {
 				if test.result != nil {
 					panic(err)
 				}
-			}
+			} else {
-			matchIndices := regComp.FindAllSubmatch(test.str)
+				matchIndices := regComp.FindAllSubmatch(test.str)
-			for i := range matchIndices {
+				for i := range matchIndices {
-				for j := range matchIndices[i] {
+					for j := range matchIndices[i] {
-					if matchIndices[i][j].IsValid() {
+						if matchIndices[i][j].IsValid() {
-						if test.result[i][j] != matchIndices[i][j] {
+							if test.result[i][j] != matchIndices[i][j] {
-							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
+								t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 							}
 						} else {
 							if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
 								t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 							}
 						}
 					}
 				}
Author	SHA1	Message	Date
Aadhavan Srinivasan	6151cc8cf6	Updated documentation	2025-02-14 12:07:43 -05:00
Aadhavan Srinivasan	3eaf4eb19c	Updated README	2025-02-14 12:00:33 -05:00
Aadhavan Srinivasan	d453815831	Added README	2025-02-14 11:59:43 -05:00
Aadhavan Srinivasan	3a2916baae	Set 'isLazy' to true in the NFA, if the postfixNode has the flag set	2025-02-14 11:37:48 -05:00
Aadhavan Srinivasan	9d6344719f	Reverse order of trying branches if the quantifier is lazy	2025-02-14 11:37:28 -05:00
Aadhavan Srinivasan	f5c868566b	Added field to NFA, denoting if a node is lazy or not	2025-02-14 11:37:14 -05:00
Aadhavan Srinivasan	1cd6da218f	Added lazy quantifier tests	2025-02-14 11:36:56 -05:00
Aadhavan Srinivasan	277cbc0fc5	Started working on lazy quantifier support	2025-02-13 20:50:30 -05:00
Aadhavan Srinivasan	3924502b72	Added code to return lazy quantifier postfixNodes	2025-02-13 20:50:11 -05:00
Aadhavan Srinivasan	36b009747b	Added metacharacters for lazy quantifiers	2025-02-13 20:49:54 -05:00
Aadhavan Srinivasan	6cd0a10a8f	Added more documentation	2025-02-13 14:14:00 -05:00
Aadhavan Srinivasan	69fb96c43d	Merge pull request 'Implement Unicode character classes' (#4 ) from implementUnicodeCharClass into master Reviewed-on: #4	2025-02-13 09:51:44 -06:00
Aadhavan Srinivasan	46bc0c8529	Removed unicode character classes from 'features not supported' list	2025-02-13 10:48:23 -05:00
Aadhavan Srinivasan	1a890a1e75	Refactoring - remove duplicate code	2025-02-13 09:10:40 -05:00
Aadhavan Srinivasan	fde3784e5a	Added unicode charclass support within character classes; Fixed bugs with hex classes and unicode classes	2025-02-13 08:58:02 -05:00
Aadhavan Srinivasan	7045711860	Convert test_str into a rune slice for better unicode compatibility, it also fixed the bug where all unicode characters wouldn't be colored	2025-02-13 08:57:06 -05:00
Aadhavan Srinivasan	d4d606d95b	Added tests for unicode character classes; more tests for hex characters	2025-02-13 08:55:12 -05:00
Aadhavan Srinivasan	9cd330e521	More work on unicode character class support - fix bug where all characters aren't being matched	2025-02-12 23:04:10 -05:00
Aadhavan Srinivasan	44d6a2005c	Started working on unicode character classes	2025-02-12 22:19:30 -05:00
Aadhavan Srinivasan	f76cd6c3d9	Merge pull request 'Implement Backreferences' (#3 ) from implementBackreferences into master Reviewed-on: #3	2025-02-12 21:17:32 -06:00
Aadhavan Srinivasan	375baa1722	Wrote more backreference tests	2025-02-12 07:51:20 -05:00
Aadhavan Srinivasan	2e47c631bb	Updated documentation to include backreferences	2025-02-12 07:50:59 -05:00
Aadhavan Srinivasan	81b8b1b11c	Do not validate a backreference if the group that it refers to is not valid	2025-02-11 19:12:58 -05:00
Aadhavan Srinivasan	2934e7a20f	Wrote tests for backreferences	2025-02-11 19:12:40 -05:00
Aadhavan Srinivasan	f466d4a8d5	More progress on backreference implementation	2025-02-11 17:06:39 -05:00
Aadhavan Srinivasan	8327450dd2	Started implementing backreferences (octal values should now be prefaced with \0)	2025-02-11 16:14:54 -05:00
Aadhavan Srinivasan	073f231b89	Added function and examples for ReplaceAllFunc()	2025-02-10 21:35:51 -05:00
Aadhavan Srinivasan	3b7257c921	Wrote function and example for ReplaceAllLiteral()	2025-02-10 21:25:49 -05:00
Aadhavan Srinivasan	668df8b70a	Wrote MarshalText() and UnmarshalText() to implement TextMarshaler and TextUnmarshaler	2025-02-10 12:30:48 -05:00
Aadhavan Srinivasan	214acf7e0f	Wrote example for ReplaceAll(); fixed out-of-bounds bug in Expand()	2025-02-10 12:30:17 -05:00
Aadhavan Srinivasan	50221ff4d9	Wrote ReplaceAll(), to replace all matches of the regex with a given string	2025-02-10 12:29:54 -05:00
Aadhavan Srinivasan	5ab95f512a	Updated docs	2025-02-10 09:36:00 -05:00
Aadhavan Srinivasan	e7da678408	Removed obsolete documentation	2025-02-10 09:35:16 -05:00
Aadhavan Srinivasan	ab363e2766	Rewrote test for 'FindString()' to use lookarounds	2025-02-10 09:24:47 -05:00
Aadhavan Srinivasan	c803e45415	Added example for 'FindStringSubmatch()'	2025-02-10 09:19:24 -05:00
Aadhavan Srinivasan	525296f239	Added examples for 'FindAllString()' , 'FindAllSubmatch()' and 'FindAllStringSubmatch()'	2025-02-10 09:10:39 -05:00
Aadhavan Srinivasan	eb0ab9f7ec	Wrote test for FindAllStringSubmatch()	2025-02-10 08:39:20 -05:00
Aadhavan Srinivasan	17a7dbae4c	Wrote FindAllStringSubmatch()	2025-02-10 08:39:10 -05:00
Aadhavan Srinivasan	f2279acd98	Fixed mistake in docs	2025-02-10 08:12:09 -05:00
Aadhavan Srinivasan	662527c478	Merge pull request 'Implement PCRE Matching (prefer left-branch)' (#2 ) from implementPCREMatchingRules into master Reviewed-on: #2	2025-02-09 15:24:26 -06:00
Aadhavan Srinivasan	d1958f289c	Commented out tests that would only pass with Longest()	2025-02-09 16:08:16 -05:00
Aadhavan Srinivasan	15ee49f42e	Rename method receivers from 'regex' to 're' (it's shorter)	2025-02-09 15:51:46 -05:00
Aadhavan Srinivasan	b60ded4136	Don't break when a match is found, if we are looking for the longest match	2025-02-09 15:48:33 -05:00
Aadhavan Srinivasan	9fbb99f86c	Wrote example for Longest()	2025-02-09 15:47:57 -05:00
Aadhavan Srinivasan	af15904f3b	Updated documentation	2025-02-09 15:41:13 -05:00
Aadhavan Srinivasan	d522f50b50	Wrote new example functions	2025-02-09 15:40:59 -05:00
Aadhavan Srinivasan	fb47e082eb	Wrote new methods Expand() and preferLongest(); Use new function signatures (with preferLongest); only characters should be added to next state list	2025-02-09 15:40:39 -05:00
Aadhavan Srinivasan	1f5a363539	Use new function signatures (with preferLongest)	2025-02-09 15:39:09 -05:00
Aadhavan Srinivasan	9e12f9dcb3	Added field to Reg, denoting if we prefer longest match (POSIX style) or not (perl style)	2025-02-09 15:38:26 -05:00
Aadhavan Srinivasan	47f88c817f	Fixed typo	2025-02-09 15:14:17 -05:00
Aadhavan Srinivasan	835d495990	Removed capitalization for error message (staticcheck)	2025-02-09 09:14:45 -05:00
Aadhavan Srinivasan	76e0170cb9	Removed unused function	2025-02-09 09:13:52 -05:00
Aadhavan Srinivasan	d172a58258	Throw error if match isn't found but test.result has >0 elements	2025-02-09 09:13:29 -05:00
Aadhavan Srinivasan	7231169270	Removed unused functions	2025-02-09 09:13:03 -05:00
Aadhavan Srinivasan	e546f01c20	Removed redundant return (staticcheck)	2025-02-09 09:12:55 -05:00
Aadhavan Srinivasan	b7467a00f1	Removed priorityQueue (unused)	2025-02-09 09:07:43 -05:00
Aadhavan Srinivasan	c6ad4caa0d	Removed a bunch of unused code (let's go!!!)	2025-02-09 09:06:40 -05:00
Aadhavan Srinivasan	6334435b83	Updated tests since the engine uses Perl matching instead of POSIX matching; added tests for FindStringSubmatch	2025-02-09 09:01:42 -05:00
Aadhavan Srinivasan	78fb5606dd	Use new definition of Reg	2025-02-09 08:59:16 -05:00
Aadhavan Srinivasan	eddd2ae700	Updated documentation	2025-02-09 08:58:58 -05:00
Aadhavan Srinivasan	c577064977	Added string field to Reg, that contains the expression string; wrote method to return the string	2025-02-09 08:58:46 -05:00
Aadhavan Srinivasan	d4e3942d27	Added Match() and FindStringSubmatch(); removed old code; updated comments	2025-02-09 08:58:09 -05:00
Aadhavan Srinivasan	f15a5cae34	Store all states visited in a single run of 'addStateToList()' in a slice	2025-02-08 16:07:01 -05:00
Aadhavan Srinivasan	62ca1a872a	Made zeroLengthMatchState() return a pointer; reduced the number of comparisons performd by nfaState.equals	2025-02-08 16:06:14 -05:00
Aadhavan Srinivasan	99230b49de	Use new function signature for zeroLengthMatchState()	2025-02-08 16:05:35 -05:00
Aadhavan Srinivasan	22ead83625	Fixed assertion matching	2025-02-07 16:19:36 -05:00
Aadhavan Srinivasan	3604486a9b	Used Pike's algorithm (an extension to Thompson's algorithm) (see Russ Cox's 2nd article); I think I almost have a working PCRE-style engine	2025-02-07 16:06:45 -05:00
Aadhavan Srinivasan	052de55826	question() now returns 2 values	2025-02-07 16:04:46 -05:00
Aadhavan Srinivasan	d2ad0d95a8	Modified question operator so that it doesn't create an unnecessary zero-state	2025-02-07 16:04:26 -05:00
Aadhavan Srinivasan	ccf3b3b299	More progress on implementing PCRE matching	2025-02-06 22:08:56 -05:00
Aadhavan Srinivasan	1d4f695f8f	Wrote function to check if a state is in an nfaState, based on the Equals function	2025-02-06 22:06:51 -05:00
Aadhavan Srinivasan	8534174ea1	Use pointers instead of values	2025-02-06 22:06:22 -05:00
Aadhavan Srinivasan	ed4ffde64e	REFACTOR NEEDED: Added another special case; insert instead of appending into currentStates	2025-02-05 22:51:55 -05:00
Aadhavan Srinivasan	fbc9bea9fb	Commented out unused functions; use new nfaState parameters	2025-02-05 22:23:31 -05:00
Aadhavan Srinivasan	cca8c7cda2	Got rid of transitions parameter, changed how kleene state is processed I replaced the transition parameter for nfaState, replacing it with a single nfaState pointer. This is because any non-alternation state will only have one next state, so the map was just added complexity. I changed alternation processing - instead of having their own dedicated fields, they just use the new 'next' parameter, and another one called 'splitState'. I also changed the kleene state processing to remove the unecessary empty state in the right-side alternation (it actually messed up my matching).	2025-02-05 22:20:28 -05:00
Aadhavan Srinivasan	858e535fba	Continued implementing Thompson's algorithm	2025-02-05 18:01:36 -05:00
Aadhavan Srinivasan	7c62ba6bfd	Started implementing Thompson's algorithm for matching, because the old one was completely backtracking (so it would enter infinite loops on something like '(a)' ) The git diff claims that a ton of code was changed, but most of it was just indentation changes.	2025-02-05 12:21:12 -05:00
Aadhavan Srinivasan	d4e8cb74fd	Replaced pointer to nfaState with nfaState	2025-02-05 11:32:20 -05:00
Aadhavan Srinivasan	3ce611d121	More work towards implementing PCRE matching	2025-02-04 14:09:24 -05:00
Aadhavan Srinivasan	e0253dfaf3	Change kleene() to an alternation-style construct	2025-02-04 14:09:04 -05:00
Aadhavan Srinivasan	753e973d82	Started rewrite of matching algorithm, got concatenation and alternation done, kleene and zero-state stuff is next	2025-02-03 22:01:52 -05:00
Aadhavan Srinivasan	5563a70568	Reverse the order in which I pop states for alternation, because this messes with the left branch-right branch thing	2025-02-03 21:59:41 -05:00
Aadhavan Srinivasan	de0d7345a8	Store left and right branches of alternation separately	2025-02-03 21:59:05 -05:00