Started working on line number flag

Started working on multiple filename arguments; prefix each line with filename containing the line; mostly indentation changes
Updated TODO
2025-03-15 16:24:50 -04:00 · 2025-03-13 12:11:54 -04:00 · 2025-03-12 16:46:57 -04:00 · 2025-03-12 16:46:05 -04:00 · 2025-03-12 16:44:40 -04:00 · 2025-02-24 07:46:54 -05:00
12 changed files with 927 additions and 265 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,17 @@
 ## Kleingrep
 Kleingrep is a regular expression engine, providing a library and command-line tool written in Go.
 It aims to provide a more featureful engine, compared to the one in Go's
 [regexp](https://pkg.go.dev/regexp), while retaining some semblance of efficiency.
 The engine does __not__ use backtracking, relying on the NFA-based method described in
 [Russ Cox's articles](https://swtch.com/~rsc/regexp). As such, it is immune to catastrophic backtracking.
 It also includes features not present in regexp, such as lookarounds and backreferences.
 ### Syntax
 The syntax is, for the most part, a superset of Go's regexp. A full overview of the syntax can be found [here](https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex#hdr-Syntax).
 __For more information, see https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex__.
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"io"
 	"os"
 	"slices"
 	"github.com/fatih/color"
@@ -25,6 +26,8 @@ func main() {
 	multiLineFlag := flag.Bool("t", false, "Multi-line mode. Treats newline just like any character.")
 	printMatchesFlag := flag.Bool("p", false, "Prints start and end index of each match. Can only be used with '-t' for multi-line mode.")
 	caseInsensitiveFlag := flag.Bool("i", false, "Case-insensitive. Disregard the case of all characters.")
 	recursiveFlag := flag.Bool("r", false, "Recursively search all files in the given directory.")
 	lineNumFlag := flag.Bool("n", false, "For each line with a match, print the line number. Implies -l.")
 	matchNum := flag.Int("m", 0, "Print the match with the given index. Eg. -m 3 prints the third match.")
 	substituteText := flag.String("s", "", "Substitute the contents of each match with the given string. Overrides -o and -v")
 	flag.Parse()
@@ -64,162 +67,197 @@ func main() {
 	// 2. Build NFA from postfix representation (Thompson's algorithm)
 	// 3. Run the string against the NFA
-	if len(flag.Args()) != 1 { // flag.Args() also strips out program name
+	if len(flag.Args()) < 1 { // flag.Args() also strips out program name
 		fmt.Println("ERROR: Missing cmdline args")
 		os.Exit(22)
 	}
 	if *recursiveFlag && len(flag.Args()) < 2 { // File/Directory must be provided with '-r'
 		fmt.Println("ERROR: Missing cmdline args")
 		os.Exit(22)
 	}
 	var re string
 	re = flag.Args()[0]
 	var inputFiles []*os.File
 	if len(flag.Args()) == 1 || flag.Args()[1] == "-" { // Either no file argument, or file argument is "-"
 		if !slices.Contains(inputFiles, os.Stdin) {
 			inputFiles = append(inputFiles, os.Stdin) // os.Stdin cannot be entered more than once into the file list
 		}
 	} else {
 		inputFilenames := flag.Args()[1:]
 		for _, inputFilename := range inputFilenames {
 			inputFile, err := os.Open(inputFilename)
 			if err != nil {
 				fmt.Printf("%s: No such file or directory\n", flag.Args()[1])
 				os.Exit(2)
 			}
 			inputFiles = append(inputFiles, inputFile)
 		}
 	}
 	var test_str string
 	var err error
 	var linesRead bool // Whether or not we have read the lines in the file
 	lineNum := 0       // Current line number
-	// Create reader for stdin and writer for stdout
+	// Create writer for stdout
 	reader := bufio.NewReader(os.Stdin)
 	out := bufio.NewWriter(os.Stdout)
-
+	// Compile regex
 	regComp, err := reg.Compile(re, flagsToCompile...)
 	if err != nil {
 		fmt.Println(err)
 		return
 	}
-	for true {
+
-		if linesRead {
+	for _, inputFile := range inputFiles {
-			break
+		reader := bufio.NewReader(inputFile)
-		}
+		linesRead = false
-		if !(*multiLineFlag) {
+		for true {
-			// Read every string from stdin until we encounter an error. If the error isn't EOF, panic.
+			if linesRead {
-			test_str, err = reader.ReadString('\n')
+				break
-			lineNum++
+			}
-			if err != nil {
+			if !(*multiLineFlag) {
 				// Read every string from stdin until we encounter an error. If the error isn't EOF, panic.
 				test_str, err = reader.ReadString('\n')
 				lineNum++
 				if err != nil {
 					if err == io.EOF {
 						linesRead = true
 					} else {
 						panic(err)
 					}
 				}
 				if len(test_str) > 0 && test_str[len(test_str)-1] == '\n' {
 					test_str = test_str[:len(test_str)-1]
 				}
 			} else {
 				// Multi-line mode - read every line of input into a temp. string.
 				// test_str will contain all lines of input (including newline characters)
 				// as one string.
 				var temp string
 				for temp, err = reader.ReadString('\n'); err == nil; temp, err = reader.ReadString('\n') {
 					test_str += temp
 				}
 				// Assuming err != nil
 				if err == io.EOF {
 					if len(temp) > 0 {
 						test_str += temp // Add the last line (if it is non-empty)
 					}
 					linesRead = true
 				} else {
 					panic(err)
 				}
 			}
-			if len(test_str) > 0 && test_str[len(test_str)-1] == '\n' {
+			matchIndices := make([]reg.Match, 0)
-				test_str = test_str[:len(test_str)-1]
+			if matchNumFlagEnabled {
-			}
+				tmp, err := regComp.FindNthMatch(test_str, *matchNum)
-		} else {
+				if err == nil {
-			// Multi-line mode - read every line of input into a temp. string.
+					matchIndices = append(matchIndices, tmp)
 			// test_str will contain all lines of input (including newline characters)
 			// as one string.
 			var temp string
 			for temp, err = reader.ReadString('\n'); err == nil; temp, err = reader.ReadString('\n') {
 				test_str += temp
 			}
 			// Assuming err != nil
 			if err == io.EOF {
 				if len(temp) > 0 {
 					test_str += temp // Add the last line (if it is non-empty)
 				}
 				linesRead = true
 			} else {
-				panic(err)
+				matchIndices = regComp.FindAllSubmatch(test_str)
 			}
 		}
 		matchIndices := make([]reg.Match, 0)
 		if matchNumFlagEnabled {
 			tmp, err := regComp.FindNthMatch(test_str, *matchNum)
 			if err == nil {
 				matchIndices = append(matchIndices, tmp)
 			}
 		} else {
 			matchIndices = regComp.FindAllSubmatch(test_str)
 		}
-		if *printMatchesFlag {
+			test_str_runes := []rune(test_str) // Converting to runes preserves unicode characters
 			// if we are in single line mode, print the line on which
 			// the matches occur
 			if len(matchIndices) > 0 {
 				if !(*multiLineFlag) {
 					fmt.Fprintf(out, "Line %d:\n", lineNum)
 				}
 				for _, m := range matchIndices {
 					fmt.Fprintf(out, "%s\n", m.String())
 				}
 				err := out.Flush()
 				if err != nil {
 					panic(err)
 				}
 			}
 			continue
 		}
 		// Decompose the array of matchIndex structs into a flat unique array of ints - if matchIndex is {4,7}, flat array will contain 4,5,6
 		// This should make checking O(1) instead of O(n)
 		indicesToPrint := new_uniq_arr[int]()
 		for _, idx := range matchIndices {
 			indicesToPrint.add(genRange(idx[0].StartIdx, idx[0].EndIdx)...)
 		}
 		// If we are inverting, then we should print the indices which _didn't_ match
 		// in color.
 		if *invertFlag {
 			oldIndices := indicesToPrint.values()
 			indicesToPrint = new_uniq_arr[int]()
 			// Explanation:
 			// Find all numbers from 0 to len(test_str) that are NOT in oldIndices.
 			// These are the values we want to print, now that we have inverted the match.
 			// Re-initialize indicesToPrint and add all of these values to it.
 			indicesToPrint.add(setDifference(genRange(0, len(test_str)), oldIndices)...)
-		}
+			if *printMatchesFlag {
-		// If lineFlag is enabled, we should only print something if:
+				// if we are in single line mode, print the line on which
-		// 		a. We are not inverting, and have at least one match on the current line
+				// the matches occur
-		// 		OR
+				if len(matchIndices) > 0 {
-		// 		b. We are inverting, and have no matches at all on the current line.
+					if !(*multiLineFlag) {
-		// This checks for the inverse, and continues if it is true.
+						fmt.Fprintf(out, "Line %d:\n", lineNum)
-		if *lineFlag {
+					}
-			if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 {
+					for _, m := range matchIndices {
 						fmt.Fprintf(out, "%s\n", m.String())
 					}
 					err := out.Flush()
 					if err != nil {
 						panic(err)
 					}
 				}
 				continue
 			}
-		}
+			// Decompose the array of matchIndex structs into a flat unique array of ints - if matchIndex is {4,7}, flat array will contain 4,5,6
 			// This should make checking O(1) instead of O(n)
 			indicesToPrint := new_uniq_arr[int]()
 			for _, idx := range matchIndices {
 				indicesToPrint.add(genRange(idx[0].StartIdx, idx[0].EndIdx)...)
 			}
 			// If we are inverting, then we should print the indices which _didn't_ match
 			// in color.
 			if *invertFlag {
 				oldIndices := indicesToPrint.values()
 				indicesToPrint = new_uniq_arr[int]()
 				// Explanation:
 				// Find all numbers from 0 to len(test_str_runes) that are NOT in oldIndices.
 				// These are the values we want to print, now that we have inverted the match.
 				// Re-initialize indicesToPrint and add all of these values to it.
 				indicesToPrint.add(setDifference(genRange(0, len(test_str_runes)), oldIndices)...)
-		// If we are substituting, we need a different behavior, as follows:
+			}
-		// For every character in the test string:
+			// If lineFlag is enabled, we should only print something if:
-		// 		1. Check if the index is the start of any matchIndex
+			// 		a. We are not inverting, and have at least one match on the current line
-		// 		2. If so, print the substitute text, and set our index to
+			// 		OR
-		//			the corresponding end index.
+			// 		b. We are inverting, and have no matches at all on the current line.
-		// 		3. If not, just print the character.
+			// This checks for the inverse, and continues if it is true.
-		if substituteFlagEnabled {
+			if *lineFlag {
-			for i := range test_str {
+				if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 {
-				inMatchIndex := false
+					continue
-				for _, m := range matchIndices {
+				} else {
-					if i == m[0].StartIdx {
+					color.New(color.FgMagenta).Fprintf(out, "%s: ", inputFile.Name()) // Print filename
 						fmt.Fprintf(out, "%s", *substituteText)
 						i = m[0].EndIdx
 						inMatchIndex = true
 						break
 					}
 				}
 				if !inMatchIndex {
 					fmt.Fprintf(out, "%c", test_str[i])
 				}
 			}
-		} else {
+
-			for i, c := range test_str {
+			// If we are substituting, we need a different behavior, as follows:
-				if indicesToPrint.contains(i) {
+			// For every character in the test string:
-					color.New(color.FgRed).Fprintf(out, "%c", c)
+			// 		1. Check if the index is the start of any matchIndex
-					// Newline after every match - only if -o is enabled and -v is disabled.
+			// 		2. If so, print the substitute text, and set our index to
-					if *onlyFlag && !(*invertFlag) {
+			//			the corresponding end index.
-						for _, idx := range matchIndices {
+			// 		3. If not, just print the character.
-							if i+1 == idx[0].EndIdx { // End index is one more than last index of match
+			if substituteFlagEnabled {
-								fmt.Fprintf(out, "\n")
+				for i := range test_str_runes {
-								break
+					inMatchIndex := false
-							}
+					for _, m := range matchIndices {
 						if i == m[0].StartIdx {
 							fmt.Fprintf(out, "%s", *substituteText)
 							i = m[0].EndIdx
 							inMatchIndex = true
 							break
 						}
 					}
-				} else {
+					if !inMatchIndex {
-					if !(*onlyFlag) {
+						fmt.Fprintf(out, "%c", test_str_runes[i])
-						fmt.Fprintf(out, "%c", c)
+					}
 				}
 			} else {
 				for i, c := range test_str_runes {
 					if indicesToPrint.contains(i) {
 						color.New(color.FgRed, color.Bold).Fprintf(out, "%c", c)
 						// Newline after every match - only if -o is enabled and -v is disabled.
 						if *onlyFlag && !(*invertFlag) {
 							for matchIdxNum, idx := range matchIndices {
 								if matchIdxNum < len(matchIndices)-1 { // Only print a newline afte printing a match, if there are multiple matches on the line, and we aren't on the last one. This is because the newline that gets added at the end will take care of that.
 									if i+1 == idx[0].EndIdx { // End index is one more than last index of match
 										fmt.Fprintf(out, "\n")
 										break
 									}
 								}
 							}
 						}
 					} else {
 						if !(*onlyFlag) {
 							fmt.Fprintf(out, "%c", c)
 						}
 					}
 				}
 			}
 			err = out.Flush()
 			if err != nil {
 				panic(err)
 			}
 			// If the last character in the string wasn't a newline, AND we either have don't -o set or we do (and we've matched something), then print a newline
 			if (len(test_str_runes) > 0 && test_str_runes[len(test_str_runes)-1] != '\n') &&
 				(!*onlyFlag || indicesToPrint.len() > 0) {
 				fmt.Println()
 			}
 		}
 		err = out.Flush()
 		if err != nil {
 			panic(err)
 		}
 		fmt.Println()
 	}
 }
--- a/cmd/unique_array.go
+++ b/cmd/unique_array.go
@@ -36,3 +36,7 @@ func (s uniq_arr[T]) values() []T {
 	}
 	return toRet
 }
 // len returns the number of entries currently held in s.backingMap.
 func (s uniq_arr[T]) len() int {
 	return len(s.backingMap)
 }
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -31,6 +31,22 @@ func (re Reg) String() string {
 	return re.str
 }
 // MarshalText implements [encoding.TextMarshaler]. The bytes it returns are exactly those of [Reg.String].
 // Flags that were passed as arguments, and the effect of calling [Reg.Longest], are not preserved in the output.
 func (re *Reg) MarshalText() ([]byte, error) {
 	text := re.String()
 	return []byte(text), nil
 }
 // UnmarshalText implements [encoding.TextUnmarshaler]. The given bytes are compiled with [Compile] (no flags).
 // On success re is overwritten with the compiled expression; on failure re is left untouched.
 // The error returned by [Compile] is passed back to the caller unchanged.
 func (re *Reg) UnmarshalText(text []byte) error {
 	compiled, err := Compile(string(text))
 	if err != nil {
 		return err
 	}
 	*re = compiled
 	return nil
 }
 // Longest sets the preferLongest field of re to true.
 // NOTE(review): presumably this makes later matches prefer the longest match, in the
 // spirit of regexp.Regexp.Longest - confirm against the matching code.
 func (re *Reg) Longest() {
 	re.preferLongest = true
 }
@@ -48,7 +64,7 @@ const (
 )
 func isOperator(c rune) bool {
-	if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune {
+	if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune || c == lazyPlusRune || c == lazyKleeneRune || c == lazyQuestionRune {
 		return true
 	}
 	return false
@@ -56,7 +72,7 @@ func isOperator(c rune) bool {
 /* priority returns the priority of the given operator */
 func priority(op rune) int {
-	precedence := []rune{'|', concatRune, '+', '*', '?'}
+	precedence := []rune{'|', concatRune, '+', lazyPlusRune, '*', lazyKleeneRune, '?', lazyQuestionRune}
 	return slices.Index(precedence, op)
 }
@@ -92,6 +108,48 @@ func getPOSIXClass(str []rune) (bool, string) {
 	return true, rtv
 }
 // isUnicodeCharClassLetter reports whether c is one of the single-letter
 // unicode character class names: L, M, S, N, P, C or Z.
 func isUnicodeCharClassLetter(c rune) bool {
 	classLetters := []rune("LMSNPCZ")
 	return slices.Index(classLetters, c) != -1
 }
 // rangeTableToRuneSlice expands the given range table into a rune slice and returns it.
 // Every entry of rangetable.R16 is expanded first, followed by every entry of rangetable.R32;
 // within an entry the runes Lo, Lo+Stride, ... up to and including Hi are appended in order.
 // A table with no ranges yields a nil slice.
 func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune {
 	var rtv []rune
 	for _, r := range rangetable.R16 {
 		// Widen before looping: a uint16 counter wraps to a small value when Hi is at
 		// (or within Stride of) 0xFFFF, so the 'c <= hi' condition would never fail.
 		lo, hi, stride := uint32(r.Lo), uint32(r.Hi), uint32(r.Stride)
 		for c := lo; c <= hi; c += stride {
 			rtv = append(rtv, rune(c))
 		}
 	}
 	for _, r := range rangetable.R32 {
 		// Same widening for the 32-bit ranges, so the counter cannot wrap either.
 		lo, hi, stride := uint64(r.Lo), uint64(r.Hi), uint64(r.Stride)
 		for c := lo; c <= hi; c += stride {
 			rtv = append(rtv, rune(c))
 		}
 	}
 	return rtv
 }
 // unicodeCharClassToRange converts the given unicode character class name into a list of characters in that class.
 // The name is looked up in [unicode.Categories] first (eg. 'C' or 'Lu') and then in [unicode.Scripts] (eg. 'Greek').
 // Both tables are consulted regardless of the length of the name, so that short script names such as 'Yi' resolve as well.
 // An error is returned if the name is empty or is found in neither table.
 func unicodeCharClassToRange(class string) ([]rune, error) {
 	if len(class) == 0 {
 		return nil, fmt.Errorf("empty unicode character class")
 	}
 	if rangeTable, ok := unicode.Categories[class]; ok {
 		return rangeTableToRuneSlice(rangeTable), nil
 	}
 	if rangeTable, ok := unicode.Scripts[class]; ok {
 		return rangeTableToRuneSlice(rangeTable), nil
 	}
 	// Keep the two distinct messages of the length-based lookup, so callers see the same errors for unknown names.
 	if len(class) <= 2 {
 		return nil, fmt.Errorf("invalid short unicode character class")
 	}
 	return nil, fmt.Errorf("invalid long unicode character class")
 }
 // Stores whether the case-insensitive flag has been enabled.
 var caseInsensitive bool
@@ -150,9 +208,6 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 	//		metacharacter. Later, in thompson(), these will be converted back. This avoids
 	//		confusion in detecting whether a character is escaped eg. detecting
 	// 		whether '\\[a]' has an escaped opening bracket (it doesn't).
 	//
 	// 	5. 	Check for non-greedy operators. These are not supported at the moment, so an error
 	// 		must be thrown if the user attempts to use a non-greedy operator.
 	for i := 0; i < len(re_runes_orig); i++ {
 		c := re_runes_orig[i]
 		if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
@@ -199,8 +254,16 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 		} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
 			re_runes = append(re_runes, rbracketRune)
 			continue
-		} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
+		} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i > 0 && re_runes_orig[i-1] != '\\') && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
-			return nil, fmt.Errorf("non-greedy operators are not supported")
+			switch c {
 			case '+':
 				re_runes = append(re_runes, lazyPlusRune)
 			case '*':
 				re_runes = append(re_runes, lazyKleeneRune)
 			case '?':
 				re_runes = append(re_runes, lazyQuestionRune)
 			}
 			i++
 		} else {
 			re_runes = append(re_runes, c)
 		}
@@ -293,17 +356,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 					}
 				} else if isHex(re_runes[i]) {
 					re_postfix = append(re_postfix, re_runes[i:i+2]...)
-					i += 2
+					i += 1 // I don't skip forward 2 steps, because the second step will happen with the loop increment
 				} else {
 					return nil, fmt.Errorf("invalid hex value in expression")
 				}
-			} else if isOctal(re_runes[i]) {
+			} else if re_runes[i] == 'p' || re_runes[i] == 'P' { // Unicode character class (P is negated unicode charclass)
 				re_postfix = append(re_postfix, re_runes[i])
 				i++
 				if i >= len(re_runes) {
 					return nil, fmt.Errorf("error parsing unicode character class in expression")
 				}
 				if re_runes[i] == '{' { // Full name charclass
 					for re_runes[i] != '}' {
 						re_postfix = append(re_postfix, re_runes[i])
 						i++
 					}
 					re_postfix = append(re_postfix, re_runes[i])
 					i++
 				} else if isUnicodeCharClassLetter(re_runes[i]) {
 					re_postfix = append(re_postfix, re_runes[i])
 					i++
 				} else {
 					return nil, fmt.Errorf("error parsing unicode character class in expression")
 				}
 				i-- // The loop increment at the top will move us forward
 			} else if re_runes[i] == '0' { // Start of octal value
 				numDigits := 1
-				for i+numDigits < len(re_runes) && numDigits < 3 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 3)
+				for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0)
 					numDigits++
 				}
 				re_postfix = append(re_postfix, re_runes[i:i+numDigits]...)
 				i += (numDigits - 1) // I have to move back a step, so that I can add a concatenation operator if necessary, and so that the increment at the bottom of the loop works as intended
 			} else if unicode.IsDigit(re_runes[i]) { // Any other number - backreference
 				numDigits := 1
 				for i+numDigits < len(re_runes) && unicode.IsDigit(re_runes[i+numDigits]) { // Skip while we see a digit
 					numDigits++
 				}
 				re_postfix = append(re_postfix, re_runes[i:i+numDigits]...)
 				i += (numDigits - 1) // Move back a step to add concatenation operator
 			} else {
 				re_postfix = append(re_postfix, re_runes[i])
 			}
@@ -320,10 +410,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 				if i >= len(re_runes) {
 					return nil, fmt.Errorf("unclosed lookaround")
 				}
-				if re_runes[i] == '(' || re_runes[i] == nonCapLparenRune {
+				if (re_runes[i] == '(' && re_runes[i-1] != '\\') || re_runes[i] == nonCapLparenRune {
 					numOpenParens++
 				}
-				if re_runes[i] == ')' {
+				if re_runes[i] == ')' && re_runes[i-1] != '\\' {
 					numOpenParens--
 					if numOpenParens == 0 {
 						break
@@ -336,7 +426,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 		}
 		if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != nonCapLparenRune && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
 			if i < len(re_runes)-1 {
-				if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
+				if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != lazyKleeneRune && re_runes[i+1] != '+' && re_runes[i+1] != lazyPlusRune && re_runes[i+1] != '?' && re_runes[i+1] != lazyQuestionRune && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
 					re_postfix = append(re_postfix, concatRune)
 				}
 			}
@@ -348,7 +438,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 	outQueue := make([]postfixNode, 0) // Output queue
 	// Actual algorithm
-	numOpenParens := 0 // Number of open parentheses
+	numOpenParens := 0                               // Number of open parentheses
 	parenIndices := make([]Group, 0)                 // I really shouldn't be using Group here, because that's strictly for matching purposes, but its a convenient way to store the indices of the opening and closing parens.
 	parenIndices = append(parenIndices, Group{0, 0}) // I append a weird value here, because the 0-th group doesn't have any parens. This way, the 1st group will be at index 1, 2nd at 2 ...
 	for i := 0; i < len(re_postfix); i++ {
 		/* Two cases:
 		1. Current character is alphanumeric - send to output queue
@@ -404,11 +496,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 				} else {
 					return nil, fmt.Errorf("not enough hex characters found in expression")
 				}
-			} else if isOctal(re_postfix[i]) { // Octal value
+			} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
 				charClassInverted := (re_postfix[i] == 'P')
 				var charsInClass []rune
 				i++
 				if isUnicodeCharClassLetter(re_postfix[i]) {
 					var err error
 					charsInClass, err = unicodeCharClassToRange(string(re_postfix[i]))
 					if err != nil {
 						return nil, err
 					}
 				} else if re_postfix[i] == '{' {
 					i++ // Skip opening bracket
 					unicodeCharClassStr := ""
 					for re_postfix[i] != '}' {
 						unicodeCharClassStr += string(re_postfix[i])
 						i++
 					}
 					var err error
 					charsInClass, err = unicodeCharClassToRange(unicodeCharClassStr)
 					if err != nil {
 						return nil, err
 					}
 				} else {
 					return nil, fmt.Errorf("error parsing unicode character class in expression")
 				}
 				var toAppend postfixNode
 				if !charClassInverted { // \p
 					toAppend = newPostfixNode(charsInClass...)
 				} else { // \P
 					toAppend = newPostfixDotNode()
 					toAppend.except = append([]postfixNode{}, newPostfixNode(charsInClass...))
 				}
 				outQueue = append(outQueue, toAppend)
 			} else if re_postfix[i] == '0' { // Octal value
 				var octVal int64
 				var octValStr string
 				numDigitsParsed := 0
-				for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 {
+				for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 4 {
 					octValStr += string(re_postfix[i+numDigitsParsed])
 					numDigitsParsed++
 				}
@@ -421,6 +546,20 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 				}
 				i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
 				outQueue = append(outQueue, newPostfixCharNode(rune(octVal)))
 			} else if unicode.IsDigit(re_postfix[i]) { // Backreference
 				var num int64
 				var numStr string
 				numDigitsParsed := 0
 				for (i+numDigitsParsed) < len(re_postfix) && unicode.IsDigit(re_postfix[i+numDigitsParsed]) {
 					numStr += string(re_postfix[i+numDigitsParsed])
 					numDigitsParsed++
 				}
 				num, err := strconv.ParseInt(numStr, 10, 32)
 				if err != nil {
 					return nil, fmt.Errorf("error parsing backreference in expresion")
 				}
 				i += numDigitsParsed - 1
 				outQueue = append(outQueue, newPostfixBackreferenceNode(int(num)))
 			} else {
 				escapedNode, err := newEscapedNode(re_postfix[i], false)
 				if err != nil {
@@ -450,10 +589,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 				if i >= len(re_postfix) {
 					return nil, fmt.Errorf("unclosed lookaround")
 				}
-				if re_postfix[i] == '(' || re_postfix[i] == nonCapLparenRune {
+				if (re_postfix[i] == '(' && re_postfix[i-1] != '\\') || re_postfix[i] == nonCapLparenRune {
 					numOpenParens++
 				}
-				if re_postfix[i] == ')' {
+				if re_postfix[i] == ')' && re_postfix[i-1] != '\\' {
 					numOpenParens--
 					if numOpenParens == 0 {
 						break
@@ -572,11 +711,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 						} else {
 							return nil, fmt.Errorf("not enough hex characters found in character class")
 						}
-					} else if isOctal(re_postfix[i]) { // Octal value
+					} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
 						charClassInverted := (re_postfix[i] == 'P')
 						var charsInList []rune
 						i++
 						if isUnicodeCharClassLetter(re_postfix[i]) {
 							var err error
 							charsInList, err = unicodeCharClassToRange(string(re_postfix[i]))
 							if err != nil {
 								return nil, err
 							}
 						} else if re_postfix[i] == '{' {
 							i++ // Skip opening bracket
 							unicodeCharClassStr := ""
 							for re_postfix[i] != '}' {
 								unicodeCharClassStr += string(re_postfix[i])
 								i++
 							}
 							var err error
 							charsInList, err = unicodeCharClassToRange(unicodeCharClassStr)
 							if err != nil {
 								return nil, err
 							}
 						} else {
 							return nil, fmt.Errorf("error parsing unicode character class in expression")
 						}
 						if !charClassInverted {
 							chars = append(chars, newPostfixNode(charsInList...))
 						} else {
 							toAppend := newPostfixDotNode()
 							toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...))
 							chars = append(chars, toAppend)
 						}
 					} else if re_postfix[i] == '0' { // Octal value
 						var octVal int64
 						var octValStr string
 						numDigitsParsed := 0
-						for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
+						for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 4 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
 							octValStr += string(re_postfix[i+numDigitsParsed])
 							numDigitsParsed++
 						}
@@ -773,6 +945,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 			}
 			outQueue[idx].startReps = startRangeNum
 			outQueue[idx].endReps = endRangeNum
 			if i < len(re_postfix)-1 && re_postfix[i+1] == '?' { // lazy repitition
 				outQueue[idx].isLazy = true
 				i++
 			}
 		}
 		if c == '(' || c == nonCapLparenRune {
 			opStack = append(opStack, c)
@@ -780,6 +956,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 				outQueue = append(outQueue, newPostfixNode(c))
 			}
 			numOpenParens++
 			parenIndices = append(parenIndices, Group{StartIdx: len(outQueue) - 1}) // Push the index of the lparen into parenIndices
 		}
 		if c == ')' {
 			// Keep popping from opStack until we encounter an opening parantheses or a NONCAPLPAREN_CHAR. Throw error if we reach the end of the stack.
@@ -796,6 +973,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 			if val == '(' {       // Whatever was inside the parentheses was a _capturing_ group, so we append the closing parentheses as well
 				outQueue = append(outQueue, newPostfixNode(')')) // Add closing parentheses
 			}
 			parenIndices[numOpenParens].EndIdx = len(outQueue) - 1
 			numOpenParens--
 		}
 	}
@@ -810,6 +988,11 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 		return nil, fmt.Errorf("imbalanced parantheses")
 	}
 	//	outQueue, _, err := rewriteBackreferences(outQueue, parenIndices)
 	//	if err != nil {
 	//		return nil, err
 	//	}
 	return outQueue, nil
 }
@@ -1021,6 +1204,21 @@ func thompson(re []postfixNode) (Reg, error) {
 			})
 			nfa = append(nfa, toAdd)
 		}
 		if c.nodetype == backreferenceNode {
 			if c.referencedGroup > numGroups {
 				return Reg{}, fmt.Errorf("invalid backreference")
 			}
 			stateToAdd := &nfaState{}
 			stateToAdd.assert = noneAssert
 			stateToAdd.content = newContents(epsilon)
 			stateToAdd.isEmpty = true
 			stateToAdd.isBackreference = true
 			stateToAdd.output = make([]*nfaState, 0)
 			stateToAdd.output = append(stateToAdd.output, stateToAdd)
 			stateToAdd.referredGroup = c.referencedGroup
 			stateToAdd.threadBackref = 0
 			nfa = append(nfa, stateToAdd)
 		}
 		// Must be an operator if it isn't a character
 		switch c.nodetype {
 		case concatenateNode:
@@ -1044,6 +1242,9 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, err
 			}
 			if c.isLazy {
 				stateToAdd.isLazy = true
 			}
 			nfa = append(nfa, stateToAdd)
 		case plusNode: // a+ is equivalent to aa*
 			s1 := mustPop(&nfa)
@@ -1051,6 +1252,9 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, err
 			}
 			if c.isLazy {
 				s2.isLazy = true
 			}
 			s1 = concatenate(s1, s2)
 			nfa = append(nfa, s1)
 		case questionNode: // ab? is equivalent to a(b|)
@@ -1062,6 +1266,9 @@ func thompson(re []postfixNode) (Reg, error) {
 			if err != nil {
 				return Reg{}, err
 			}
 			if c.isLazy {
 				s2.isLazy = true
 			}
 			nfa = append(nfa, s2)
 		case pipeNode:
 			// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
@@ -1117,6 +1324,9 @@ func thompson(re []postfixNode) (Reg, error) {
 				if err != nil {
 					return Reg{}, err
 				}
 				if c.isLazy {
 					s2.isLazy = true
 				}
 				stateToAdd = concatenate(stateToAdd, s2)
 			} else { // Case 2
 				for i := c.startReps; i < c.endReps; i++ {
@@ -1124,6 +1334,9 @@ func thompson(re []postfixNode) (Reg, error) {
 					if err != nil {
 						return Reg{}, fmt.Errorf("error processing bounded repetition")
 					}
 					if c.isLazy {
 						tmp.isLazy = true
 					}
 					stateToAdd = concatenate(stateToAdd, tmp)
 				}
 			}
--- a/regex/doc.go
+++ b/regex/doc.go
@@ -18,7 +18,7 @@ Single characters:
 	[^abc]			Negated character class - match any character except a, b and c
 	[^a-z]			Negated character range - do not match any character from a to z
 	\[				Match a literal '['. Backslashes can escape any character with special meaning, including another backslash.
-	\452			Match the character with the octal value 452 (up to 3 digits)
+	\0452			Match the character with the octal value 452 (up to 4 digits, first digit must be 0)
 	\xFF			Match the character with the hex value FF (exactly 2 characters)
 	\x{0000FF}		Match the character with the hex value 0000FF (exactly 6 characters)
 	\n				Newline
@@ -33,7 +33,7 @@ Perl classes:
 	\d				Match any digit character ([0-9])
 	\D				Match any non-digit character ([^0-9])
 	\w				Match any word character ([a-zA-Z0-9_])
-	\W				Match any word character ([^a-zA-Z0-9_])
+	\W				Match any non-word character ([^a-zA-Z0-9_])
 	\s				Match any whitespace character ([ \t\n])
 	\S				Match any non-whitespace character ([^ \t\n])
@@ -60,14 +60,24 @@ Composition:
 	x|y				Match x or y (prefer x)
 	xy|z			Match xy or z (prefer xy)
-Repitition (always greedy, preferring more):
+Repetition:
-	x*				Match x zero or more times
+	Greedy:
-	x+				Match x one or more times
+	x*				Match x zero or more times, prefer more
-	x?				Match x zero or one time
+	x+				Match x one or more times, prefer more
-	x{m,n}			Match x between m and n times (inclusive)
+	x?				Match x zero or one time, prefer one
-	x{m,}			Match x atleast m times
+	x{m,n}			Match x between m and n times (inclusive), prefer more
-	x{,n}			Match x between 0 and n times (inclusive)
+	x{m,}			Match x at least m times, prefer more
 	x{,n}			Match x between 0 and n times (inclusive), prefer more
 	x{m}			Match x exactly m times
 	Lazy:
 	x*?				Match x zero or more times, prefer fewer
 	x+?				Match x one or more times, prefer fewer
 	x??				Match x zero or one time, prefer zero
 	x{m,n}?			Match x between m and n times (inclusive), prefer fewer
 	x{m,}?			Match x at least m times, prefer fewer
 	x{,n}?			Match x between 0 and n times (inclusive), prefer fewer
 	x{m}			Match x exactly m times
 Grouping:
@@ -93,6 +103,10 @@ Lookarounds:
 	(?<=x)y			Positive lookbehind - Match y if preceded by x
 	(?<!x)y			Negative lookbehind - Match y if NOT preceded by x
 Backreferences:
 	(xy)\1			Match 'xy' followed by the text most recently captured by group 1 (in this case, 'xy')
 Numeric ranges:
 	<x-y>			Match any number from x to y (inclusive) (x and y must be positive numbers)
@@ -103,33 +117,13 @@ Numeric ranges:
 The engine and the API differ from [regexp] in a few ways, some of them very subtle.
 The key differences are mentioned below.
-1. Greediness:
+1. Byte-slices and runes:
 This engine does not support non-greedy operators. All operators are always greedy in nature, and will try
 to match as much as they can, while still allowing for a successful match. For example, given the regex:
 	y*y
 The engine will match as many 'y's as it can, while still allowing the trailing 'y' to be matched.
 Another, more subtle example is the following regex:
 	x|xx
 While the stdlib implementation (and most other engines) will prefer matching the first item of the alternation,
 this engine will go for the longest possible match, regardless of the order of the alternation. Although this
 strays from the convention, it results in a nice rule-of-thumb - the engine is ALWAYS greedy.
 The stdlib implementation has a function [regexp.Regexp.Longest] which makes future searches prefer the longest match.
 That is the default (and unchangable) behavior in this engine.
 2. Byte-slices and runes:
 My engine does not support byte-slices. When a matching function receives a string, it converts it into a
 rune-slice to iterate through it. While this has some space overhead, the convenience of built-in unicode
 support made the tradeoff worth it.
-3. Return values
+2. Return values
 Rather than using primitives for return values, my engine defines two types that are used as return
 values: a [Group] represents a capturing group, and a [Match] represents a list of groups.
@@ -164,14 +158,15 @@ returns the 0-group.
 The following features from [regexp] are (currently) NOT supported:
 1. Named capturing groups
- 2. Non-greedy operators
+ 2. Negated POSIX classes
- 3. Unicode character classes
+ 3. Embedded flags (flags are instead passed as arguments to [Compile])
- 4. Embedded flags (flags are passed as arguments to [Compile])
+ 4. Literal text with \Q ... \E
- 5. Literal text with \Q ... \E
+ 5. Finite repetition with no start (defaulting at 0)
 The following features are not available in [regexp], but are supported in my engine:
 1. Lookarounds
 2. Numeric ranges
 3. Backreferences
 I hope to shorten the first list, and expand the second.
 */
--- a/regex/example_test.go
+++ b/regex/example_test.go
@@ -2,6 +2,7 @@ package regex_test
 import (
 	"fmt"
 	"strings"
 	"gitea.twomorecents.org/Rockingcool/kleingrep/regex"
 )
@@ -32,12 +33,12 @@ func ExampleReg_FindAll() {
 }
 func ExampleReg_FindString() {
-	regexStr := `\d+`
+	regexStr := `\w+\s+(?=sheep)`
 	regexComp := regex.MustCompile(regexStr)
-	matchStr := regexComp.FindString("The year of our lord, 2025")
+	matchStr := regexComp.FindString("pink cows and yellow sheep")
 	fmt.Println(matchStr)
-	// Output: 2025
+	// Output: yellow
 }
 func ExampleReg_FindSubmatch() {
@@ -53,6 +54,71 @@ func ExampleReg_FindSubmatch() {
 	// 2	3
 }
 func ExampleReg_FindStringSubmatch() {
 	// Three capturing groups: a 4-digit, a 2-digit and another 2-digit field, separated by dashes.
 	dateRe := regex.MustCompile(`(\d{4})-(\d{2})-(\d{2})`)
 	groups := dateRe.FindStringSubmatch(`The date is 2025-02-10`)
 	fmt.Println(groups[1]) // text captured by group 1
 	fmt.Println(groups[3]) // text captured by group 3
 	// Output: 2025
 	// 10
 }
 func ExampleReg_FindAllSubmatch() {
 	decimalRe := regex.MustCompile(`(\d)\.(\d)(\d)`)
 	found := decimalRe.FindAllSubmatch("3.14+8.97")
 	// Matches and groups are both 0-indexed; group 0 of a match is the entire match.
 	fmt.Println(found[0][0]) // entire 1st match
 	fmt.Println(found[0][1]) // 1st group of the 1st match
 	fmt.Println(found[1][0]) // entire 2nd match
 	fmt.Println(found[1][1]) // 1st group of the 2nd match
 	// Output: 0	4
 	// 0	1
 	// 5	9
 	// 5	6
 }
 func ExampleReg_FindAllString() {
 	// Four numeric ranges from 0 to 255, separated by literal periods.
 	ipRe := regex.MustCompile(`<0-255>\.<0-255>\.<0-255>\.<0-255>`)
 	addrs := ipRe.FindAllString(`192.168.220.7 pings 9.9.9.9`)
 	fmt.Println(addrs[0])
 	fmt.Println(addrs[1])
 	// Output: 192.168.220.7
 	// 9.9.9.9
 }
 func ExampleReg_FindAllStringSubmatch() {
 	// The expression reads as:
 	// 		'https://' ...
 	// 		group 1: one or more alphanumeric characters or periods ...
 	// 		a forward slash ...
 	// 		group 2: one or more of: word character, period, question mark, equals sign
 	urlRe := regex.MustCompile(`https://([a-z0-9\.]+)/([\w.?=]+)`, regex.RE_CASE_INSENSITIVE)
 	text := `You can find me at https://twomorecents.org/index.html and https://news.ycombinator.com/user?id=aadhavans`
 	found := urlRe.FindAllStringSubmatch(text)
 	// Matches are 0-indexed; index 0 within a match is the whole match, so groups start at 1.
 	fmt.Println(found[0][1]) // 1st match, group 1
 	fmt.Println(found[0][2]) // 1st match, group 2
 	fmt.Println(found[1][1]) // 2nd match, group 1
 	fmt.Println(found[1][2]) // 2nd match, group 2
 	// Output: twomorecents.org
 	// index.html
 	// news.ycombinator.com
 	// user?id=aadhavans
 }
 func ExampleReg_Expand() {
 	inputStr := `option1: value1
 	option2: value2`
@@ -89,3 +155,27 @@ func ExampleReg_Longest() {
 	// Output: x
 	// xx
 }
 func ExampleReg_ReplaceAll() {
 	// Group 1 is a digit and group 2 a word character; the template emits them in swapped order.
 	swapRe := regex.MustCompile(`(\d)(\w)`)
 	swapped := swapRe.ReplaceAll("5d9t", `$2$1`)
 	fmt.Println(swapped)
 	// Output: d5t9
 }
 func ExampleReg_ReplaceAllLiteral() {
 	animalRe := regex.MustCompile(`fox|dog`)
 	sentence := "the quick brown fox jumped over the lazy dog"
 	// The replacement text is inserted as-is for every match.
 	rewritten := animalRe.ReplaceAllLiteral(sentence, `duck`)
 	fmt.Println(rewritten)
 	// Output: the quick brown duck jumped over the lazy duck
 }
 func ExampleReg_ReplaceAllFunc() {
 	// Matches runs of at least five word characters.
 	longWordRe := regex.MustCompile(`\w{5,}`)
 	sentence := `all five or more letter words in this string are capitalized`
 	// Each matched word is passed to strings.ToUpper and replaced by the result.
 	shouted := longWordRe.ReplaceAllFunc(sentence, strings.ToUpper)
 	fmt.Println(shouted)
 	// Output: all five or more LETTER WORDS in this STRING are CAPITALIZED
 }
--- a/regex/matching.go
+++ b/regex/matching.go
@@ -205,22 +205,45 @@ func (re Reg) FindAllSubmatch(str string) []Match {
 	return indices
 }
 // FindAllStringSubmatch returns a double-slice of strings. Each inner slice contains the text of one match,
 // including all of its submatches. A group for which [Group.IsValid] reports false is represented by the
 // empty string.
 // A return value of nil indicates no match.
 func (re Reg) FindAllStringSubmatch(str string) [][]string {
 	matches := re.FindAllSubmatch(str)
 	if len(matches) == 0 {
 		return nil
 	}
 	// Group indices count runes rather than bytes (the matcher iterates over a rune-slice), so the
 	// text is cut from the rune form of str. Slicing the string directly would use byte offsets and
 	// return the wrong text as soon as str contains a multi-byte character.
 	strRunes := []rune(str)
 	return funcMap(matches, func(m Match) []string {
 		return funcMap(m, func(g Group) string {
 			if !g.IsValid() {
 				return ""
 			}
 			return string(strRunes[g.StartIdx:g.EndIdx])
 		})
 	})
 }
 func addStateToList(str []rune, idx int, list []nfaState, state nfaState, threadGroups []Group, visited []nfaState, preferLongest bool) []nfaState {
 	if stateExists(list, state) || stateExists(visited, state) {
 		return list
 	}
 	visited = append(visited, state)
-	if state.isKleene || state.isQuestion {
+	if (state.isKleene || state.isQuestion) && (state.isLazy == false) { // Greedy quantifiers
 		copyThread(state.splitState, state)
-		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
+		list := addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		copyThread(state.next, state)
 		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		return list
 	}
-	if state.isAlternation {
+	if state.isAlternation || ((state.isKleene || state.isQuestion) && state.isLazy) { // Alternation or lazy quantifier
 		copyThread(state.next, state)
-		list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
+		list := addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
 		copyThread(state.splitState, state)
 		list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
 		return list
@@ -234,10 +257,12 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
 	}
 	if state.groupBegin {
 		state.threadGroups[state.groupNum].StartIdx = idx
 		copyThread(state.next, state)
 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	if state.groupEnd {
 		state.threadGroups[state.groupNum].EndIdx = idx
 		copyThread(state.next, state)
 		return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
 	}
 	return append(list, state)
@@ -290,11 +315,25 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
 				if !preferLongest {
 					break
 				}
-			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
+			} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.isBackreference && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
 				if currentState.contentContains(str, idx, preferLongest) {
 					nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
 				}
 			} else if currentState.isBackreference && currentState.threadGroups[currentState.referredGroup].IsValid() {
 				groupLength := currentState.threadGroups[currentState.referredGroup].EndIdx - currentState.threadGroups[currentState.referredGroup].StartIdx
 				if currentState.threadBackref == groupLength {
 					currentState.threadBackref = 0
 					copyThread(currentState.next, currentState)
 					currentStates = addStateToList(str, idx, currentStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
 				} else {
 					idxInReferredGroup := currentState.threadGroups[currentState.referredGroup].StartIdx + currentState.threadBackref
 					if idxInReferredGroup < len(str) && idx < len(str) && str[idxInReferredGroup] == str[idx] {
 						currentState.threadBackref += 1
 						nextStates = append(nextStates, currentState)
 					}
 				}
 			}
 		}
 		currentStates = append([]nfaState{}, nextStates...)
 		nextStates = nil
@@ -327,7 +366,7 @@ func (re Reg) Expand(dst string, template string, src string, match Match) strin
 				i++
 			} else {
 				numStr := ""
-				for unicode.IsDigit(templateRuneSlc[i]) {
+				for i < len(templateRuneSlc) && unicode.IsDigit(templateRuneSlc[i]) {
 					numStr += string(templateRuneSlc[i])
 					i++
 				}
@@ -372,3 +411,66 @@ func (re Reg) LiteralPrefix() (prefix string, complete bool) {
 	}
 	return prefix, complete
 }
 // ReplaceAll replaces all matches of the expression in src, with the text in repl. In repl, variables are interpreted
 // as they are in [Reg.Expand]. The resulting string is returned.
 //
 // Text outside of the matches is copied through unchanged. The walk over src is done rune by rune, because
 // match indices count runes rather than bytes; copying single bytes would garble multi-byte characters.
 // An empty match at the very end of src is replaced as well.
 func (re Reg) ReplaceAll(src string, repl string) string {
 	matches := re.FindAllSubmatch(src)
 	srcRunes := []rune(src)
 	dst := make([]rune, 0, len(srcRunes))
 	i := 0
 	currentMatch := 0
 	// i is allowed to reach len(srcRunes) so that a match starting at the end of src is still seen.
 	for i <= len(srcRunes) {
 		if currentMatch < len(matches) && matches[currentMatch][0].IsValid() && i == matches[currentMatch][0].StartIdx {
 			dst = append(dst, []rune(re.Expand("", repl, src, matches[currentMatch]))...)
 			// Skip over the matched text. For an empty match i does not move, and the rune
 			// at i is copied on the next iteration.
 			i = matches[currentMatch][0].EndIdx
 			currentMatch++
 			continue
 		}
 		if i == len(srcRunes) {
 			break
 		}
 		dst = append(dst, srcRunes[i])
 		i++
 	}
 	return string(dst)
 }
 // ReplaceAllLiteral replaces all matches of the expression in src, with the text in repl. The text is replaced directly,
 // without any expansion.
 //
 // Text outside of the matches is copied through unchanged. The walk over src is done rune by rune, because
 // match indices count runes rather than bytes; copying single bytes would garble multi-byte characters.
 // An empty match at the very end of src is replaced as well.
 func (re Reg) ReplaceAllLiteral(src string, repl string) string {
 	zerogroups := re.FindAll(src)
 	srcRunes := []rune(src)
 	replRunes := []rune(repl)
 	dst := make([]rune, 0, len(srcRunes))
 	currentMatch := 0
 	i := 0
 	// i is allowed to reach len(srcRunes) so that a match starting at the end of src is still seen.
 	for i <= len(srcRunes) {
 		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
 			dst = append(dst, replRunes...)
 			// Skip over the matched text. For an empty match i does not move, and the rune
 			// at i is copied on the next iteration.
 			i = zerogroups[currentMatch].EndIdx
 			currentMatch++
 			continue
 		}
 		if i == len(srcRunes) {
 			break
 		}
 		dst = append(dst, srcRunes[i])
 		i++
 	}
 	return string(dst)
 }
 // ReplaceAllFunc replaces every match of the expression in src, with the return value of the function replFunc.
 // replFunc takes in the matched string. The return value is substituted in directly without expansion.
 //
 // Text outside of the matches is copied through unchanged. The walk over src is done rune by rune, because
 // match indices count runes rather than bytes; copying single bytes would garble multi-byte characters.
 // An empty match at the very end of src is replaced as well (replFunc receives the empty string).
 func (re Reg) ReplaceAllFunc(src string, replFunc func(string) string) string {
 	zerogroups := re.FindAll(src)
 	srcRunes := []rune(src)
 	dst := make([]rune, 0, len(srcRunes))
 	currentMatch := 0
 	i := 0
 	// i is allowed to reach len(srcRunes) so that a match starting at the end of src is still seen.
 	for i <= len(srcRunes) {
 		if currentMatch < len(zerogroups) && i == zerogroups[currentMatch].StartIdx {
 			matched := string(srcRunes[zerogroups[currentMatch].StartIdx:zerogroups[currentMatch].EndIdx])
 			dst = append(dst, []rune(replFunc(matched))...)
 			// Skip over the matched text. For an empty match i does not move, and the rune
 			// at i is copied on the next iteration.
 			i = zerogroups[currentMatch].EndIdx
 			currentMatch++
 			continue
 		}
 		if i == len(srcRunes) {
 			break
 		}
 		dst = append(dst, srcRunes[i])
 		i++
 	}
 	return string(dst)
 }
--- a/regex/misc.go
+++ b/regex/misc.go
@@ -16,8 +16,11 @@ var rparenRune rune = 0xF0006
 var nonCapLparenRune rune = 0xF0007 // Represents a non-capturing group's LPAREN
 var escBackslashRune rune = 0xF0008 // Represents an escaped backslash
 var charRangeRune rune = 0xF0009    // Represents a character range
 var lazyKleeneRune rune = 0xF000A   // Represents a lazy kleene star
 var lazyPlusRune rune = 0xF000B     // Represents a lazy plus operator
 var lazyQuestionRune rune = 0xF000C // Represents a lazy question operator
-var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
+var specialChars = []rune{'?', lazyQuestionRune, '*', lazyKleeneRune, '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', lazyPlusRune, '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
 // An interface for int and rune, which are identical
 type character interface {
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -34,6 +34,7 @@ type nfaState struct {
 	isKleene                   bool       // Identifies whether current node is a 0-state representing Kleene star
 	isQuestion                 bool       // Identifies whether current node is a 0-state representing the question operator
 	isAlternation              bool       // Identifies whether current node is a 0-state representing an alternation
 	isLazy                     bool       // Only for split states - Identifies whether or not to flip the order of branches (try one branch before the other)
 	splitState                 *nfaState  // Only for alternation states - the 'other' branch of the alternation ('next' is the first)
 	assert                     assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
 	allChars                   bool       // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
@@ -44,9 +45,11 @@ type nfaState struct {
 	groupBegin                 bool       // Whether or not the node starts a capturing group
 	groupEnd                   bool       // Whether or not the node ends a capturing group
 	groupNum                   int        // Which capturing group the node starts / ends
 	isBackreference            bool       // Whether or not current node is backreference
 	referredGroup              int        // If current node is a backreference, the node that it points to
 	// The following properties depend on the current match - I should think about resetting them for every match.
-	zeroMatchFound bool    // Whether or not the state has been used for a zero-length match - only relevant for zero states
+	threadGroups  []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
-	threadGroups   []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
+	threadBackref int     // If current node is a backreference, how many characters to look forward into the referred group
 }
 // Clones the NFA starting from the given state.
@@ -75,14 +78,16 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
 		isKleene:        stateToClone.isKleene,
 		isQuestion:      stateToClone.isQuestion,
 		isAlternation:   stateToClone.isAlternation,
 		isLazy:          stateToClone.isLazy,
 		assert:          stateToClone.assert,
 		zeroMatchFound:  stateToClone.zeroMatchFound,
 		allChars:        stateToClone.allChars,
 		except:          append([]rune{}, stateToClone.except...),
 		lookaroundRegex: stateToClone.lookaroundRegex,
 		groupEnd:        stateToClone.groupEnd,
 		groupBegin:      stateToClone.groupBegin,
 		groupNum:        stateToClone.groupNum,
 		isBackreference: stateToClone.isBackreference,
 		referredGroup:   stateToClone.referredGroup,
 	}
 	cloneMap[stateToClone] = clone
 	for i, s := range stateToClone.output {
@@ -122,6 +127,7 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
 	}
 	// Assuming it hasn't been visited
 	state.threadGroups = nil
 	state.threadBackref = 0
 	visitedMap[state] = true
 	if state.isAlternation {
 		resetThreadsHelper(state.next, visitedMap)
@@ -419,6 +425,7 @@ func (s nfaState) equals(other nfaState) bool {
 		s.next == other.next &&
 		s.isKleene == other.isKleene &&
 		s.isQuestion == other.isQuestion &&
 		s.isLazy == other.isLazy &&
 		s.isAlternation == other.isAlternation &&
 		s.splitState == other.splitState &&
 		s.assert == other.assert &&
@@ -428,7 +435,8 @@ func (s nfaState) equals(other nfaState) bool {
 		s.groupBegin == other.groupBegin &&
 		s.groupEnd == other.groupEnd &&
 		s.groupNum == other.groupNum &&
-		slices.Equal(s.threadGroups, other.threadGroups)
+		slices.Equal(s.threadGroups, other.threadGroups) &&
 		s.threadBackref == other.threadBackref
 }
 func stateExists(list []nfaState, s nfaState) bool {
--- a/regex/postfixNode.go
+++ b/regex/postfixNode.go
@@ -1,6 +1,8 @@
 package regex
-import "fmt"
+import (
 	"fmt"
 )
 type nodeType int
@@ -20,6 +22,7 @@ const (
 	assertionNode
 	lparenNode
 	rparenNode
 	backreferenceNode
 )
 // Helper constants for lookarounds
@@ -31,15 +34,17 @@ const lookbehind = -1
 var infinite_reps int = -1 // Represents infinite reps eg. the end range in {5,}
 // This represents a node in the postfix representation of the expression
 type postfixNode struct {
-	nodetype       nodeType
+	nodetype        nodeType
-	contents       []rune        // Contents of the node
+	contents        []rune        // Contents of the node
-	startReps      int           // Minimum number of times the node should be repeated - used with numeric specifiers
+	startReps       int           // Minimum number of times the node should be repeated - used with numeric specifiers
-	endReps        int           // Maximum number of times the node should be repeated - used with numeric specifiers
+	endReps         int           // Maximum number of times the node should be repeated - used with numeric specifiers
-	allChars       bool          // Whether or not the current node represents all characters (eg. dot metacharacter)
+	allChars        bool          // Whether or not the current node represents all characters (eg. dot metacharacter)
-	except         []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
+	except          []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
-	lookaroundSign int           // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
+	lookaroundSign  int           // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
-	lookaroundDir  int           // Lookbehind or lookahead
+	lookaroundDir   int           // Lookbehind or lookahead
-	nodeContents   []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
+	nodeContents    []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
 	referencedGroup int           // ONLY USED WHEN nodetype == backreferenceNode. Holds the group which this one refers to. After parsing is done, the expression will be rewritten eg. (a)\1 will become (a)(a). So the return value of ShuntingYard() shouldn't contain a backreferenceNode.
 	isLazy          bool          // ONLY USED WHEN nodetype == kleene or question
 }
 // Converts the given list of postfixNodes to one node of type CHARCLASS.
@@ -158,10 +163,19 @@ func newPostfixNode(contents ...rune) postfixNode {
 		switch contents[0] {
 		case '+':
 			to_return.nodetype = plusNode
 		case lazyPlusRune:
 			to_return.nodetype = plusNode
 			to_return.isLazy = true
 		case '?':
 			to_return.nodetype = questionNode
 		case lazyQuestionRune:
 			to_return.nodetype = questionNode
 			to_return.isLazy = true
 		case '*':
 			to_return.nodetype = kleeneNode
 		case lazyKleeneRune:
 			to_return.nodetype = kleeneNode
 			to_return.isLazy = true
 		case '|':
 			to_return.nodetype = pipeNode
 		case concatRune:
@@ -208,3 +222,44 @@ func newPostfixCharNode(contents ...rune) postfixNode {
 	toReturn.contents = append(toReturn.contents, contents...)
 	return toReturn
 }
 // newPostfixBackreferenceNode builds a postfix node of type backreferenceNode that refers to the
 // capturing group numbered referred. Both repetition bounds (startReps and endReps) are set to 1.
 func newPostfixBackreferenceNode(referred int) postfixNode {
 	return postfixNode{
 		nodetype:        backreferenceNode,
 		startReps:       1,
 		endReps:         1,
 		referencedGroup: referred,
 	}
 }
 // rewriteBackreferences rewrites any backreferences in the given postfixNode slice, into their respective groups.
 // It stores the relation in a map, and returns it as the second return value.
 // It uses parenIndices to determine where a group starts and ends in nodes.
 // For example, \1(a) will be rewritten into (a)(a), and 1 -> 2 will be the hashmap value.
 // It returns an error if a backreference points to an invalid group.
 // func rewriteBackreferences(nodes []postfixNode, parenIndices []Group) ([]postfixNode, map[int]int, error) {
 // 	rtv := make([]postfixNode, 0)
 // 	referMap := make(map[int]int)
 // 	numGroups := 0
 // 	groupIncrement := 0 // If we have a backreference before the group its referring to, then the group its referring to will have its group number incremented.
 // 	for i, node := range nodes {
 // 		if node.nodetype == backreferenceNode {
 // 			if node.referencedGroup >= len(parenIndices) {
 // 				return nil, nil, fmt.Errorf("invalid backreference")
 // 			}
 // 			rtv = slices.Concat(rtv, nodes[parenIndices[node.referencedGroup].StartIdx:parenIndices[node.referencedGroup].EndIdx+1]) // Add all the nodes in the group to rtv
 // 			numGroups += 1
 // 			if i < parenIndices[node.referencedGroup].StartIdx {
 // 				groupIncrement += 1
 // 			}
 // 			referMap[numGroups] = node.referencedGroup + groupIncrement
 // 		} else {
 // 			rtv = append(rtv, node)
 // 			if node.nodetype == lparenNode {
 // 				numGroups += 1
 // 			}
 // 		}
 // 	}
 // 	return rtv, referMap, nil
 // }
--- a/regex/re_test.go
+++ b/regex/re_test.go
@@ -117,6 +117,7 @@ var reTests = []struct {
 	{`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
 	{`\bpaint\b`, nil, "paints", []Group{}},
 	{`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
 	{`\w{}`, nil, "test", nil},
 	{`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
 	{`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
 	{`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
@@ -179,7 +180,7 @@ var reTests = []struct {
 	{"[[:graph:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{0, 70}}},
 	// Test cases from Python's RE test suite
-	{`[\1]`, nil, "\x01", []Group{{0, 1}}},
+	{`[\01]`, nil, "\x01", []Group{{0, 1}}},
 	{`\0`, nil, "\x00", []Group{{0, 1}}},
 	{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
@@ -194,7 +195,7 @@ var reTests = []struct {
 	{`\x00ffffffffffffff`, nil, "\xff", []Group{}},
 	{`\x00f`, nil, "\x0f", []Group{}},
 	{`\x00fe`, nil, "\xfe", []Group{}},
-	{`^\w+=(\\[\000-\277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
+	{`^\w+=(\\[\000-\0277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
 	{`a.b`, nil, `acb`, []Group{{0, 3}}},
 	{`a.b`, nil, "a\nb", []Group{}},
@@ -312,11 +313,7 @@ var reTests = []struct {
 	{`a[-]?c`, nil, `ac`, []Group{{0, 2}}},
 	{`^(.+)?B`, nil, `AB`, []Group{{0, 2}}},
 	{`\0009`, nil, "\x009", []Group{{0, 2}}},
-	{`\141`, nil, "a", []Group{{0, 1}}},
+	{`\0141`, nil, "a", []Group{{0, 1}}},
 	// At this point, the python test suite has a bunch
 	// of backreference tests. Since my engine doesn't
 	// implement backreferences, I've skipped those tests.
 	{`*a`, nil, ``, nil},
 	{`(*)b`, nil, ``, nil},
@@ -433,7 +430,8 @@ var reTests = []struct {
 	{`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}},
 	{`^(.+)?B`, []ReFlag{RE_CASE_INSENSITIVE}, `ab`, []Group{{0, 2}}},
 	{`\0009`, []ReFlag{RE_CASE_INSENSITIVE}, "\x009", []Group{{0, 2}}},
-	{`\141`, []ReFlag{RE_CASE_INSENSITIVE}, "A", []Group{{0, 1}}},
+	{`\0141`, []ReFlag{RE_CASE_INSENSITIVE}, "A", []Group{{0, 1}}},
 	{`\0141\0141`, []ReFlag{RE_CASE_INSENSITIVE}, "AA", []Group{{0, 2}}},
 	{`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}},
@@ -464,8 +462,10 @@ var reTests = []struct {
 	{`[\D5]+`, nil, `1234abc5678`, []Group{{4, 8}}},
 	{`[\da-fA-F]+`, nil, `123abc`, []Group{{0, 6}}},
 	{`\xff`, nil, "\u00ff", []Group{{0, 1}}},
 	{`\xff+`, nil, "\u00ff\u00ff", []Group{{0, 2}}},
 	{`\xFF`, nil, "\u00ff", []Group{{0, 1}}},
 	{`\x00ff`, nil, "\u00ff", []Group{}},
 	{`\x{0000ff}+`, nil, "\u00ff\u00ff", []Group{{0, 2}}},
 	{`\x{0000ff}`, nil, "\u00ff", []Group{{0, 1}}},
 	{`\x{0000FF}`, nil, "\u00ff", []Group{{0, 1}}},
 	{"\t\n\v\r\f\a", nil, "\t\n\v\r\f\a", []Group{{0, 6}}},
@@ -473,7 +473,7 @@ var reTests = []struct {
 	{`[\t][\n][\v][\r][\f][\b]`, nil, "\t\n\v\r\f\b", []Group{{0, 6}}},
 	{`.*d`, nil, "abc\nabd", []Group{{4, 7}}},
 	{`(`, nil, "-", nil},
-	{`[\41]`, nil, `!`, []Group{{0, 1}}},
+	{`[\041]`, nil, `!`, []Group{{0, 1}}},
 	{`(?<!abc)(d.f)`, nil, `abcdefdof`, []Group{{6, 9}}},
 	{`[\w-]+`, nil, `laser_beam`, []Group{{0, 10}}},
 	{`M+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
@@ -489,7 +489,25 @@ var reTests = []struct {
 	{`[b-e]`, nil, `f`, []Group{}},
 	{`*?`, nil, `-`, nil},
-	{`a*?`, nil, `-`, nil}, // non-greedy operators are not supported
+	{`a.+c`, nil, `abcabc`, []Group{{0, 6}}},
 	// Lazy quantifier tests
 	{`a.+?c`, nil, `abcabc`, []Group{{0, 3}, {3, 6}}},
 	{`ab*?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
 	{`ab+?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
 	{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
 	{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
 	{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{}},
 	{`ab??c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
 	{`a.*?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXYZC`, []Group{{0, 5}}},
 	{`a.+?c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCABC`, []Group{{0, 3}, {3, 6}}},
 	{`a.*?c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCABC`, []Group{{0, 3}, {3, 6}}},
 	{`.*?\S *:`, nil, `xx:`, []Group{{0, 3}}},
 	{`a[ ]*? (\d+).*`, nil, `a   10`, []Group{{0, 6}}},
 	{`a[ ]*? (\d+).*`, nil, `a    10`, []Group{{0, 7}}},
 	{`"(?:\\"|[^"])*?"`, nil, `"\""`, []Group{{0, 4}}},
 	{`^.*?$`, nil, "one\ntwo\nthree", []Group{}},
 	{`a[^>]*?b`, nil, `a>b`, []Group{}},
 	{`^a*?$`, nil, `foo`, []Group{}},
 	// Numeric range tests - this is a feature that I added, and doesn't exist
 	// in any other mainstream regex engine
@@ -520,6 +538,30 @@ var reTests = []struct {
 	{`<389-400`, nil, `-`, nil},
 	{`<389-400>`, nil, `391`, []Group{{0, 3}}},
 	{`\b<1-10000>\b`, nil, `America declared independence in 1776.`, []Group{{33, 37}}},
 	{`\p{Tamil}+`, nil, `உயிரெழுத்து`, []Group{{0, 11}}}, // Each letter and matra is counted as a separate rune, so 'u', 'ya', 'e (matra), 'ra', 'e (matra)', 'zha', (oo (matra), 'tha', 'ith', 'tha', 'oo (matra)'.
 	{`\P{Tamil}+`, nil, `vowel=உயிரெழுத்து`, []Group{{0, 6}}},
 	{`\P`, nil, `உயிரெழுத்து`, nil},
 	{`\PM\pM*`, nil, `உயிரெழுத்து`, []Group{{0, 1}, {1, 3}, {3, 5}, {5, 7}, {7, 9}, {9, 11}}},
 	{`\pN+`, nil, `123abc456def`, []Group{{0, 3}, {6, 9}}},
 	{`\PN+`, nil, `123abc456def`, []Group{{3, 6}, {9, 12}}},
 	{`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}},
 	{`(?<=\().*?(?=\))`, nil, `(abc)`, []Group{{1, 4}}},
 	{`((a|b)\2)`, nil, `aa`, []Group{{0, 2}}},
 	{`((a|b)\2)`, nil, `bb`, []Group{{0, 2}}},
 	{`((a|b)\2)`, nil, `ab`, []Group{}},
 	{`((a|b)\2)`, nil, `ba`, []Group{}},
 	{`((a|b)\2){3}`, nil, `aaaaaa`, []Group{{0, 6}}},
 	{`((a|b)\2){3}`, nil, `bbbbbb`, []Group{{0, 6}}},
 	{`((a|b)\2){3}`, nil, `bbaaaa`, []Group{{0, 6}}},
 	{`((a|b)\2){3}`, nil, `aabbaa`, []Group{{0, 6}}},
 	{`((a|b)\2){3}`, nil, `aaaabb`, []Group{{0, 6}}},
 	{`((a|b)\2){3}`, nil, `bbaabb`, []Group{{0, 6}}},
 	{`((a|b)\2){3}`, nil, `baabab`, []Group{}},
 	{`((a|b)\2){3}`, nil, `bbabab`, []Group{}},
 }
 var groupTests = []struct {
@@ -581,13 +623,37 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `bcdd`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, nil, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, nil, `a`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\041`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
-	// At this point, the python test suite has a bunch
+	// Backreference tests
-	// of backreference tests. Since my engine doesn't
+	{`(abc)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
-	// implement backreferences, I've skipped those tests.
+	{`([a-c]+)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
 	{`([a-c]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
 	{`^(.+)?B`, nil, `AB`, []Match{[]Group{{0, 2}, {0, 1}}}},
 	{`(a+).\1$`, nil, `aaaaa`, []Match{[]Group{{0, 5}, {0, 2}}}},
 	{`^(a+).\1$`, nil, `aaaa`, []Match{}},
 	{`(a)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
 	{`(a+)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
 	{`(a+)+\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
 	{`(a).+\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(a)ba*\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(aa|a)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(a|aa)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(a+)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`([abc]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
 	{`(a)(?:b)\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
 	{`(a)(?:b)\1`, nil, `abb`, []Match{}},
 	{`(?:a)(b)\1`, nil, `aba`, []Match{}},
 	{`(?:a)(b)\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
 	{`(?:(cat)|(dog))\2`, nil, `catdog`, []Match{}},
 	{`(?:a)\1`, nil, `aa`, nil},
 	{`((cat)|(dog)|(cow)|(bat))\4`, nil, `cowcow`, []Match{[]Group{{0, 6}, {0, 3}, {-1, -1}, {-1, -1}, {0, 3}, {-1, -1}}}},
 	{`(a|b)*\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
 	{`(a|b)*\1`, nil, `aba`, []Match{}},
 	{`(a|b)*\1`, nil, `bab`, []Match{}},
 	{`(a|b)*\1`, nil, `baa`, []Match{[]Group{{0, 3}, {1, 2}}}},
 	{`(a)(b)c|ab`, nil, `ab`, []Match{[]Group{{0, 2}}}},
 	{`(a)+x`, nil, `aaax`, []Match{[]Group{{0, 4}, {2, 3}}}},
@@ -636,7 +702,7 @@ var groupTests = []struct {
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}},
 	{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
 	{`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
-	{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
+	{`(((((((((a)))))))))\041`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
 	{`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
 	{`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
 	{`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}},
@@ -688,6 +754,18 @@ var groupTests = []struct {
 	// {`(a|ab|c|bcd)*(d*)`, nil, `ababcd`, []Match{[]Group{{0, 6}, {3, 6}, {6, 6}}, []Group{{6, 6}, {6, 6}, {6, 6}}}},
 	// // Bug - this should give {0,3},{0,3},{0,0},{0,3},{3,3} but it gives {0,3},{0,2},{0,1},{1,2},{2,3}
 	// //	{`((a*)(b|abc))(c*)`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 3}, {0, 0}, {0, 3}, {3, 3}}}},
 	// Lazy quantifier tests
 	{`a(?:b|c|d)+?(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
 	{`a(?:b|(c|e){1,2}?|d)+?(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {1, 2}, {2, 3}}}},
 	{`(?<!-):(.*?)(?<!-):`, nil, `a:bc-:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
 	{`(?<!\\):(.*?)(?<!\\):`, nil, `a:bc\:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
 	{`(?<!\?)'(.*?)(?<!\?)'`, nil, `a'bc?'de'f`, []Match{[]Group{{1, 9}, {2, 8}}}},
 	{`.*?x\s*\z(.*)`, []ReFlag{RE_MULTILINE, RE_SINGLE_LINE}, "xx\nx\n", []Match{[]Group{{0, 5}, {5, 5}}}},
 	{`.*?x\s*\z(.*)`, []ReFlag{RE_MULTILINE}, "xx\nx\n", []Match{[]Group{{3, 5}, {5, 5}}}},
 	{`^([ab]*?)(?=(b)?)c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
 	{`^([ab]*?)(?!(b))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
 	{`^([ab]*?)(?<!(a))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
 }
 func TestFind(t *testing.T) {
@@ -792,23 +870,24 @@ func TestFindSubmatch(t *testing.T) {
 				if test.result != nil {
 					panic(err)
 				}
-			}
+			} else {
-			match, err := regComp.FindSubmatch(test.str)
+				match, err := regComp.FindSubmatch(test.str)
-			if err != nil {
+				if err != nil {
-				if len(test.result) != 0 {
+					if len(test.result) != 0 {
-					t.Errorf("Wanted %v got no match\n", test.result[0])
+						t.Errorf("Wanted %v got no match\n", test.result[0])
 				}
 			} else if len(test.result) == 0 {
 				t.Errorf("Wanted no match got %v\n", match)
 			}
 			for i := range match {
 				if match[i].IsValid() {
 					if test.result[0][i] != match[i] {
 						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 					}
-				} else {
+				} else if len(test.result) == 0 {
-					if i < len(test.result) && test.result[0][i].IsValid() {
+					t.Errorf("Wanted no match got %v\n", match)
-						t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
+				}
 				for i := range match {
 					if match[i].IsValid() {
 						if test.result[0][i] != match[i] {
 							t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 						}
 					} else {
 						if i < len(test.result) && test.result[0][i].IsValid() {
 							t.Errorf("Wanted %v	Got %v\n", test.result[0], match)
 						}
 					}
 				}
 			}
@@ -823,10 +902,22 @@ func TestFindStringSubmatch(t *testing.T) {
 				if test.result != nil {
 					panic(err)
 				}
-			}
+			} else {
-			matchStr := regComp.FindStringSubmatch(test.str)
+				matchStr := regComp.FindStringSubmatch(test.str)
-			if matchStr == nil {
+				if matchStr == nil {
-				if len(test.result) != 0 {
+					if len(test.result) != 0 {
 						expectedStr := funcMap(test.result[0], func(g Group) string {
 							if g.IsValid() {
 								return test.str[g.StartIdx:g.EndIdx]
 							} else {
 								return ""
 							}
 						})
 						t.Errorf("Wanted %v got no match\n", expectedStr)
 					}
 				} else if len(test.result) == 0 {
 					t.Errorf("Wanted no match got %v\n", matchStr)
 				} else {
 					expectedStr := funcMap(test.result[0], func(g Group) string {
 						if g.IsValid() {
 							return test.str[g.StartIdx:g.EndIdx]
@@ -834,26 +925,15 @@ func TestFindStringSubmatch(t *testing.T) {
 							return ""
 						}
 					})
-					t.Errorf("Wanted %v got no match\n", expectedStr)
+					for i, groupStr := range matchStr {
-				}
+						if groupStr == "" {
-			} else if len(test.result) == 0 {
+							if i < len(expectedStr) && expectedStr[i] != "" {
-				t.Errorf("Wanted no match got %v\n", matchStr)
+								t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
-			} else {
+							}
-				expectedStr := funcMap(test.result[0], func(g Group) string {
+						} else {
-					if g.IsValid() {
+							if expectedStr[i] != groupStr {
-						return test.str[g.StartIdx:g.EndIdx]
+								t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
-					} else {
+							}
 						return ""
 					}
 				})
 				for i, groupStr := range matchStr {
 					if groupStr == "" {
 						if i < len(expectedStr) && expectedStr[i] != "" {
 							t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
 						}
 					} else {
 						if expectedStr[i] != groupStr {
 							t.Errorf("Wanted %v	Got %v\n", expectedStr, matchStr)
 						}
 					}
 				}
@@ -861,6 +941,61 @@ func TestFindStringSubmatch(t *testing.T) {
 		})
 	}
 }
 // TestFindAllStringSubmatch runs every entry of groupTests through
 // FindAllStringSubmatch and compares the returned strings with the text that
 // the expected groups select from the input string.
 //
 // An entry whose expected result is nil is allowed to fail compilation. A
 // group that is not valid is expected as the empty string, and an expected
 // match may list fewer groups than the engine returns as long as the extra
 // returned groups are empty.
 func TestFindAllStringSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
 			regComp, err := Compile(test.re, test.flags...)
 			if err != nil {
 				// A nil result marks a pattern that is expected to be rejected.
 				// Fail only this subtest instead of panicking the whole run.
 				if test.result != nil {
 					t.Fatalf("Unexpected compile error: %v", err)
 				}
 				return
 			}
 			matchStrs := regComp.FindAllStringSubmatch(test.str)
 			if len(test.result) == 0 {
 				if matchStrs != nil {
 					t.Errorf("Wanted no match got %v\n", matchStrs)
 				}
 				return
 			}
 			// Convert the expected index pairs into the substrings they select.
 			expectedStrs := funcMap(test.result, func(m Match) []string {
 				return funcMap(m, func(g Group) string {
 					if g.IsValid() {
 						return test.str[g.StartIdx:g.EndIdx]
 					}
 					return ""
 				})
 			})
 			if matchStrs == nil {
 				t.Errorf("Wanted %v got no match\n", expectedStrs)
 				return
 			}
 			// Without this check, extra matches would index past expectedStrs
 			// and missing matches would go unnoticed.
 			if len(matchStrs) != len(expectedStrs) {
 				t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 				return
 			}
 			for i, matchStr := range matchStrs {
 				for j, groupStr := range matchStr {
 					if j >= len(expectedStrs[i]) {
 						// Groups beyond the expected list must not have matched.
 						if groupStr != "" {
 							t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 						}
 						continue
 					}
 					if expectedStrs[i][j] != groupStr {
 						t.Errorf("Wanted %v	Got %v\n", expectedStrs, matchStrs)
 					}
 				}
 			}
 		})
 	}
 }
 func TestFindAllSubmatch(t *testing.T) {
 	for _, test := range groupTests {
 		t.Run(test.re+"	"+test.str, func(t *testing.T) {
@@ -869,17 +1004,18 @@ func TestFindAllSubmatch(t *testing.T) {
 				if test.result != nil {
 					panic(err)
 				}
-			}
+			} else {
-			matchIndices := regComp.FindAllSubmatch(test.str)
+				matchIndices := regComp.FindAllSubmatch(test.str)
-			for i := range matchIndices {
+				for i := range matchIndices {
-				for j := range matchIndices[i] {
+					for j := range matchIndices[i] {
-					if matchIndices[i][j].IsValid() {
+						if matchIndices[i][j].IsValid() {
-						if test.result[i][j] != matchIndices[i][j] {
+							if test.result[i][j] != matchIndices[i][j] {
-							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
+								t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
-						}
+							}
-					} else {
+						} else {
-						if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
+							if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
-							t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
+								t.Errorf("Wanted %v	Got %v\n", test.result, matchIndices)
 							}
 						}
 					}
 				}
--- a/regex/todo.txt
+++ b/regex/todo.txt
@@ -4,4 +4,5 @@
 Ideas for flags:
    -m <num> : Print <num>th match (-m 1 = first match, -m 2 = second match)
    -g <num> : Print the <num>th group
    -r : Specify a directory instead of a file, reads recursively
 4. Refactor code for flags - make each flag's code a function, which modifies the result of findAllMatches
Author	SHA1	Message	Date
Aadhavan Srinivasan	e489dc4c27	Started working on line number flag	2025-03-15 16:24:50 -04:00
Aadhavan Srinivasan	34149980a4	Started working on multiple filename arguments; prefix each line with filename containing the line; mostly indentation changes	2025-03-13 12:11:54 -04:00
Aadhavan Srinivasan	e79c19a929	Updated TODO	2025-03-12 16:46:57 -04:00
Aadhavan Srinivasan	d2bce37935	Updated argument count validation	2025-03-12 16:46:05 -04:00
Aadhavan Srinivasan	bb3b866b77	Started working on file arguments - stdin is used if arg is "-"	2025-03-12 16:44:40 -04:00
Aadhavan Srinivasan	e07f27dc78	Merge branch 'master' of https://gitea.twomorecents.org/Rockingcool/kleingrep	2025-02-24 07:46:54 -05:00
Aadhavan Srinivasan	65d2317f79	Added more backreference tests	2025-02-21 08:44:33 -05:00
Aadhavan Srinivasan	a631fc289c	Clone 'isBackreference' and 'referredGroup' NFA fields, because they aren't thread variables	2025-02-21 08:44:24 -05:00
Aadhavan Srinivasan	d62a429cce	Updated documentation	2025-02-20 19:58:07 -05:00
Aadhavan Srinivasan	7b31031553	Change when a newline is printed, so that we don't print extraneous newlines	2025-02-17 09:37:31 -05:00
Aadhavan Srinivasan	38c842cb07	Added method to get length of unique array	2025-02-17 09:36:38 -05:00
Aadhavan Srinivasan	9f9af36be8	Fixed bug where escaped parentheses in lookarounds were counted as regular parentheses instead of literals	2025-02-17 09:36:17 -05:00
Aadhavan Srinivasan	8217b67122	Added test for escaped parentheses in lookarounds	2025-02-17 09:35:06 -05:00
Aadhavan Srinivasan	1f06dcef64	Just declare the variable instead of initializing it as well	2025-02-16 15:51:53 -05:00
Aadhavan Srinivasan	119475b41b	Updated README	2025-02-14 12:13:01 -05:00
Aadhavan Srinivasan	6151cc8cf6	Updated documentation	2025-02-14 12:07:43 -05:00
Aadhavan Srinivasan	3eaf4eb19c	Updated README	2025-02-14 12:00:33 -05:00
Aadhavan Srinivasan	d453815831	Added README	2025-02-14 11:59:43 -05:00
Aadhavan Srinivasan	3a2916baae	Set 'isLazy' to true in the NFA, if the postfixNode has the flag set	2025-02-14 11:37:48 -05:00
Aadhavan Srinivasan	9d6344719f	Reverse order of trying branches if the quantifier is lazy	2025-02-14 11:37:28 -05:00
Aadhavan Srinivasan	f5c868566b	Added field to NFA, denoting if a node is lazy or not	2025-02-14 11:37:14 -05:00
Aadhavan Srinivasan	1cd6da218f	Added lazy quantifier tests	2025-02-14 11:36:56 -05:00
Aadhavan Srinivasan	277cbc0fc5	Started working on lazy quantifier support	2025-02-13 20:50:30 -05:00
Aadhavan Srinivasan	3924502b72	Added code to return lazy quantifier postfixNodes	2025-02-13 20:50:11 -05:00
Aadhavan Srinivasan	36b009747b	Added metacharacters for lazy quantifiers	2025-02-13 20:49:54 -05:00
Aadhavan Srinivasan	6cd0a10a8f	Added more documentation	2025-02-13 14:14:00 -05:00
Aadhavan Srinivasan	69fb96c43d	Merge pull request 'Implement Unicode character classes' (#4 ) from implementUnicodeCharClass into master Reviewed-on: #4	2025-02-13 09:51:44 -06:00
Aadhavan Srinivasan	46bc0c8529	Removed unicode character classes from 'features not supported' list	2025-02-13 10:48:23 -05:00
Aadhavan Srinivasan	1a890a1e75	Refactoring - remove duplicate code	2025-02-13 09:10:40 -05:00
Aadhavan Srinivasan	fde3784e5a	Added unicode charclass support within character classes; Fixed bugs with hex classes and unicode classes	2025-02-13 08:58:02 -05:00
Aadhavan Srinivasan	7045711860	Convert test_str into a rune slice for better unicode compatibility, it also fixed the bug where all unicode characters wouldn't be colored	2025-02-13 08:57:06 -05:00
Aadhavan Srinivasan	d4d606d95b	Added tests for unicode character classes; more tests for hex characters	2025-02-13 08:55:12 -05:00
Aadhavan Srinivasan	9cd330e521	More work on unicode character class support - fix bug where all characters aren't being matched	2025-02-12 23:04:10 -05:00
Aadhavan Srinivasan	44d6a2005c	Started working on unicode character classes	2025-02-12 22:19:30 -05:00
Aadhavan Srinivasan	f76cd6c3d9	Merge pull request 'Implement Backreferences' (#3 ) from implementBackreferences into master Reviewed-on: #3	2025-02-12 21:17:32 -06:00
Aadhavan Srinivasan	375baa1722	Wrote more backreference tests	2025-02-12 07:51:20 -05:00
Aadhavan Srinivasan	2e47c631bb	Updated documentation to include backreferences	2025-02-12 07:50:59 -05:00
Aadhavan Srinivasan	81b8b1b11c	Do not validate a backreference if the group that it refers to is not valid	2025-02-11 19:12:58 -05:00
Aadhavan Srinivasan	2934e7a20f	Wrote tests for backreferences	2025-02-11 19:12:40 -05:00
Aadhavan Srinivasan	f466d4a8d5	More progress on backreference implementation	2025-02-11 17:06:39 -05:00
Aadhavan Srinivasan	8327450dd2	Started implementing backreferences (octal values should now be prefaced with \0)	2025-02-11 16:14:54 -05:00
Aadhavan Srinivasan	073f231b89	Added function and examples for ReplaceAllFunc()	2025-02-10 21:35:51 -05:00
Aadhavan Srinivasan	3b7257c921	Wrote function and example for ReplaceAllLiteral()	2025-02-10 21:25:49 -05:00
Aadhavan Srinivasan	668df8b70a	Wrote MarshalText() and UnmarshalText() to implement TextMarshaler and TextUnmarshaler	2025-02-10 12:30:48 -05:00
Aadhavan Srinivasan	214acf7e0f	Wrote example for ReplaceAll(); fixed out-of-bounds bug in Expand()	2025-02-10 12:30:17 -05:00
Aadhavan Srinivasan	50221ff4d9	Wrote ReplaceAll(), to replace all matches of the regex with a given string	2025-02-10 12:29:54 -05:00
Aadhavan Srinivasan	5ab95f512a	Updated docs	2025-02-10 09:36:00 -05:00
Aadhavan Srinivasan	e7da678408	Removed obsolete documentation	2025-02-10 09:35:16 -05:00
Aadhavan Srinivasan	ab363e2766	Rewrote test for 'FindString()' to use lookarounds	2025-02-10 09:24:47 -05:00
Aadhavan Srinivasan	c803e45415	Added example for 'FindStringSubmatch()'	2025-02-10 09:19:24 -05:00
Aadhavan Srinivasan	525296f239	Added examples for 'FindAllString()' , 'FindAllSubmatch()' and 'FindAllStringSubmatch()'	2025-02-10 09:10:39 -05:00
Aadhavan Srinivasan	eb0ab9f7ec	Wrote test for FindAllStringSubmatch()	2025-02-10 08:39:20 -05:00
Aadhavan Srinivasan	17a7dbae4c	Wrote FindAllStringSubmatch()	2025-02-10 08:39:10 -05:00
Aadhavan Srinivasan	f2279acd98	Fixed mistake in docs	2025-02-10 08:12:09 -05:00