48 Commits

Author SHA1 Message Date
fc0af1ccc5 Updated README 2025-03-28 09:09:50 -04:00
980fb77114 Makefile changes 2025-03-28 09:07:29 -04:00
4c4d747a9c Renamed 'cmd' to 'kg' so that go install works correctly 2025-03-28 09:06:12 -04:00
595b86df60 Added comment to function 2025-03-25 10:28:29 -04:00
5f9bab528a Don't quit if a file is not found, continue with any other files that are found 2025-03-18 11:45:50 -04:00
530564b920 Print error if input file is a directory; print program name before error string 2025-03-16 19:48:49 -04:00
02b3b469c4 Added support for line num flag 2025-03-16 19:23:23 -04:00
e489dc4c27 Started working on line number flag 2025-03-15 16:24:50 -04:00
34149980a4 Started working on multiple filename arguments; prefix each line with filename containing the line; mostly indentation changes 2025-03-13 12:11:54 -04:00
e79c19a929 Updated TODO 2025-03-12 16:46:57 -04:00
d2bce37935 Updated argument count validation 2025-03-12 16:46:05 -04:00
bb3b866b77 Started working on file arguments - stdin is used if arg is "-" 2025-03-12 16:44:40 -04:00
e07f27dc78 Merge branch 'master' of https://gitea.twomorecents.org/Rockingcool/kleingrep 2025-02-24 07:46:54 -05:00
65d2317f79 Added more backreference tests 2025-02-21 08:44:33 -05:00
a631fc289c Clone 'isBackreference' and 'referredGroup' NFA fields, because they aren't thread variables 2025-02-21 08:44:24 -05:00
d62a429cce Updated documentation 2025-02-20 19:58:07 -05:00
7b31031553 Change when a newline is printed, so that we don't print extraneous newlines 2025-02-17 09:37:31 -05:00
38c842cb07 Added method to get length of unique array 2025-02-17 09:36:38 -05:00
9f9af36be8 Fixed bug where escaped parentheses in lookarounds were counted as regular parentheses instead of literals 2025-02-17 09:36:17 -05:00
8217b67122 Added test for escaped parentheses in lookarounds 2025-02-17 09:35:06 -05:00
1f06dcef64 Just declare the variable instead of initializing it as well 2025-02-16 15:51:53 -05:00
119475b41b Updated README 2025-02-14 12:13:01 -05:00
6151cc8cf6 Updated documentation 2025-02-14 12:07:43 -05:00
3eaf4eb19c Updated README 2025-02-14 12:00:33 -05:00
d453815831 Added README 2025-02-14 11:59:43 -05:00
3a2916baae Set 'isLazy' to true in the NFA, if the postfixNode has the flag set 2025-02-14 11:37:48 -05:00
9d6344719f Reverse order of trying branches if the quantifier is lazy 2025-02-14 11:37:28 -05:00
f5c868566b Added field to NFA, denoting if a node is lazy or not 2025-02-14 11:37:14 -05:00
1cd6da218f Added lazy quantifier tests 2025-02-14 11:36:56 -05:00
277cbc0fc5 Started working on lazy quantifier support 2025-02-13 20:50:30 -05:00
3924502b72 Added code to return lazy quantifier postfixNodes 2025-02-13 20:50:11 -05:00
36b009747b Added metacharacters for lazy quantifiers 2025-02-13 20:49:54 -05:00
6cd0a10a8f Added more documentation 2025-02-13 14:14:00 -05:00
69fb96c43d Merge pull request 'Implement Unicode character classes' (#4) from implementUnicodeCharClass into master
Reviewed-on: #4
2025-02-13 09:51:44 -06:00
46bc0c8529 Removed unicode character classes from 'features not supported' list 2025-02-13 10:48:23 -05:00
1a890a1e75 Refactoring - remove duplicate code 2025-02-13 09:10:40 -05:00
fde3784e5a Added unicode charclass support within character classes; Fixed bugs with hex classes and unicode classes 2025-02-13 08:58:02 -05:00
7045711860 Convert test_str into a rune slice for better unicode compatibility, it also fixed the bug where all unicode characters wouldn't be colored 2025-02-13 08:57:06 -05:00
d4d606d95b Added tests for unicode character classes; more tests for hex characters 2025-02-13 08:55:12 -05:00
9cd330e521 More work on unicode character class support - fix bug where all characters aren't being matched 2025-02-12 23:04:10 -05:00
44d6a2005c Started working on unicode character classes 2025-02-12 22:19:30 -05:00
f76cd6c3d9 Merge pull request 'Implement Backreferences' (#3) from implementBackreferences into master
Reviewed-on: #3
2025-02-12 21:17:32 -06:00
375baa1722 Wrote more backreference tests 2025-02-12 07:51:20 -05:00
2e47c631bb Updated documentation to include backreferences 2025-02-12 07:50:59 -05:00
81b8b1b11c Do not validate a backreference if the group that it refers to is not valid 2025-02-11 19:12:58 -05:00
2934e7a20f Wrote tests for backreferences 2025-02-11 19:12:40 -05:00
f466d4a8d5 More progress on backreference implementation 2025-02-11 17:06:39 -05:00
8327450dd2 Started implementing backreferences (octal values should now be prefaced with \0) 2025-02-11 16:14:54 -05:00
14 changed files with 835 additions and 377 deletions


@@ -8,6 +8,6 @@ vet: fmt
buildLib: vet
go build -gcflags="all=-N -l" ./...
buildCmd: buildLib
go build -C cmd/ -gcflags="all=-N -l" -o re ./...
go build -C kg/ -gcflags="all=-N -l" -o kg ./...
test: buildCmd
go test -v ./...

21
README.md Normal file

@@ -0,0 +1,21 @@
## Kleingrep
Kleingrep is a regular expression engine, providing a library and command-line tool written in Go.
It aims to provide a more featureful engine than the one in Go's
[regexp](https://pkg.go.dev/regexp), while retaining some semblance of efficiency.
The engine does __not__ use backtracking, relying on the NFA-based method described in
[Russ Cox's articles](https://swtch.com/~rsc/regexp). As such, it is immune to catastrophic backtracking.
It also includes features not present in regexp, such as lookarounds and backreferences.
### Syntax
The syntax is, for the most part, a superset of Go's regexp. A full overview of the syntax can be found [here](https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex#hdr-Syntax).
__For more information, see https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex__.
### How it works
I've written about the inner workings of the engine [on my blog](https://twomorecents.org/writing-regex-engine/index.html).
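
As a quick illustration of the library side, here is a minimal sketch of compiling and running a pattern. It assumes only the calls visible elsewhere in this changeset (`Compile`, `FindAllSubmatch`, and `Match`/`Group` with `StartIdx`/`EndIdx`); exact signatures may differ.

```go
package main

import (
	"fmt"

	reg "gitea.twomorecents.org/Rockingcool/kleingrep/regex"
)

func main() {
	// Compile the pattern. Flags such as reg.RE_CASE_INSENSITIVE can be
	// passed as extra arguments, the same way the command-line tool does.
	re, err := reg.Compile(`a.+?c`, reg.RE_CASE_INSENSITIVE)
	if err != nil {
		fmt.Println(err)
		return
	}
	// FindAllSubmatch returns one Match (a slice of Groups) per match;
	// group 0 spans the whole match.
	for _, m := range re.FindAllSubmatch("ABCABC") {
		fmt.Printf("[%d, %d)\n", m[0].StartIdx, m[0].EndIdx)
	}
}
```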


@@ -1,225 +0,0 @@
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"github.com/fatih/color"
reg "gitea.twomorecents.org/Rockingcool/kleingrep/regex"
)
func main() {
// Flags for the regex Compile function
flagsToCompile := make([]reg.ReFlag, 0)
invertFlag := flag.Bool("v", false, "Invert match.")
// This flag has two 'modes':
// 1. Without '-v': Prints only matches. Prints a newline after every match.
// 2. With '-v': Substitutes all matches with empty string.
onlyFlag := flag.Bool("o", false, "Print only colored content. Overrides -l.")
lineFlag := flag.Bool("l", false, "Only print lines with a match (or with no matches, if -v is enabled). Similar to grep's default.")
multiLineFlag := flag.Bool("t", false, "Multi-line mode. Treats newline just like any character.")
printMatchesFlag := flag.Bool("p", false, "Prints start and end index of each match. Can only be used with '-t' for multi-line mode.")
caseInsensitiveFlag := flag.Bool("i", false, "Case-insensitive. Disregard the case of all characters.")
matchNum := flag.Int("m", 0, "Print the match with the given index. Eg. -m 3 prints the third match.")
substituteText := flag.String("s", "", "Substitute the contents of each match with the given string. Overrides -o and -v")
flag.Parse()
// These flags have to be passed to the Compile function
if *multiLineFlag {
flagsToCompile = append(flagsToCompile, reg.RE_MULTILINE, reg.RE_SINGLE_LINE)
}
if *caseInsensitiveFlag {
flagsToCompile = append(flagsToCompile, reg.RE_CASE_INSENSITIVE)
}
// -l and -o are mutually exclusive: -o overrides -l
if *onlyFlag {
*lineFlag = false
}
// Check if substitute and matchNum flags have been enabled
substituteFlagEnabled := false
matchNumFlagEnabled := false
flag.Visit(func(f *flag.Flag) {
if f.Name == "s" {
substituteFlagEnabled = true
}
if f.Name == "m" {
matchNumFlagEnabled = true
}
})
// Validate matchNumFlag - must be positive integer
if matchNumFlagEnabled && *matchNum < 1 {
panic("Invalid match number to print.")
}
// Process:
// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
// a. Add explicit concatenation operators to facilitate this
// 2. Build NFA from postfix representation (Thompson's algorithm)
// 3. Run the string against the NFA
if len(flag.Args()) != 1 { // flag.Args() also strips out program name
fmt.Println("ERROR: Missing cmdline args")
os.Exit(22)
}
var re string
re = flag.Args()[0]
var test_str string
var err error
var linesRead bool // Whether or not we have read the lines in the file
lineNum := 0 // Current line number
// Create reader for stdin and writer for stdout
reader := bufio.NewReader(os.Stdin)
out := bufio.NewWriter(os.Stdout)
regComp, err := reg.Compile(re, flagsToCompile...)
if err != nil {
fmt.Println(err)
return
}
for true {
if linesRead {
break
}
if !(*multiLineFlag) {
// Read every string from stdin until we encounter an error. If the error isn't EOF, panic.
test_str, err = reader.ReadString('\n')
lineNum++
if err != nil {
if err == io.EOF {
linesRead = true
} else {
panic(err)
}
}
if len(test_str) > 0 && test_str[len(test_str)-1] == '\n' {
test_str = test_str[:len(test_str)-1]
}
} else {
// Multi-line mode - read every line of input into a temp. string.
// test_str will contain all lines of input (including newline characters)
// as one string.
var temp string
for temp, err = reader.ReadString('\n'); err == nil; temp, err = reader.ReadString('\n') {
test_str += temp
}
// Assuming err != nil
if err == io.EOF {
if len(temp) > 0 {
test_str += temp // Add the last line (if it is non-empty)
}
linesRead = true
} else {
panic(err)
}
}
matchIndices := make([]reg.Match, 0)
if matchNumFlagEnabled {
tmp, err := regComp.FindNthMatch(test_str, *matchNum)
if err == nil {
matchIndices = append(matchIndices, tmp)
}
} else {
matchIndices = regComp.FindAllSubmatch(test_str)
}
if *printMatchesFlag {
// if we are in single line mode, print the line on which
// the matches occur
if len(matchIndices) > 0 {
if !(*multiLineFlag) {
fmt.Fprintf(out, "Line %d:\n", lineNum)
}
for _, m := range matchIndices {
fmt.Fprintf(out, "%s\n", m.String())
}
err := out.Flush()
if err != nil {
panic(err)
}
}
continue
}
// Decompose the array of matchIndex structs into a flat unique array of ints - if matchIndex is {4,7}, flat array will contain 4,5,6
// This should make checking O(1) instead of O(n)
indicesToPrint := new_uniq_arr[int]()
for _, idx := range matchIndices {
indicesToPrint.add(genRange(idx[0].StartIdx, idx[0].EndIdx)...)
}
// If we are inverting, then we should print the indices which _didn't_ match
// in color.
if *invertFlag {
oldIndices := indicesToPrint.values()
indicesToPrint = new_uniq_arr[int]()
// Explanation:
// Find all numbers from 0 to len(test_str) that are NOT in oldIndices.
// These are the values we want to print, now that we have inverted the match.
// Re-initialize indicesToPrint and add all of these values to it.
indicesToPrint.add(setDifference(genRange(0, len(test_str)), oldIndices)...)
}
// If lineFlag is enabled, we should only print something if:
// a. We are not inverting, and have at least one match on the current line
// OR
// b. We are inverting, and have no matches at all on the current line.
// This checks for the inverse, and continues if it is true.
if *lineFlag {
if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 {
continue
}
}
// If we are substituting, we need a different behavior, as follows:
// For every character in the test string:
// 1. Check if the index is the start of any matchIndex
// 2. If so, print the substitute text, and set our index to
// the corresponding end index.
// 3. If not, just print the character.
if substituteFlagEnabled {
for i := range test_str {
inMatchIndex := false
for _, m := range matchIndices {
if i == m[0].StartIdx {
fmt.Fprintf(out, "%s", *substituteText)
i = m[0].EndIdx
inMatchIndex = true
break
}
}
if !inMatchIndex {
fmt.Fprintf(out, "%c", test_str[i])
}
}
} else {
for i, c := range test_str {
if indicesToPrint.contains(i) {
color.New(color.FgRed).Fprintf(out, "%c", c)
// Newline after every match - only if -o is enabled and -v is disabled.
if *onlyFlag && !(*invertFlag) {
for _, idx := range matchIndices {
if i+1 == idx[0].EndIdx { // End index is one more than last index of match
fmt.Fprintf(out, "\n")
break
}
}
}
} else {
if !(*onlyFlag) {
fmt.Fprintf(out, "%c", c)
}
}
}
}
err = out.Flush()
if err != nil {
panic(err)
}
fmt.Println()
}
}

284
kg/main.go Normal file

@@ -0,0 +1,284 @@
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"slices"
"github.com/fatih/color"
reg "gitea.twomorecents.org/Rockingcool/kleingrep/regex"
)
func main() {
// Flags for the regex Compile function
flagsToCompile := make([]reg.ReFlag, 0)
invertFlag := flag.Bool("v", false, "Invert match.")
// This flag has two 'modes':
// 1. Without '-v': Prints only matches. Prints a newline after every match.
// 2. With '-v': Substitutes all matches with empty string.
onlyFlag := flag.Bool("o", false, "Print only colored content. Overrides -l.")
lineFlag := flag.Bool("l", false, "Only print lines with a match (or with no matches, if -v is enabled). Similar to grep's default.")
multiLineFlag := flag.Bool("t", false, "Multi-line mode. Treats newline just like any character.")
printMatchesFlag := flag.Bool("p", false, "Prints start and end index of each match. Can only be used with '-t' for multi-line mode.")
caseInsensitiveFlag := flag.Bool("i", false, "Case-insensitive. Disregard the case of all characters.")
recursiveFlag := flag.Bool("r", false, "Recursively search all files in the given directory.")
lineNumFlag := flag.Bool("n", false, "For each line with a match, print the line number. Implies -l.")
matchNum := flag.Int("m", 0, "Print the match with the given index. Eg. -m 3 prints the third match.")
substituteText := flag.String("s", "", "Substitute the contents of each match with the given string. Overrides -o and -v")
flag.Parse()
// These flags have to be passed to the Compile function
if *multiLineFlag {
flagsToCompile = append(flagsToCompile, reg.RE_MULTILINE, reg.RE_SINGLE_LINE)
}
if *caseInsensitiveFlag {
flagsToCompile = append(flagsToCompile, reg.RE_CASE_INSENSITIVE)
}
// -l and -o are mutually exclusive: -o overrides -l
if *onlyFlag {
*lineFlag = false
}
// Check if substitute and matchNum flags have been enabled
substituteFlagEnabled := false
matchNumFlagEnabled := false
flag.Visit(func(f *flag.Flag) {
if f.Name == "s" {
substituteFlagEnabled = true
}
if f.Name == "m" {
matchNumFlagEnabled = true
}
})
// Validate matchNumFlag - must be positive integer
if matchNumFlagEnabled && *matchNum < 1 {
panic("Invalid match number to print.")
}
// Enable lineFlag if lineNumFlag is enabled
if *lineNumFlag {
*lineFlag = true
}
// Process:
// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
// a. Add explicit concatenation operators to facilitate this
// 2. Build NFA from postfix representation (Thompson's algorithm)
// 3. Run the string against the NFA
if len(flag.Args()) < 1 { // flag.Args() also strips out program name
fmt.Printf("%s: ERROR: Missing cmdline args\n", os.Args[0])
os.Exit(22)
}
if *recursiveFlag && len(flag.Args()) < 2 { // File/Directory must be provided with '-r'
fmt.Printf("%s: ERROR: Missing cmdline args\n", os.Args[0])
os.Exit(22)
}
var re string
re = flag.Args()[0]
var inputFiles []*os.File
if len(flag.Args()) == 1 || flag.Args()[1] == "-" { // Either no file argument, or file argument is "-"
if !slices.Contains(inputFiles, os.Stdin) {
inputFiles = append(inputFiles, os.Stdin) // os.Stdin cannot be entered more than once into the file list
}
} else {
inputFilenames := flag.Args()[1:]
for _, inputFilename := range inputFilenames {
inputFile, err := os.Open(inputFilename)
if err != nil {
fmt.Printf("%s: %s: No such file or directory\n", os.Args[0], inputFilename)
} else {
fileStat, err := inputFile.Stat()
if err != nil {
fmt.Printf("%v\n", err)
os.Exit(2)
} else {
if fileStat.Mode().IsDir() {
fmt.Printf("%s: %s: Is a directory\n", os.Args[0], inputFilename)
} else {
inputFiles = append(inputFiles, inputFile)
}
}
}
}
}
var test_str string
var err error
var linesRead bool // Whether or not we have read the lines in the file
lineNum := 0 // Current line number
// Create writer for stdout
out := bufio.NewWriter(os.Stdout)
// Compile regex
regComp, err := reg.Compile(re, flagsToCompile...)
if err != nil {
fmt.Println(err)
return
}
for _, inputFile := range inputFiles {
lineNum = 0
reader := bufio.NewReader(inputFile)
linesRead = false
for true {
if linesRead {
break
}
if !(*multiLineFlag) {
// Read every line from the input file until we encounter an error. If the error isn't EOF, panic.
test_str, err = reader.ReadString('\n')
lineNum++
if err != nil {
if err == io.EOF {
linesRead = true
} else {
panic(err)
}
}
if len(test_str) > 0 && test_str[len(test_str)-1] == '\n' {
test_str = test_str[:len(test_str)-1]
}
} else {
// Multi-line mode - read every line of input into a temp. string.
// test_str will contain all lines of input (including newline characters)
// as one string.
var temp string
for temp, err = reader.ReadString('\n'); err == nil; temp, err = reader.ReadString('\n') {
test_str += temp
}
// Assuming err != nil
if err == io.EOF {
if len(temp) > 0 {
test_str += temp // Add the last line (if it is non-empty)
}
linesRead = true
} else {
panic(err)
}
}
matchIndices := make([]reg.Match, 0)
if matchNumFlagEnabled {
tmp, err := regComp.FindNthMatch(test_str, *matchNum)
if err == nil {
matchIndices = append(matchIndices, tmp)
}
} else {
matchIndices = regComp.FindAllSubmatch(test_str)
}
test_str_runes := []rune(test_str) // Converting to runes preserves unicode characters
if *printMatchesFlag {
// if we are in single line mode, print the line on which
// the matches occur
if len(matchIndices) > 0 {
if !(*multiLineFlag) {
fmt.Fprintf(out, "Line %d:\n", lineNum)
}
for _, m := range matchIndices {
fmt.Fprintf(out, "%s\n", m.String())
}
err := out.Flush()
if err != nil {
panic(err)
}
}
continue
}
// Decompose the array of matchIndex structs into a flat unique array of ints - if matchIndex is {4,7}, flat array will contain 4,5,6
// This should make checking O(1) instead of O(n)
indicesToPrint := new_uniq_arr[int]()
for _, idx := range matchIndices {
indicesToPrint.add(genRange(idx[0].StartIdx, idx[0].EndIdx)...)
}
// If we are inverting, then we should print the indices which _didn't_ match
// in color.
if *invertFlag {
oldIndices := indicesToPrint.values()
indicesToPrint = new_uniq_arr[int]()
// Explanation:
// Find all numbers from 0 to len(test_str_runes) that are NOT in oldIndices.
// These are the values we want to print, now that we have inverted the match.
// Re-initialize indicesToPrint and add all of these values to it.
indicesToPrint.add(setDifference(genRange(0, len(test_str_runes)), oldIndices)...)
}
// If lineFlag is enabled, we should only print something if:
// a. We are not inverting, and have at least one match on the current line
// OR
// b. We are inverting, and have no matches at all on the current line.
// This checks for the inverse, and continues if it is true.
if *lineFlag {
if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 {
continue
} else {
if *recursiveFlag || len(flag.Args()) > 2 { // If we have 2 args, then we're only searching 1 file. We should only print the filename if there's more than 1 file.
color.New(color.FgMagenta).Fprintf(out, "%s:", inputFile.Name()) // Print filename
}
if *lineNumFlag {
color.New(color.FgGreen).Fprintf(out, "%d:", lineNum) // Print line number
}
}
}
// If we are substituting, we need a different behavior, as follows:
// For every character in the test string:
// 1. Check if the index is the start of any matchIndex
// 2. If so, print the substitute text, and set our index to
// the corresponding end index.
// 3. If not, just print the character.
if substituteFlagEnabled {
for i := range test_str_runes {
inMatchIndex := false
for _, m := range matchIndices {
if i == m[0].StartIdx {
fmt.Fprintf(out, "%s", *substituteText)
i = m[0].EndIdx
inMatchIndex = true
break
}
}
if !inMatchIndex {
fmt.Fprintf(out, "%c", test_str_runes[i])
}
}
} else {
for i, c := range test_str_runes {
if indicesToPrint.contains(i) {
color.New(color.FgRed, color.Bold).Fprintf(out, "%c", c)
// Newline after every match - only if -o is enabled and -v is disabled.
if *onlyFlag && !(*invertFlag) {
for matchIdxNum, idx := range matchIndices {
if matchIdxNum < len(matchIndices)-1 { // Only print a newline after a match if there are multiple matches on the line and we aren't on the last one; the newline added at the end takes care of the last one.
if i+1 == idx[0].EndIdx { // End index is one more than last index of match
fmt.Fprintf(out, "\n")
break
}
}
}
}
} else {
if !(*onlyFlag) {
fmt.Fprintf(out, "%c", c)
}
}
}
}
err = out.Flush()
if err != nil {
panic(err)
}
// If the last character in the string wasn't a newline, AND either -o isn't set, or it is set and we've matched something, then print a newline
if (len(test_str_runes) > 0 && test_str_runes[len(test_str_runes)-1] != '\n') &&
(!*onlyFlag || indicesToPrint.len() > 0) {
fmt.Println()
}
}
}
}


@@ -36,3 +36,7 @@ func (s uniq_arr[T]) values() []T {
}
return toRet
}
func (s uniq_arr[T]) len() int {
return len(s.backingMap)
}
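
For context, this is the little set type the driver uses to make the "did this index match?" check O(1), as the comments in main.go explain. A rough sketch of the idea follows; the real type is uniq_arr with new_uniq_arr/add/contains/values/len, and the constructor and type names used here are simplified assumptions.

```go
package main

import "fmt"

// uniqSet is a sketch of a map-backed set like uniq_arr: adding is
// idempotent and membership checks are O(1).
type uniqSet[T comparable] struct {
	backingMap map[T]struct{}
}

func newUniqSet[T comparable]() uniqSet[T] {
	return uniqSet[T]{backingMap: make(map[T]struct{})}
}

// add inserts any number of values, ignoring duplicates.
func (s uniqSet[T]) add(vals ...T) {
	for _, v := range vals {
		s.backingMap[v] = struct{}{}
	}
}

// contains reports whether v has been added.
func (s uniqSet[T]) contains(v T) bool {
	_, ok := s.backingMap[v]
	return ok
}

// len returns the number of unique values stored.
func (s uniqSet[T]) len() int {
	return len(s.backingMap)
}

func main() {
	indices := newUniqSet[int]()
	indices.add(4, 5, 6, 5) // a match spanning {4,7} contributes 4, 5 and 6
	fmt.Println(indices.contains(5), indices.contains(9), indices.len()) // true false 3
}
```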


@@ -47,6 +47,7 @@ func (re *Reg) UnmarshalText(text []byte) error {
return err
}
// Longest makes future searches prefer the longest branch of an alternation, as opposed to the leftmost branch.
func (re *Reg) Longest() {
re.preferLongest = true
}
@@ -64,7 +65,7 @@ const (
)
func isOperator(c rune) bool {
if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune {
if c == '+' || c == '?' || c == '*' || c == '|' || c == concatRune || c == lazyPlusRune || c == lazyKleeneRune || c == lazyQuestionRune {
return true
}
return false
@@ -72,7 +73,7 @@ func isOperator(c rune) bool {
/* priority returns the priority of the given operator */
func priority(op rune) int {
precedence := []rune{'|', concatRune, '+', '*', '?'}
precedence := []rune{'|', concatRune, '+', lazyPlusRune, '*', lazyKleeneRune, '?', lazyQuestionRune}
return slices.Index(precedence, op)
}
@@ -108,6 +109,48 @@ func getPOSIXClass(str []rune) (bool, string) {
return true, rtv
}
// isUnicodeCharClassLetter returns whether or not the given letter represents a unicode character class.
func isUnicodeCharClassLetter(c rune) bool {
return slices.Contains([]rune{'L', 'M', 'S', 'N', 'P', 'C', 'Z'}, c)
}
// rangeTableToRuneSlice converts the given range table into a rune slice and returns it.
func rangeTableToRuneSlice(rangetable *unicode.RangeTable) []rune {
var rtv []rune
for _, r := range rangetable.R16 {
for c := r.Lo; c <= r.Hi; c += r.Stride {
rtv = append(rtv, rune(c))
}
}
for _, r := range rangetable.R32 {
for c := r.Lo; c <= r.Hi; c += r.Stride {
rtv = append(rtv, rune(c))
}
}
return rtv
}
// unicodeCharClassToRange converts the given unicode character class name into a list of characters in that class.
// This class could also be a single letter eg. 'C'.
func unicodeCharClassToRange(class string) ([]rune, error) {
if len(class) == 0 {
return nil, fmt.Errorf("empty unicode character class")
}
if len(class) == 1 || len(class) == 2 {
if rangeTable, ok := unicode.Categories[class]; ok {
return rangeTableToRuneSlice(rangeTable), nil
} else {
return nil, fmt.Errorf("invalid short unicode character class")
}
} else {
if rangeTable, ok := unicode.Scripts[class]; ok {
return rangeTableToRuneSlice(rangeTable), nil
} else {
return nil, fmt.Errorf("invalid long unicode character class")
}
}
}
// Stores whether the case-insensitive flag has been enabled.
var caseInsensitive bool
@@ -166,9 +209,6 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
// metacharacter. Later, in thompson(), these will be converted back. This avoids
// confusion in detecting whether a character is escaped eg. detecting
// whether '\\[a]' has an escaped opening bracket (it doesn't).
//
// 5. Check for non-greedy operators. These are not supported at the moment, so an error
// must be thrown if the user attempts to use a non-greedy operator.
for i := 0; i < len(re_runes_orig); i++ {
c := re_runes_orig[i]
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
@@ -215,8 +255,16 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
re_runes = append(re_runes, rbracketRune)
continue
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
return nil, fmt.Errorf("non-greedy operators are not supported")
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i > 0 && re_runes_orig[i-1] != '\\') && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
switch c {
case '+':
re_runes = append(re_runes, lazyPlusRune)
case '*':
re_runes = append(re_runes, lazyKleeneRune)
case '?':
re_runes = append(re_runes, lazyQuestionRune)
}
i++
} else {
re_runes = append(re_runes, c)
}
@@ -309,17 +357,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
} else if isHex(re_runes[i]) {
re_postfix = append(re_postfix, re_runes[i:i+2]...)
i += 2
i += 1 // I don't skip forward 2 steps, because the second step will happen with the loop increment
} else {
return nil, fmt.Errorf("invalid hex value in expression")
}
} else if isOctal(re_runes[i]) {
} else if re_runes[i] == 'p' || re_runes[i] == 'P' { // Unicode character class (P is negated unicode charclass)
re_postfix = append(re_postfix, re_runes[i])
i++
if i >= len(re_runes) {
return nil, fmt.Errorf("error parsing unicode character class in expression")
}
if re_runes[i] == '{' { // Full name charclass
for re_runes[i] != '}' {
re_postfix = append(re_postfix, re_runes[i])
i++
}
re_postfix = append(re_postfix, re_runes[i])
i++
} else if isUnicodeCharClassLetter(re_runes[i]) {
re_postfix = append(re_postfix, re_runes[i])
i++
} else {
return nil, fmt.Errorf("error parsing unicode character class in expression")
}
i-- // The loop increment at the top will move us forward
} else if re_runes[i] == '0' { // Start of octal value
numDigits := 1
for i+numDigits < len(re_runes) && numDigits < 3 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 3)
for i+numDigits < len(re_runes) && numDigits < 4 && isOctal(re_runes[i+numDigits]) { // Skip while we see an octal character (max of 4, starting with 0)
numDigits++
}
re_postfix = append(re_postfix, re_runes[i:i+numDigits]...)
i += (numDigits - 1) // I have to move back a step, so that I can add a concatenation operator if necessary, and so that the increment at the bottom of the loop works as intended
} else if unicode.IsDigit(re_runes[i]) { // Any other number - backreference
numDigits := 1
for i+numDigits < len(re_runes) && unicode.IsDigit(re_runes[i+numDigits]) { // Skip while we see a digit
numDigits++
}
re_postfix = append(re_postfix, re_runes[i:i+numDigits]...)
i += (numDigits - 1) // Move back a step to add concatenation operator
} else {
re_postfix = append(re_postfix, re_runes[i])
}
@@ -336,10 +411,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i >= len(re_runes) {
return nil, fmt.Errorf("unclosed lookaround")
}
if re_runes[i] == '(' || re_runes[i] == nonCapLparenRune {
if (re_runes[i] == '(' && re_runes[i-1] != '\\') || re_runes[i] == nonCapLparenRune {
numOpenParens++
}
if re_runes[i] == ')' {
if re_runes[i] == ')' && re_runes[i-1] != '\\' {
numOpenParens--
if numOpenParens == 0 {
break
@@ -352,7 +427,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
if i < len(re_runes) && (re_runes[i] != '(' && re_runes[i] != nonCapLparenRune && re_runes[i] != '|' && re_runes[i] != '\\') || (i > 0 && re_runes[i-1] == '\\') { // Every character should be concatenated if it is escaped
if i < len(re_runes)-1 {
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != '+' && re_runes[i+1] != '?' && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
if re_runes[i+1] != '|' && re_runes[i+1] != '*' && re_runes[i+1] != lazyKleeneRune && re_runes[i+1] != '+' && re_runes[i+1] != lazyPlusRune && re_runes[i+1] != '?' && re_runes[i+1] != lazyQuestionRune && re_runes[i+1] != ')' && re_runes[i+1] != '{' {
re_postfix = append(re_postfix, concatRune)
}
}
@@ -364,7 +439,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
outQueue := make([]postfixNode, 0) // Output queue
// Actual algorithm
numOpenParens := 0 // Number of open parentheses
numOpenParens := 0 // Number of open parentheses
parenIndices := make([]Group, 0) // I really shouldn't be using Group here, because that's strictly for matching purposes, but it's a convenient way to store the indices of the opening and closing parens.
parenIndices = append(parenIndices, Group{0, 0}) // I append a weird value here, because the 0-th group doesn't have any parens. This way, the 1st group will be at index 1, 2nd at 2 ...
for i := 0; i < len(re_postfix); i++ {
/* Two cases:
1. Current character is alphanumeric - send to output queue
@@ -420,11 +497,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
return nil, fmt.Errorf("not enough hex characters found in expression")
}
} else if isOctal(re_postfix[i]) { // Octal value
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
charClassInverted := (re_postfix[i] == 'P')
var charsInClass []rune
i++
if isUnicodeCharClassLetter(re_postfix[i]) {
var err error
charsInClass, err = unicodeCharClassToRange(string(re_postfix[i]))
if err != nil {
return nil, err
}
} else if re_postfix[i] == '{' {
i++ // Skip opening bracket
unicodeCharClassStr := ""
for re_postfix[i] != '}' {
unicodeCharClassStr += string(re_postfix[i])
i++
}
var err error
charsInClass, err = unicodeCharClassToRange(unicodeCharClassStr)
if err != nil {
return nil, err
}
} else {
return nil, fmt.Errorf("error parsing unicode character class in expression")
}
var toAppend postfixNode
if !charClassInverted { // \p
toAppend = newPostfixNode(charsInClass...)
} else { // \P
toAppend = newPostfixDotNode()
toAppend.except = append([]postfixNode{}, newPostfixNode(charsInClass...))
}
outQueue = append(outQueue, toAppend)
} else if re_postfix[i] == '0' { // Octal value
var octVal int64
var octValStr string
numDigitsParsed := 0
for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 {
for (i+numDigitsParsed) < len(re_postfix) && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 4 {
octValStr += string(re_postfix[i+numDigitsParsed])
numDigitsParsed++
}
@@ -437,6 +547,20 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
i += numDigitsParsed - 1 // Shift forward by the number of digits that were parsed. Move back one character, because the loop increment will move us back to the next character automatically
outQueue = append(outQueue, newPostfixCharNode(rune(octVal)))
} else if unicode.IsDigit(re_postfix[i]) { // Backreference
var num int64
var numStr string
numDigitsParsed := 0
for (i+numDigitsParsed) < len(re_postfix) && unicode.IsDigit(re_postfix[i+numDigitsParsed]) {
numStr += string(re_postfix[i+numDigitsParsed])
numDigitsParsed++
}
num, err := strconv.ParseInt(numStr, 10, 32)
if err != nil {
return nil, fmt.Errorf("error parsing backreference in expresion")
}
i += numDigitsParsed - 1
outQueue = append(outQueue, newPostfixBackreferenceNode(int(num)))
} else {
escapedNode, err := newEscapedNode(re_postfix[i], false)
if err != nil {
@@ -466,10 +590,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if i >= len(re_postfix) {
return nil, fmt.Errorf("unclosed lookaround")
}
if re_postfix[i] == '(' || re_postfix[i] == nonCapLparenRune {
if (re_postfix[i] == '(' && re_postfix[i-1] != '\\') || re_postfix[i] == nonCapLparenRune {
numOpenParens++
}
if re_postfix[i] == ')' {
if re_postfix[i] == ')' && re_postfix[i-1] != '\\' {
numOpenParens--
if numOpenParens == 0 {
break
@@ -588,11 +712,44 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
} else {
return nil, fmt.Errorf("not enough hex characters found in character class")
}
} else if isOctal(re_postfix[i]) { // Octal value
} else if re_postfix[i] == 'p' || re_postfix[i] == 'P' {
charClassInverted := (re_postfix[i] == 'P')
var charsInList []rune
i++
if isUnicodeCharClassLetter(re_postfix[i]) {
var err error
charsInList, err = unicodeCharClassToRange(string(re_postfix[i]))
if err != nil {
return nil, err
}
} else if re_postfix[i] == '{' {
i++ // Skip opening bracket
unicodeCharClassStr := ""
for re_postfix[i] != '}' {
unicodeCharClassStr += string(re_postfix[i])
i++
}
var err error
charsInList, err = unicodeCharClassToRange(unicodeCharClassStr)
if err != nil {
return nil, err
}
} else {
return nil, fmt.Errorf("error parsing unicode character class in expression")
}
if !charClassInverted {
chars = append(chars, newPostfixNode(charsInList...))
} else {
toAppend := newPostfixDotNode()
toAppend.except = append([]postfixNode{}, newPostfixNode(charsInList...))
chars = append(chars, toAppend)
}
} else if re_postfix[i] == '0' { // Octal value
var octVal int64
var octValStr string
numDigitsParsed := 0
for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 3 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
for (i+numDigitsParsed) < len(re_postfix)-1 && isOctal(re_postfix[i+numDigitsParsed]) && numDigitsParsed <= 4 { // The '-1' exists, because even in the worst case (the character class extends till the end), the last character must be a closing bracket (and nothing else)
octValStr += string(re_postfix[i+numDigitsParsed])
numDigitsParsed++
}
@@ -789,6 +946,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
}
outQueue[idx].startReps = startRangeNum
outQueue[idx].endReps = endRangeNum
if i < len(re_postfix)-1 && re_postfix[i+1] == '?' { // lazy repetition
outQueue[idx].isLazy = true
i++
}
}
if c == '(' || c == nonCapLparenRune {
opStack = append(opStack, c)
@@ -796,6 +957,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
outQueue = append(outQueue, newPostfixNode(c))
}
numOpenParens++
parenIndices = append(parenIndices, Group{StartIdx: len(outQueue) - 1}) // Push the index of the lparen into parenIndices
}
if c == ')' {
// Keep popping from opStack until we encounter an opening parenthesis or a NONCAPLPAREN_CHAR. Throw an error if we reach the end of the stack.
@@ -812,6 +974,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if val == '(' { // Whatever was inside the parentheses was a _capturing_ group, so we append the closing parentheses as well
outQueue = append(outQueue, newPostfixNode(')')) // Add closing parentheses
}
parenIndices[numOpenParens].EndIdx = len(outQueue) - 1
numOpenParens--
}
}
@@ -826,6 +989,11 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
return nil, fmt.Errorf("imbalanced parantheses")
}
// outQueue, _, err := rewriteBackreferences(outQueue, parenIndices)
// if err != nil {
// return nil, err
// }
return outQueue, nil
}
@@ -1037,6 +1205,21 @@ func thompson(re []postfixNode) (Reg, error) {
})
nfa = append(nfa, toAdd)
}
if c.nodetype == backreferenceNode {
if c.referencedGroup > numGroups {
return Reg{}, fmt.Errorf("invalid backreference")
}
stateToAdd := &nfaState{}
stateToAdd.assert = noneAssert
stateToAdd.content = newContents(epsilon)
stateToAdd.isEmpty = true
stateToAdd.isBackreference = true
stateToAdd.output = make([]*nfaState, 0)
stateToAdd.output = append(stateToAdd.output, stateToAdd)
stateToAdd.referredGroup = c.referencedGroup
stateToAdd.threadBackref = 0
nfa = append(nfa, stateToAdd)
}
// Must be an operator if it isn't a character
switch c.nodetype {
case concatenateNode:
@@ -1060,6 +1243,9 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
return Reg{}, err
}
if c.isLazy {
stateToAdd.isLazy = true
}
nfa = append(nfa, stateToAdd)
case plusNode: // a+ is equivalent to aa*
s1 := mustPop(&nfa)
@@ -1067,6 +1253,9 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
return Reg{}, err
}
if c.isLazy {
s2.isLazy = true
}
s1 = concatenate(s1, s2)
nfa = append(nfa, s1)
case questionNode: // ab? is equivalent to a(b|)
@@ -1078,6 +1267,9 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
return Reg{}, err
}
if c.isLazy {
s2.isLazy = true
}
nfa = append(nfa, s2)
case pipeNode:
// A pipe operator doesn't actually need either operand to be present. If an operand isn't present,
@@ -1133,6 +1325,9 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
return Reg{}, err
}
if c.isLazy {
s2.isLazy = true
}
stateToAdd = concatenate(stateToAdd, s2)
} else { // Case 2
for i := c.startReps; i < c.endReps; i++ {
@@ -1140,6 +1335,9 @@ func thompson(re []postfixNode) (Reg, error) {
if err != nil {
return Reg{}, fmt.Errorf("error processing bounded repetition")
}
if c.isLazy {
tmp.isLazy = true
}
stateToAdd = concatenate(stateToAdd, tmp)
}
}


@@ -18,7 +18,7 @@ Single characters:
[^abc] Negated character class - match any character except a, b and c
[^a-z] Negated character range - do not match any character from a to z
\[ Match a literal '['. Backslashes can escape any character with special meaning, including another backslash.
\452 Match the character with the octal value 452 (up to 3 digits)
\0452 Match the character with the octal value 452 (up to 4 digits, first digit must be 0)
\xFF Match the character with the hex value FF (exactly 2 characters)
\x{0000FF} Match the character with the hex value 0000FF (exactly 6 characters)
\n Newline
@@ -60,14 +60,24 @@ Composition:
x|y Match x or y (prefer x)
xy|z Match xy or z (prefer xy)
Repetition (always greedy, preferring more):
Repetition:
x* Match x zero or more times
x+ Match x one or more times
x? Match x zero or one time
x{m,n} Match x between m and n times (inclusive)
x{m,} Match x at least m times
x{,n} Match x between 0 and n times (inclusive)
Greedy:
x* Match x zero or more times, prefer more
x+ Match x one or more times, prefer more
x? Match x zero or one time, prefer one
x{m,n} Match x between m and n times (inclusive), prefer more
x{m,} Match x at least m times, prefer more
x{,n} Match x between 0 and n times (inclusive), prefer more
x{m} Match x exactly m times
Lazy:
x*? Match x zero or more times, prefer fewer
x+? Match x one or more times, prefer fewer
x?? Match x zero or one time, prefer zero
x{m,n}? Match x between m and n times (inclusive), prefer fewer
x{m,}? Match x at least m times, prefer fewer
x{,n}? Match x between 0 and n times (inclusive), prefer fewer
x{m} Match x exactly m times
Grouping:
@@ -93,6 +103,10 @@ Lookarounds:
(?<=x)y Positive lookbehind - Match y if preceded by x
(?<!x)y Negative lookbehind - Match y if NOT preceded by x
Backreferences:
(xy)\1 Match 'xy' followed by the text most recently captured by group 1 (in this case, 'xy')
Numeric ranges:
<x-y> Match any number from x to y (inclusive) (x and y must be positive numbers)
@@ -103,17 +117,13 @@ Numeric ranges:
The engine and the API differ from [regexp] in a few ways, some of them very subtle.
The key differences are mentioned below.
1. Greediness:
This engine currently does not support non-greedy operators.
2. Byte-slices and runes:
1. Byte-slices and runes:
My engine does not support byte-slices. When a matching function receives a string, it converts it into a
rune-slice to iterate through it. While this has some space overhead, the convenience of built-in unicode
support made the tradeoff worth it.
3. Return values
2. Return values
Rather than using primitives for return values, my engine defines two types that are used as return
values: a [Group] represents a capturing group, and a [Match] represents a list of groups.
@@ -148,14 +158,15 @@ returns the 0-group.
The following features from [regexp] are (currently) NOT supported:
1. Named capturing groups
2. Non-greedy operators
3. Unicode character classes
4. Embedded flags (flags are instead passed as arguments to [Compile])
5. Literal text with \Q ... \E
2. Negated POSIX classes
3. Embedded flags (flags are instead passed as arguments to [Compile])
4. Literal text with \Q ... \E
5. Finite repetition with no start (defaulting to 0)
The following features are not available in [regexp], but are supported in my engine:
1. Lookarounds
2. Numeric ranges
3. Backreferences
I hope to shorten the first list, and expand the second.
*/
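
To make the greedy/lazy distinction concrete, here is a small sketch (API assumed from the rest of this changeset) matching the same input with a greedy and a lazy quantifier; the expected spans come from the lazy-quantifier entries in the test table further down.

```go
package main

import (
	"fmt"

	reg "gitea.twomorecents.org/Rockingcool/kleingrep/regex"
)

func main() {
	input := "abcabc"
	for _, pattern := range []string{`a.+c`, `a.+?c`} {
		re, err := reg.Compile(pattern)
		if err != nil {
			panic(err)
		}
		fmt.Print(pattern, ": ")
		for _, m := range re.FindAllSubmatch(input) {
			fmt.Printf("[%d,%d) ", m[0].StartIdx, m[0].EndIdx)
		}
		fmt.Println()
	}
	// Greedy a.+c takes the whole string [0,6); lazy a.+?c stops at the
	// first 'c', giving [0,3) and [3,6).
}
```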


@@ -234,16 +234,16 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
}
visited = append(visited, state)
if state.isKleene || state.isQuestion {
if (state.isKleene || state.isQuestion) && (state.isLazy == false) { // Greedy quantifiers
copyThread(state.splitState, state)
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
list := addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
copyThread(state.next, state)
list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
return list
}
if state.isAlternation {
if state.isAlternation || ((state.isKleene || state.isQuestion) && state.isLazy) { // Alternation or lazy quantifier
copyThread(state.next, state)
list = addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
list := addStateToList(str, idx, list, *state.next, threadGroups, visited, preferLongest)
copyThread(state.splitState, state)
list = addStateToList(str, idx, list, *state.splitState, threadGroups, visited, preferLongest)
return list
@@ -257,10 +257,12 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
}
if state.groupBegin {
state.threadGroups[state.groupNum].StartIdx = idx
copyThread(state.next, state)
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
}
if state.groupEnd {
state.threadGroups[state.groupNum].EndIdx = idx
copyThread(state.next, state)
return addStateToList(str, idx, list, *state.next, state.threadGroups, visited, preferLongest)
}
return append(list, state)
@@ -313,11 +315,25 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
if !preferLongest {
break
}
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
} else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.isBackreference && !currentState.groupBegin && !currentState.groupEnd && currentState.assert == noneAssert { // Normal character
if currentState.contentContains(str, idx, preferLongest) {
nextStates = addStateToList(str, idx+1, nextStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
}
} else if currentState.isBackreference && currentState.threadGroups[currentState.referredGroup].IsValid() {
groupLength := currentState.threadGroups[currentState.referredGroup].EndIdx - currentState.threadGroups[currentState.referredGroup].StartIdx
if currentState.threadBackref == groupLength {
currentState.threadBackref = 0
copyThread(currentState.next, currentState)
currentStates = addStateToList(str, idx, currentStates, *currentState.next, currentState.threadGroups, nil, preferLongest)
} else {
idxInReferredGroup := currentState.threadGroups[currentState.referredGroup].StartIdx + currentState.threadBackref
if idxInReferredGroup < len(str) && idx < len(str) && str[idxInReferredGroup] == str[idx] {
currentState.threadBackref += 1
nextStates = append(nextStates, currentState)
}
}
}
}
currentStates = append([]nfaState{}, nextStates...)
nextStates = nil
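
Seen from the public API, the net effect of this per-thread threadBackref bookkeeping is simply that a backreference re-matches the text captured by its group. A minimal sketch (signatures assumed from this changeset; the expected spans mirror the backreference tests below):

```go
package main

import (
	"fmt"

	reg "gitea.twomorecents.org/Rockingcool/kleingrep/regex"
)

func main() {
	// (abc)\1 requires the literal "abc" followed by whatever group 1 captured.
	re, err := reg.Compile(`(abc)\1`)
	if err != nil {
		panic(err)
	}
	for _, m := range re.FindAllSubmatch("abcabc") {
		// Group 0 is the whole match [0,6); group 1 is the capture [0,3).
		fmt.Printf("whole: [%d,%d)  group 1: [%d,%d)\n",
			m[0].StartIdx, m[0].EndIdx, m[1].StartIdx, m[1].EndIdx)
	}
}
```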


@@ -16,8 +16,11 @@ var rparenRune rune = 0xF0006
var nonCapLparenRune rune = 0xF0007 // Represents a non-capturing group's LPAREN
var escBackslashRune rune = 0xF0008 // Represents an escaped backslash
var charRangeRune rune = 0xF0009 // Represents a character range
var lazyKleeneRune rune = 0xF000A // Represents a lazy kleene star
var lazyPlusRune rune = 0xF000B // Represents a lazy plus operator
var lazyQuestionRune rune = 0xF000C // Represents a lazy question operator
var specialChars = []rune{'?', '*', '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
var specialChars = []rune{'?', lazyQuestionRune, '*', lazyKleeneRune, '\\', '^', '$', '{', '}', '(', ')', '[', ']', '+', lazyPlusRune, '|', '.', concatRune, '<', '>', lbracketRune, rbracketRune, nonCapLparenRune}
// An interface for int and rune, which are identical
type character interface {


@@ -34,6 +34,7 @@ type nfaState struct {
isKleene bool // Identifies whether current node is a 0-state representing Kleene star
isQuestion bool // Identifies whether current node is a 0-state representing the question operator
isAlternation bool // Identifies whether current node is a 0-state representing an alternation
isLazy bool // Only for split states - Identifies whether or not to flip the order of branches (try one branch before the other)
splitState *nfaState // Only for alternation states - the 'other' branch of the alternation ('next' is the first)
assert assertType // Type of assertion of current node - NONE means that the node doesn't assert anything
allChars bool // Whether or not the state represents all characters (eg. a 'dot' metacharacter). A 'dot' node doesn't store any contents directly, as it would take up too much space
@@ -44,9 +45,11 @@ type nfaState struct {
groupBegin bool // Whether or not the node starts a capturing group
groupEnd bool // Whether or not the node ends a capturing group
groupNum int // Which capturing group the node starts / ends
isBackreference bool // Whether or not current node is backreference
referredGroup int // If current node is a backreference, the group that it refers to
// The following properties depend on the current match - I should think about resetting them for every match.
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
threadBackref int // If current node is a backreference, how many characters to look forward into the referred group
}
// Clones the NFA starting from the given state.
@@ -75,14 +78,16 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
isKleene: stateToClone.isKleene,
isQuestion: stateToClone.isQuestion,
isAlternation: stateToClone.isAlternation,
isLazy: stateToClone.isLazy,
assert: stateToClone.assert,
zeroMatchFound: stateToClone.zeroMatchFound,
allChars: stateToClone.allChars,
except: append([]rune{}, stateToClone.except...),
lookaroundRegex: stateToClone.lookaroundRegex,
groupEnd: stateToClone.groupEnd,
groupBegin: stateToClone.groupBegin,
groupNum: stateToClone.groupNum,
isBackreference: stateToClone.isBackreference,
referredGroup: stateToClone.referredGroup,
}
cloneMap[stateToClone] = clone
for i, s := range stateToClone.output {
@@ -122,6 +127,7 @@ func resetThreadsHelper(state *nfaState, visitedMap map[*nfaState]bool) {
}
// Assuming it hasn't been visited
state.threadGroups = nil
state.threadBackref = 0
visitedMap[state] = true
if state.isAlternation {
resetThreadsHelper(state.next, visitedMap)
@@ -419,6 +425,7 @@ func (s nfaState) equals(other nfaState) bool {
s.next == other.next &&
s.isKleene == other.isKleene &&
s.isQuestion == other.isQuestion &&
s.isLazy == other.isLazy &&
s.isAlternation == other.isAlternation &&
s.splitState == other.splitState &&
s.assert == other.assert &&
@@ -428,7 +435,8 @@ func (s nfaState) equals(other nfaState) bool {
s.groupBegin == other.groupBegin &&
s.groupEnd == other.groupEnd &&
s.groupNum == other.groupNum &&
slices.Equal(s.threadGroups, other.threadGroups)
slices.Equal(s.threadGroups, other.threadGroups) &&
s.threadBackref == other.threadBackref
}
func stateExists(list []nfaState, s nfaState) bool {


@@ -1,6 +1,8 @@
package regex
import "fmt"
import (
"fmt"
)
type nodeType int
@@ -20,6 +22,7 @@ const (
assertionNode
lparenNode
rparenNode
backreferenceNode
)
// Helper constants for lookarounds
@@ -31,15 +34,17 @@ const lookbehind = -1
var infinite_reps int = -1 // Represents infinite reps eg. the end range in {5,}
// This represents a node in the postfix representation of the expression
type postfixNode struct {
nodetype nodeType
contents []rune // Contents of the node
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
except []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
lookaroundDir int // Lookbehind or lookahead
nodeContents []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
nodetype nodeType
contents []rune // Contents of the node
startReps int // Minimum number of times the node should be repeated - used with numeric specifiers
endReps int // Maximum number of times the node should be repeated - used with numeric specifiers
allChars bool // Whether or not the current node represents all characters (eg. dot metacharacter)
except []postfixNode // For inverted character classes, we match every unicode character _except_ a few. In this case, allChars is true and the exceptions are placed here.
lookaroundSign int // ONLY USED WHEN nodetype == ASSERTION. Whether we have a positive or negative lookaround.
lookaroundDir int // Lookbehind or lookahead
nodeContents []postfixNode // ONLY USED WHEN nodetype == CHARCLASS. Holds all the nodes inside the given CHARCLASS node.
referencedGroup int // ONLY USED WHEN nodetype == backreferenceNode. Holds the group which this one refers to. After parsing is done, the expression will be rewritten eg. (a)\1 will become (a)(a). So the return value of ShuntingYard() shouldn't contain a backreferenceNode.
isLazy bool // ONLY USED WHEN nodetype == kleene or question
}
// Converts the given list of postfixNodes to one node of type CHARCLASS.
@@ -158,10 +163,19 @@ func newPostfixNode(contents ...rune) postfixNode {
switch contents[0] {
case '+':
to_return.nodetype = plusNode
case lazyPlusRune:
to_return.nodetype = plusNode
to_return.isLazy = true
case '?':
to_return.nodetype = questionNode
case lazyQuestionRune:
to_return.nodetype = questionNode
to_return.isLazy = true
case '*':
to_return.nodetype = kleeneNode
case lazyKleeneRune:
to_return.nodetype = kleeneNode
to_return.isLazy = true
case '|':
to_return.nodetype = pipeNode
case concatRune:
@@ -208,3 +222,44 @@ func newPostfixCharNode(contents ...rune) postfixNode {
toReturn.contents = append(toReturn.contents, contents...)
return toReturn
}
// newPostfixBackreferenceNode creates and returns a backreference node, referring to the given group
func newPostfixBackreferenceNode(referred int) postfixNode {
toReturn := postfixNode{}
toReturn.startReps = 1
toReturn.endReps = 1
toReturn.nodetype = backreferenceNode
toReturn.referencedGroup = referred
return toReturn
}
// rewriteBackreferences rewrites any backreferences in the given postfixNode slice, into their respective groups.
// It stores the relation in a map, and returns it as the second return value.
// It uses parenIndices to determine where a group starts and ends in nodes.
// For example, \1(a) will be rewritten into (a)(a), and 1 -> 2 will be the hashmap value.
// It returns an error if a backreference points to an invalid group.
// func rewriteBackreferences(nodes []postfixNode, parenIndices []Group) ([]postfixNode, map[int]int, error) {
// rtv := make([]postfixNode, 0)
// referMap := make(map[int]int)
// numGroups := 0
// groupIncrement := 0 // If we have a backreference before the group it's referring to, then the group it's referring to will have its group number incremented.
// for i, node := range nodes {
// if node.nodetype == backreferenceNode {
// if node.referencedGroup >= len(parenIndices) {
// return nil, nil, fmt.Errorf("invalid backreference")
// }
// rtv = slices.Concat(rtv, nodes[parenIndices[node.referencedGroup].StartIdx:parenIndices[node.referencedGroup].EndIdx+1]) // Add all the nodes in the group to rtv
// numGroups += 1
// if i < parenIndices[node.referencedGroup].StartIdx {
// groupIncrement += 1
// }
// referMap[numGroups] = node.referencedGroup + groupIncrement
// } else {
// rtv = append(rtv, node)
// if node.nodetype == lparenNode {
// numGroups += 1
// }
// }
// }
// return rtv, referMap, nil
// }


@@ -117,6 +117,7 @@ var reTests = []struct {
{`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
{`\bpaint\b`, nil, "paints", []Group{}},
{`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
{`\w{}`, nil, "test", nil},
{`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
{`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
{`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
@@ -179,7 +180,7 @@ var reTests = []struct {
{"[[:graph:]]+", nil, "abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPRQSTUVWXYZ0123456789!@#$%^&*", []Group{{0, 70}}},
// Test cases from Python's RE test suite
{`[\1]`, nil, "\x01", []Group{{0, 1}}},
{`[\01]`, nil, "\x01", []Group{{0, 1}}},
{`\0`, nil, "\x00", []Group{{0, 1}}},
{`[\0a]`, nil, "\x00", []Group{{0, 1}}},
@@ -194,7 +195,7 @@ var reTests = []struct {
{`\x00ffffffffffffff`, nil, "\xff", []Group{}},
{`\x00f`, nil, "\x0f", []Group{}},
{`\x00fe`, nil, "\xfe", []Group{}},
{`^\w+=(\\[\000-\277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
{`^\w+=(\\[\000-\0277]|[^\n\\])*`, nil, "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", []Group{{0, 32}}},
{`a.b`, nil, `acb`, []Group{{0, 3}}},
{`a.b`, nil, "a\nb", []Group{}},
@@ -312,11 +313,7 @@ var reTests = []struct {
{`a[-]?c`, nil, `ac`, []Group{{0, 2}}},
{`^(.+)?B`, nil, `AB`, []Group{{0, 2}}},
{`\0009`, nil, "\x009", []Group{{0, 2}}},
{`\141`, nil, "a", []Group{{0, 1}}},
// At this point, the python test suite has a bunch
// of backreference tests. Since my engine doesn't
// implement backreferences, I've skipped those tests.
{`\0141`, nil, "a", []Group{{0, 1}}},
{`*a`, nil, ``, nil},
{`(*)b`, nil, ``, nil},
@@ -433,7 +430,8 @@ var reTests = []struct {
{`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}},
{`^(.+)?B`, []ReFlag{RE_CASE_INSENSITIVE}, `ab`, []Group{{0, 2}}},
{`\0009`, []ReFlag{RE_CASE_INSENSITIVE}, "\x009", []Group{{0, 2}}},
{`\141`, []ReFlag{RE_CASE_INSENSITIVE}, "A", []Group{{0, 1}}},
{`\0141`, []ReFlag{RE_CASE_INSENSITIVE}, "A", []Group{{0, 1}}},
{`\0141\0141`, []ReFlag{RE_CASE_INSENSITIVE}, "AA", []Group{{0, 2}}},
{`a[-]?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AC`, []Group{{0, 2}}},
@@ -464,8 +462,10 @@ var reTests = []struct {
{`[\D5]+`, nil, `1234abc5678`, []Group{{4, 8}}},
{`[\da-fA-F]+`, nil, `123abc`, []Group{{0, 6}}},
{`\xff`, nil, "\u00ff", []Group{{0, 1}}},
{`\xff+`, nil, "\u00ff\u00ff", []Group{{0, 2}}},
{`\xFF`, nil, "\u00ff", []Group{{0, 1}}},
{`\x00ff`, nil, "\u00ff", []Group{}},
{`\x{0000ff}+`, nil, "\u00ff\u00ff", []Group{{0, 2}}},
{`\x{0000ff}`, nil, "\u00ff", []Group{{0, 1}}},
{`\x{0000FF}`, nil, "\u00ff", []Group{{0, 1}}},
{"\t\n\v\r\f\a", nil, "\t\n\v\r\f\a", []Group{{0, 6}}},
@@ -473,7 +473,7 @@ var reTests = []struct {
{`[\t][\n][\v][\r][\f][\b]`, nil, "\t\n\v\r\f\b", []Group{{0, 6}}},
{`.*d`, nil, "abc\nabd", []Group{{4, 7}}},
{`(`, nil, "-", nil},
{`[\41]`, nil, `!`, []Group{{0, 1}}},
{`[\041]`, nil, `!`, []Group{{0, 1}}},
{`(?<!abc)(d.f)`, nil, `abcdefdof`, []Group{{6, 9}}},
{`[\w-]+`, nil, `laser_beam`, []Group{{0, 10}}},
{`M+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
@@ -489,7 +489,25 @@ var reTests = []struct {
{`[b-e]`, nil, `f`, []Group{}},
{`*?`, nil, `-`, nil},
{`a*?`, nil, `-`, nil}, // non-greedy operators are not supported
{`a.+c`, nil, `abcabc`, []Group{{0, 6}}},
// Lazy quantifier tests
{`a.+?c`, nil, `abcabc`, []Group{{0, 3}, {3, 6}}},
{`ab*?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{{0, 6}}},
{`ab+?bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBC`, []Group{{0, 4}}},
{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`ab??bc`, []ReFlag{RE_CASE_INSENSITIVE}, `ABBBBC`, []Group{}},
{`ab??c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABC`, []Group{{0, 3}}},
{`a.*?c`, []ReFlag{RE_CASE_INSENSITIVE}, `AXYZC`, []Group{{0, 5}}},
{`a.+?c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCABC`, []Group{{0, 3}, {3, 6}}},
{`a.*?c`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCABC`, []Group{{0, 3}, {3, 6}}},
{`.*?\S *:`, nil, `xx:`, []Group{{0, 3}}},
{`a[ ]*? (\d+).*`, nil, `a 10`, []Group{{0, 6}}},
{`a[ ]*? (\d+).*`, nil, `a 10`, []Group{{0, 7}}},
{`"(?:\\"|[^"])*?"`, nil, `"\""`, []Group{{0, 4}}},
{`^.*?$`, nil, "one\ntwo\nthree", []Group{}},
{`a[^>]*?b`, nil, `a>b`, []Group{}},
{`^a*?$`, nil, `foo`, []Group{}},
// Numeric range tests - this is a feature that I added, and doesn't exist
// in any other mainstream regex engine
@@ -520,6 +538,30 @@ var reTests = []struct {
{`<389-400`, nil, `-`, nil},
{`<389-400>`, nil, `391`, []Group{{0, 3}}},
{`\b<1-10000>\b`, nil, `America declared independence in 1776.`, []Group{{33, 37}}},
{`\p{Tamil}+`, nil, `உயிரெழுத்து`, []Group{{0, 11}}}, // Each letter and matra is counted as a separate rune, so 'u', 'ya', 'e (matra)', 'ra', 'e (matra)', 'zha', 'oo (matra)', 'tha', 'ith', 'tha', 'oo (matra)'.
{`\P{Tamil}+`, nil, `vowel=உயிரெழுத்து`, []Group{{0, 6}}},
{`\P`, nil, `உயிரெழுத்து`, nil},
{`\PM\pM*`, nil, `உயிரெழுத்து`, []Group{{0, 1}, {1, 3}, {3, 5}, {5, 7}, {7, 9}, {9, 11}}},
{`\pN+`, nil, `123abc456def`, []Group{{0, 3}, {6, 9}}},
{`\PN+`, nil, `123abc456def`, []Group{{3, 6}, {9, 12}}},
{`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}},
{`(?<=\().*?(?=\))`, nil, `(abc)`, []Group{{1, 4}}},
{`((a|b)\2)`, nil, `aa`, []Group{{0, 2}}},
{`((a|b)\2)`, nil, `bb`, []Group{{0, 2}}},
{`((a|b)\2)`, nil, `ab`, []Group{}},
{`((a|b)\2)`, nil, `ba`, []Group{}},
{`((a|b)\2){3}`, nil, `aaaaaa`, []Group{{0, 6}}},
{`((a|b)\2){3}`, nil, `bbbbbb`, []Group{{0, 6}}},
{`((a|b)\2){3}`, nil, `bbaaaa`, []Group{{0, 6}}},
{`((a|b)\2){3}`, nil, `aabbaa`, []Group{{0, 6}}},
{`((a|b)\2){3}`, nil, `aaaabb`, []Group{{0, 6}}},
{`((a|b)\2){3}`, nil, `bbaabb`, []Group{{0, 6}}},
{`((a|b)\2){3}`, nil, `baabab`, []Group{}},
{`((a|b)\2){3}`, nil, `bbabab`, []Group{}},
}
var groupTests = []struct {
@@ -581,13 +623,37 @@ var groupTests = []struct {
{`(bc+d$|ef*g.|h?i(j|k))`, nil, `bcdd`, []Match{}},
{`(bc+d$|ef*g.|h?i(j|k))`, nil, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
{`(((((((((a)))))))))`, nil, `a`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(((((((((a)))))))))\41`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(((((((((a)))))))))\041`, nil, `a!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(.*)c(.*)`, nil, `abcde`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
{`\((.*), (.*)\)`, nil, `(a, b)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
// At this point, the python test suite has a bunch
// of backreference tests. Since my engine doesn't
// implement backreferences, I've skipped those tests.
// Backreference tests
{`(abc)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
{`([a-c]+)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}}},
{`([a-c]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
{`^(.+)?B`, nil, `AB`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(a+).\1$`, nil, `aaaaa`, []Match{[]Group{{0, 5}, {0, 2}}}},
{`^(a+).\1$`, nil, `aaaa`, []Match{}},
{`(a)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(a+)\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(a+)+\1`, nil, `aa`, []Match{[]Group{{0, 2}, {0, 1}}}},
{`(a).+\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(a)ba*\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(aa|a)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(a|aa)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(a+)a\1$`, nil, `aaa`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`([abc]*)\1`, nil, `abcabc`, []Match{[]Group{{0, 6}, {0, 3}}, []Group{{6, 6}, {6, 6}}}},
{`(a)(?:b)\1`, nil, `aba`, []Match{[]Group{{0, 3}, {0, 1}}}},
{`(a)(?:b)\1`, nil, `abb`, []Match{}},
{`(?:a)(b)\1`, nil, `aba`, []Match{}},
{`(?:a)(b)\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
{`(?:(cat)|(dog))\2`, nil, `catdog`, []Match{}},
{`(?:a)\1`, nil, `aa`, nil},
{`((cat)|(dog)|(cow)|(bat))\4`, nil, `cowcow`, []Match{[]Group{{0, 6}, {0, 3}, {-1, -1}, {-1, -1}, {0, 3}, {-1, -1}}}},
{`(a|b)*\1`, nil, `abb`, []Match{[]Group{{0, 3}, {1, 2}}}},
{`(a|b)*\1`, nil, `aba`, []Match{}},
{`(a|b)*\1`, nil, `bab`, []Match{}},
{`(a|b)*\1`, nil, `baa`, []Match{[]Group{{0, 3}, {1, 2}}}},
{`(a)(b)c|ab`, nil, `ab`, []Match{[]Group{{0, 2}}}},
{`(a)+x`, nil, `aaax`, []Match{[]Group{{0, 4}, {2, 3}}}},
@@ -636,7 +702,7 @@ var groupTests = []struct {
{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `BCDD`, []Match{}},
{`(bc+d$|ef*g.|h?i(j|k))`, []ReFlag{RE_CASE_INSENSITIVE}, `reffgz`, []Match{[]Group{{1, 6}, {1, 6}}}},
{`(((((((((a)))))))))`, []ReFlag{RE_CASE_INSENSITIVE}, `A`, []Match{[]Group{{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(((((((((a)))))))))\41`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(((((((((a)))))))))\041`, []ReFlag{RE_CASE_INSENSITIVE}, `A!`, []Match{[]Group{{0, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}}},
{`(.*)c(.*)`, []ReFlag{RE_CASE_INSENSITIVE}, `ABCDE`, []Match{[]Group{{0, 5}, {0, 2}, {3, 5}}}},
{`\((.*), (.*)\)`, []ReFlag{RE_CASE_INSENSITIVE}, `(A, B)`, []Match{[]Group{{0, 6}, {1, 2}, {4, 5}}}},
{`(a)(b)c|ab`, []ReFlag{RE_CASE_INSENSITIVE}, `AB`, []Match{[]Group{{0, 2}}}},
@@ -688,6 +754,18 @@ var groupTests = []struct {
// {`(a|ab|c|bcd)*(d*)`, nil, `ababcd`, []Match{[]Group{{0, 6}, {3, 6}, {6, 6}}, []Group{{6, 6}, {6, 6}, {6, 6}}}},
// // Bug - this should give {0,3},{0,3},{0,0},{0,3},{3,3} but it gives {0,3},{0,2},{0,1},{1,2},{2,3}
// // {`((a*)(b|abc))(c*)`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 3}, {0, 0}, {0, 3}, {3, 3}}}},
// Lazy quantifier tests
{`a(?:b|c|d)+?(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
{`a(?:b|(c|e){1,2}?|d)+?(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {1, 2}, {2, 3}}}},
{`(?<!-):(.*?)(?<!-):`, nil, `a:bc-:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
{`(?<!\\):(.*?)(?<!\\):`, nil, `a:bc\:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
{`(?<!\?)'(.*?)(?<!\?)'`, nil, `a'bc?'de'f`, []Match{[]Group{{1, 9}, {2, 8}}}},
{`.*?x\s*\z(.*)`, []ReFlag{RE_MULTILINE, RE_SINGLE_LINE}, "xx\nx\n", []Match{[]Group{{0, 5}, {5, 5}}}},
{`.*?x\s*\z(.*)`, []ReFlag{RE_MULTILINE}, "xx\nx\n", []Match{[]Group{{3, 5}, {5, 5}}}},
{`^([ab]*?)(?=(b)?)c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
{`^([ab]*?)(?!(b))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
{`^([ab]*?)(?<!(a))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}, {-1, -1}}}},
}
func TestFind(t *testing.T) {
@@ -792,23 +870,24 @@ func TestFindSubmatch(t *testing.T) {
if test.result != nil {
panic(err)
}
}
match, err := regComp.FindSubmatch(test.str)
if err != nil {
if len(test.result) != 0 {
t.Errorf("Wanted %v got no match\n", test.result[0])
}
} else if len(test.result) == 0 {
t.Errorf("Wanted no match got %v\n", match)
}
for i := range match {
if match[i].IsValid() {
if test.result[0][i] != match[i] {
t.Errorf("Wanted %v Got %v\n", test.result[0], match)
} else {
match, err := regComp.FindSubmatch(test.str)
if err != nil {
if len(test.result) != 0 {
t.Errorf("Wanted %v got no match\n", test.result[0])
}
} else {
if i < len(test.result) && test.result[0][i].IsValid() {
t.Errorf("Wanted %v Got %v\n", test.result[0], match)
} else if len(test.result) == 0 {
t.Errorf("Wanted no match got %v\n", match)
}
for i := range match {
if match[i].IsValid() {
if test.result[0][i] != match[i] {
t.Errorf("Wanted %v Got %v\n", test.result[0], match)
}
} else {
if i < len(test.result) && test.result[0][i].IsValid() {
t.Errorf("Wanted %v Got %v\n", test.result[0], match)
}
}
}
}
@@ -823,10 +902,22 @@ func TestFindStringSubmatch(t *testing.T) {
if test.result != nil {
panic(err)
}
}
matchStr := regComp.FindStringSubmatch(test.str)
if matchStr == nil {
if len(test.result) != 0 {
} else {
matchStr := regComp.FindStringSubmatch(test.str)
if matchStr == nil {
if len(test.result) != 0 {
expectedStr := funcMap(test.result[0], func(g Group) string {
if g.IsValid() {
return test.str[g.StartIdx:g.EndIdx]
} else {
return ""
}
})
t.Errorf("Wanted %v got no match\n", expectedStr)
}
} else if len(test.result) == 0 {
t.Errorf("Wanted no match got %v\n", matchStr)
} else {
expectedStr := funcMap(test.result[0], func(g Group) string {
if g.IsValid() {
return test.str[g.StartIdx:g.EndIdx]
@@ -834,26 +925,15 @@ func TestFindStringSubmatch(t *testing.T) {
return ""
}
})
t.Errorf("Wanted %v got no match\n", expectedStr)
}
} else if len(test.result) == 0 {
t.Errorf("Wanted no match got %v\n", matchStr)
} else {
expectedStr := funcMap(test.result[0], func(g Group) string {
if g.IsValid() {
return test.str[g.StartIdx:g.EndIdx]
} else {
return ""
}
})
for i, groupStr := range matchStr {
if groupStr == "" {
if i < len(expectedStr) && expectedStr[i] != "" {
t.Errorf("Wanted %v Got %v\n", expectedStr, matchStr)
}
} else {
if expectedStr[i] != groupStr {
t.Errorf("Wanted %v Got %v\n", expectedStr, matchStr)
for i, groupStr := range matchStr {
if groupStr == "" {
if i < len(expectedStr) && expectedStr[i] != "" {
t.Errorf("Wanted %v Got %v\n", expectedStr, matchStr)
}
} else {
if expectedStr[i] != groupStr {
t.Errorf("Wanted %v Got %v\n", expectedStr, matchStr)
}
}
}
}
@@ -870,10 +950,24 @@ func TestFindAllStringSubmatch(t *testing.T) {
if test.result != nil {
panic(err)
}
}
matchStrs := regComp.FindAllStringSubmatch(test.str)
if matchStrs == nil {
if len(test.result) != 0 {
} else {
matchStrs := regComp.FindAllStringSubmatch(test.str)
if matchStrs == nil {
if len(test.result) != 0 {
expectedStrs := funcMap(test.result, func(m Match) []string {
return funcMap(m, func(g Group) string {
if g.IsValid() {
return test.str[g.StartIdx:g.EndIdx]
} else {
return ""
}
})
})
t.Errorf("Wanted %v got no match\n", expectedStrs)
}
} else if len(test.result) == 0 {
t.Errorf("Wanted no match got %v\n", matchStrs)
} else {
expectedStrs := funcMap(test.result, func(m Match) []string {
return funcMap(m, func(g Group) string {
if g.IsValid() {
@@ -883,29 +977,16 @@ func TestFindAllStringSubmatch(t *testing.T) {
}
})
})
t.Errorf("Wanted %v got no match\n", expectedStrs)
}
} else if len(test.result) == 0 {
t.Errorf("Wanted no match got %v\n", matchStrs)
} else {
expectedStrs := funcMap(test.result, func(m Match) []string {
return funcMap(m, func(g Group) string {
if g.IsValid() {
return test.str[g.StartIdx:g.EndIdx]
} else {
return ""
}
})
})
for i, matchStr := range matchStrs {
for j, groupStr := range matchStr {
if groupStr == "" {
if j < len(expectedStrs[i]) && expectedStrs[i][j] != "" {
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
}
} else {
if expectedStrs[i][j] != groupStr {
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
for i, matchStr := range matchStrs {
for j, groupStr := range matchStr {
if groupStr == "" {
if j < len(expectedStrs[i]) && expectedStrs[i][j] != "" {
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
}
} else {
if expectedStrs[i][j] != groupStr {
t.Errorf("Wanted %v Got %v\n", expectedStrs, matchStrs)
}
}
}
}
@@ -923,17 +1004,18 @@ func TestFindAllSubmatch(t *testing.T) {
if test.result != nil {
panic(err)
}
}
matchIndices := regComp.FindAllSubmatch(test.str)
for i := range matchIndices {
for j := range matchIndices[i] {
if matchIndices[i][j].IsValid() {
if test.result[i][j] != matchIndices[i][j] {
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
}
} else {
if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
} else {
matchIndices := regComp.FindAllSubmatch(test.str)
for i := range matchIndices {
for j := range matchIndices[i] {
if matchIndices[i][j].IsValid() {
if test.result[i][j] != matchIndices[i][j] {
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
}
} else {
if i < len(test.result) && j < len(test.result[i]) && test.result[i][j].IsValid() {
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
}
}
}
}
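The expected-string comparisons in the tests above are built with a funcMap helper that projects each Group into the substring it spans. The helper itself does not appear in this diff; the following is only a minimal generic sketch consistent with how the tests call it (assuming Go 1.18+ generics; the repository's actual definition may differ):

// funcMap applies fn to every element of in and returns the mapped slice.
// Signature inferred from calls such as funcMap(test.result[0], func(g Group) string {...});
// this is a sketch, not the repository's own implementation.
func funcMap[T, U any](in []T, fn func(T) U) []U {
	out := make([]U, len(in))
	for i, v := range in {
		out[i] = fn(v)
	}
	return out
}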


@@ -4,4 +4,5 @@
Ideas for flags:
-m <num> : Print <num>th match (-m 1 = first match, -m 2 = second match)
-g <num> : Print the <num>th group
-r : Specify a directory instead of a file, reads recursively
4. Refactor code for flags - make each flag's code a function, which modifies the result of findAllMatches
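Item 4 above proposes an architecture in which every flag is implemented as a function that post-processes the result of findAllMatches instead of branching inline. A purely illustrative sketch of that idea (matchTransform and onlyNthMatch are hypothetical names; Match is the slice-of-Group type used in the tests):

// matchTransform post-processes the matches returned by findAllMatches.
type matchTransform func([]Match) []Match

// onlyNthMatch sketches the proposed -m flag: keep only the num-th match
// (1-indexed), or nothing if it doesn't exist.
func onlyNthMatch(num int) matchTransform {
	return func(matches []Match) []Match {
		if num < 1 || num > len(matches) {
			return nil
		}
		return matches[num-1 : num]
	}
}

The -g flag could be expressed as another transform applied in sequence before printing, while -r would affect which files are read rather than how matches are filtered.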