Added more tests

Fix out-of-bounds access in genRangeInclusive
Replace use of 'genRange' with 'genRangeInclusive'
2025-01-30 09:09:34 -05:00 · 2025-01-30 09:09:20 -05:00 · 2025-01-30 09:09:03 -05:00 · 2025-01-30 09:03:37 -05:00 · 2025-01-30 09:03:07 -05:00 · 2025-01-30 08:58:43 -05:00
17 changed files with 84 additions and 69 deletions
--- a/cmd/helpers.go
+++ b/cmd/helpers.go
@@ -0,0 +1,27 @@
 package main
 import "slices"
 type character interface {
 	int | rune
 }
 // setDifference returns the elements of s1 that do not appear in s2.
 // The order of s1 is kept, and values repeated in s1 are repeated in the result.
 // The result is always a freshly allocated, non-nil slice.
 func setDifference[T comparable](s1 []T, s2 []T) []T {
 	remaining := make([]T, 0, len(s1))
 	for i := range s1 {
 		// Anything that also occurs in s2 is left out of the result.
 		if slices.Contains(s2, s1[i]) {
 			continue
 		}
 		remaining = append(remaining, s1[i])
 	}
 	return remaining
 }
 // genRange returns the consecutive values from start (inclusive) up to end
 // (exclusive), in ascending order. When end <= start the range is empty, and
 // an empty, non-nil slice is returned instead of panicking on a negative
 // slice length.
 func genRange[T character](start, end T) []T {
 	// Guard against an inverted range: make() panics on a negative length.
 	if end <= start {
 		return []T{}
 	}
 	toRet := make([]T, end-start)
 	for i := start; i < end; i++ {
 		toRet[i-start] = i
 	}
 	return toRet
 }
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -8,11 +8,13 @@ import (
 	"os"
 	"github.com/fatih/color"
 	"gitea.twomorecents.org/Rockingcool/kg/greg"
 )
 func main() {
 	// Flags for the regex Compile function
-	flagsToCompile := make([]ReFlag, 0)
+	flagsToCompile := make([]greg.ReFlag, 0)
 	invertFlag := flag.Bool("v", false, "Invert match.")
 	// This flag has two 'modes':
@@ -29,10 +31,10 @@ func main() {
 	// These flags have to be passed to the Compile function
 	if *multiLineFlag {
-		flagsToCompile = append(flagsToCompile, RE_MULTILINE, RE_SINGLE_LINE)
+		flagsToCompile = append(flagsToCompile, greg.RE_MULTILINE, greg.RE_SINGLE_LINE)
 	}
 	if *caseInsensitiveFlag {
-		flagsToCompile = append(flagsToCompile, RE_CASE_INSENSITIVE)
+		flagsToCompile = append(flagsToCompile, greg.RE_CASE_INSENSITIVE)
 	}
 	// -l and -o are mutually exclusive: -o overrides -l
@@ -76,7 +78,7 @@ func main() {
 	reader := bufio.NewReader(os.Stdin)
 	out := bufio.NewWriter(os.Stdout)
-	regComp, err := Compile(re, flagsToCompile...)
+	regComp, err := greg.Compile(re, flagsToCompile...)
 	if err != nil {
 		fmt.Println(err)
 		return
@@ -117,14 +119,14 @@ func main() {
 				panic(err)
 			}
 		}
-		matchIndices := make([]Match, 0)
+		matchIndices := make([]greg.Match, 0)
 		if matchNumFlagEnabled {
-			tmp, err := FindNthMatch(regComp, test_str, *matchNum)
+			tmp, err := greg.FindNthMatch(regComp, test_str, *matchNum)
 			if err == nil {
 				matchIndices = append(matchIndices, tmp)
 			}
 		} else {
-			matchIndices = FindAllMatches(regComp, test_str)
+			matchIndices = greg.FindAllMatches(regComp, test_str)
 		}
 		if *printMatchesFlag {
@@ -135,7 +137,7 @@ func main() {
 					fmt.Fprintf(out, "Line %d:\n", lineNum)
 				}
 				for _, m := range matchIndices {
-					fmt.Fprintf(out, "%s\n", m.toString())
+					fmt.Fprintf(out, "%s\n", m.ToString())
 				}
 				err := out.Flush()
 				if err != nil {
@@ -148,7 +150,7 @@ func main() {
 		// This should make checking O(1) instead of O(n)
 		indicesToPrint := new_uniq_arr[int]()
 		for _, idx := range matchIndices {
-			indicesToPrint.add(genRange(idx[0].startIdx, idx[0].endIdx)...)
+			indicesToPrint.add(genRange(idx[0].StartIdx, idx[0].EndIdx)...)
 		}
 		// If we are inverting, then we should print the indices which _didn't_ match
 		// in color.
@@ -183,9 +185,9 @@ func main() {
 			for i := range test_str {
 				inMatchIndex := false
 				for _, m := range matchIndices {
-					if i == m[0].startIdx {
+					if i == m[0].StartIdx {
 						fmt.Fprintf(out, "%s", *substituteText)
-						i = m[0].endIdx
+						i = m[0].EndIdx
 						inMatchIndex = true
 						break
 					}
@@ -201,7 +203,7 @@ func main() {
 					// Newline after every match - only if -o is enabled and -v is disabled.
 					if *onlyFlag && !(*invertFlag) {
 						for _, idx := range matchIndices {
-							if i+1 == idx[0].endIdx { // End index is one more than last index of match
+							if i+1 == idx[0].EndIdx { // End index is one more than last index of match
 								fmt.Fprintf(out, "\n")
 								break
 							}
--- a/cmd/unique_array.go
+++ b/cmd/unique_array.go
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module re
+module gitea.twomorecents.org/Rockingcool/kg
 go 1.23.1
--- a/greg/compile.go
+++ b/greg/compile.go
@@ -1,4 +1,4 @@
-package main
+package greg
 import (
 	"fmt"
@@ -682,7 +682,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 						if startRangeRune > endRangeRune {
 							return nil, fmt.Errorf("character range syntax is [a-b], not [b-a]")
 						}
-						chars = append(chars, newPostfixCharNode(genRange(startRangeRune, endRangeRune+1)...))
+						chars = append(chars, newPostfixCharNode(genRangeInclusive(startRangeRune, endRangeRune)...))
 					}
 					endOfRange = false // Reset the flag
--- a/greg/matching.go
+++ b/greg/matching.go
@@ -1,4 +1,4 @@
-package main
+package greg
 import (
 	"fmt"
@@ -10,15 +10,15 @@ type Match []Group
 // a Group represents a group. It contains the start index and end index of the match
 type Group struct {
-	startIdx int
+	StartIdx int
-	endIdx   int
+	EndIdx   int
 }
 func newMatch(size int) Match {
 	toRet := make([]Group, size)
 	for i := range toRet {
-		toRet[i].startIdx = -1
+		toRet[i].StartIdx = -1
-		toRet[i].endIdx = -1
+		toRet[i].EndIdx = -1
 	}
 	return toRet
 }
@@ -27,7 +27,7 @@ func newMatch(size int) Match {
 func (m Match) numValidGroups() int {
 	numValid := 0
 	for _, g := range m {
-		if g.startIdx >= 0 && g.endIdx >= 0 {
+		if g.StartIdx >= 0 && g.EndIdx >= 0 {
 			numValid++
 		}
 	}
@@ -35,7 +35,7 @@ func (m Match) numValidGroups() int {
 }
 // Returns a string containing the indices of all (valid) groups in the match
-func (m Match) toString() string {
+func (m Match) ToString() string {
 	var toRet string
 	for i, g := range m {
 		if g.isValid() {
@@ -49,12 +49,12 @@ func (m Match) toString() string {
 // Converts the Group into a string representation:
 func (idx Group) toString() string {
-	return fmt.Sprintf("%d\t%d", idx.startIdx, idx.endIdx)
+	return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
 }
 // Returns whether a group contains valid indices
 func (g Group) isValid() bool {
-	return g.startIdx >= 0 && g.endIdx >= 0
+	return g.StartIdx >= 0 && g.EndIdx >= 0
 }
 // takeZeroState takes the 0-state (if such a transition exists) for all states in the
@@ -70,11 +70,11 @@ func takeZeroState(states []*State, numGroups int, idx int) (rtv []*State, isZer
 				}
 				copy(s.threadGroups, state.threadGroups)
 				if s.groupBegin {
-					s.threadGroups[s.groupNum].startIdx = idx
+					s.threadGroups[s.groupNum].StartIdx = idx
 					//					openParenGroups = append(openParenGroups, s.groupNum)
 				}
 				if s.groupEnd {
-					s.threadGroups[s.groupNum].endIdx = idx
+					s.threadGroups[s.groupNum].EndIdx = idx
 					//					closeParenGroups = append(closeParenGroups, s.groupNum)
 				}
 			}
@@ -118,17 +118,17 @@ func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*State) boo
 func pruneIndices(indices []Match) []Match {
 	// First, sort the slice by the start indices
 	sort.Slice(indices, func(i, j int) bool {
-		return indices[i][0].startIdx < indices[j][0].startIdx
+		return indices[i][0].StartIdx < indices[j][0].StartIdx
 	})
 	toRet := make([]Match, 0, len(indices))
 	current := indices[0]
 	for _, idx := range indices[1:] {
 		// idx doesn't overlap with current (starts after current ends), so add current to result
 		// and update the current.
-		if idx[0].startIdx >= current[0].endIdx {
+		if idx[0].StartIdx >= current[0].EndIdx {
 			toRet = append(toRet, current)
 			current = idx
-		} else if idx[0].endIdx > current[0].endIdx {
+		} else if idx[0].EndIdx > current[0].EndIdx {
 			// idx overlaps, but it is longer, so update current
 			current = idx
 		}
@@ -147,7 +147,7 @@ func FindString(regex Reg, str string) string {
 	if err != nil {
 		return ""
 	}
-	return str[match[0].startIdx:match[0].endIdx]
+	return str[match[0].StartIdx:match[0].EndIdx]
 }
 // FindAllString is the 'all' version of FindString.
@@ -247,7 +247,7 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
 	start.threadGroups = newMatch(numGroups + 1)
 	// Check if the start state begins a group - if so, add the start index to our list
 	if start.groupBegin {
-		start.threadGroups[start.groupNum].startIdx = i
+		start.threadGroups[start.groupNum].StartIdx = i
 		//		tempIndices[start.groupNum].startIdx = i
 	}
@@ -356,10 +356,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
 			//	i++
 			//			}
 			if tempIndices.numValidGroups() > 0 && tempIndices[0].isValid() {
-				if tempIndices[0].startIdx == tempIndices[0].endIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
+				if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
-					return true, tempIndices, tempIndices[0].endIdx + 1
+					return true, tempIndices, tempIndices[0].EndIdx + 1
 				} else {
-					return true, tempIndices, tempIndices[0].endIdx
+					return true, tempIndices, tempIndices[0].EndIdx
 				}
 			}
 			return false, []Group{}, startIdx
@@ -402,10 +402,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
 	}
 	if tempIndices.numValidGroups() > 0 {
-		if tempIndices[0].startIdx == tempIndices[0].endIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
+		if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
-			return true, tempIndices, tempIndices[0].endIdx + 1
+			return true, tempIndices, tempIndices[0].EndIdx + 1
 		} else {
-			return true, tempIndices, tempIndices[0].endIdx
+			return true, tempIndices, tempIndices[0].EndIdx
 		}
 	}
 	if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
--- a/greg/misc.go
+++ b/greg/misc.go
@@ -1,4 +1,4 @@
-package main
+package greg
 import (
 	"slices"
@@ -72,17 +72,6 @@ func allEqual[T comparable](items ...T) bool {
 	return true
 }
 // setDifference returns the elements of s1 that do not appear in s2.
 // The order of s1 is kept, and values repeated in s1 are repeated in the result.
 // The result is always a freshly allocated, non-nil slice.
 func setDifference[T comparable](s1 []T, s2 []T) []T {
 	remaining := make([]T, 0, len(s1))
 	for i := range s1 {
 		// Anything that also occurs in s2 is left out of the result.
 		if slices.Contains(s2, s1[i]) {
 			continue
 		}
 		remaining = append(remaining, s1[i])
 	}
 	return remaining
 }
 // Map function - convert a slice of T to a slice of V, based on a function
 // that maps a T to a V
 func Map[T, V any](slc []T, fn func(T) V) []V {
@@ -108,19 +97,12 @@ func Reduce[T any](slc []T, fn func(T, T) T) T {
 	return slc[0]
 }
 // genRange returns the consecutive values from start (inclusive) up to end
 // (exclusive), in ascending order. When end <= start the range is empty, and
 // an empty, non-nil slice is returned instead of panicking on a negative
 // slice length.
 func genRange[T character](start, end T) []T {
 	// Guard against an inverted range: make() panics on a negative length.
 	if end <= start {
 		return []T{}
 	}
 	toRet := make([]T, end-start)
 	for i := start; i < end; i++ {
 		toRet[i-start] = i
 	}
 	return toRet
 }
 // Generate numbers in a range - start to end (both inclusive)
 func genRangeInclusive[T character](start, end T) []T {
-	toRet := genRange(start, end)
+	toRet := make([]T, (end-start)+1)
-	toRet = append(toRet, end)
+	for i := start; i <= end; i++ {
 		toRet[i-start] = i
 	}
 	return toRet
 }
--- a/greg/nfa.go
+++ b/greg/nfa.go
@@ -1,4 +1,4 @@
-package main
+package greg
 import (
 	"fmt"
@@ -150,12 +150,12 @@ func (s State) checkAssertion(str []rune, idx int) bool {
 		numMatchesFound := 0
 		for _, matchIdx := range matchIndices {
 			if s.assert == PLA || s.assert == NLA { // Lookahead - return true (or false) if at least one match starts at 0. Zero is used because the test-string _starts_ from idx.
-				if matchIdx[0].startIdx == 0 {
+				if matchIdx[0].StartIdx == 0 {
 					numMatchesFound++
 				}
 			}
 			if s.assert == PLB || s.assert == NLB { // Lookbehind - return true (or false) if at least one match _ends_ at the current index.
-				if matchIdx[0].endIdx == idx {
+				if matchIdx[0].EndIdx == idx {
 					numMatchesFound++
 				}
 			}
--- a/greg/noteOnPCREBackreferences.txt
+++ b/greg/noteOnPCREBackreferences.txt
--- a/greg/postfixNode.go
+++ b/greg/postfixNode.go
@@ -1,4 +1,4 @@
-package main
+package greg
 import "fmt"
--- a/greg/range2regex.go
+++ b/greg/range2regex.go
@@ -1,4 +1,4 @@
-package main
+package greg
 import (
 	"fmt"
--- a/greg/re_test.go
+++ b/greg/re_test.go
@@ -1,4 +1,4 @@
-package main
+package greg
 import (
 	"fmt"
@@ -476,6 +476,10 @@ var reTests = []struct {
 	{`a[^>]*b`, nil, `a>b`, []Group{}},
 	{`^a*$`, nil, `foo`, []Group{}},
 	// Out-of-bounds for character classes
 	{`[b-e]`, nil, `a`, []Group{}},
 	{`[b-e]`, nil, `f`, []Group{}},
 	{`*?`, nil, `-`, nil},
 	{`a*?`, nil, `-`, nil}, // non-greedy operators are not supported
@@ -703,7 +707,7 @@ func TestFindString(t *testing.T) {
 						t.Errorf("Expected no match got %v\n", foundString)
 					}
 				} else {
-					expectedString := test.str[test.result[0].startIdx:test.result[0].endIdx]
+					expectedString := test.str[test.result[0].StartIdx:test.result[0].EndIdx]
 					if foundString != expectedString {
 						t.Errorf("Wanted %v	Got %v\n", expectedString, foundString)
 					}
--- a/greg/re_tests.py
+++ b/greg/re_tests.py
--- a/greg/re_tests_uniq.py
+++ b/greg/re_tests_uniq.py
--- a/greg/sliceQueue.go
+++ b/greg/sliceQueue.go
@@ -1,4 +1,4 @@
-package main
+package greg
 import "errors"
--- a/greg/stateContents.go
+++ b/greg/stateContents.go
@@ -1,4 +1,4 @@
-package main
+package greg
 type stateContents []int // Represents the contents of the current state - character classes can have multiple contents, which is why it is represented as a slice
--- a/greg/todo.txt
+++ b/greg/todo.txt
Author	SHA1	Message	Date
Aadhavan Srinivasan	ca8d32cd7f	Added more tests	2025-01-30 09:09:34 -05:00
Aadhavan Srinivasan	368941e5c7	Fix out-of-bounds access in genRangeInclusive	2025-01-30 09:09:20 -05:00
Aadhavan Srinivasan	b9da5ec08d	Replace use of 'genRange' with 'genRangeInclusive'	2025-01-30 09:09:03 -05:00
Aadhavan Srinivasan	c5a43c47f0	Update type and method references to use qualified names	2025-01-30 09:03:37 -05:00
Aadhavan Srinivasan	c3c3829ac9	Move 'genRange' function to 'cmd'	2025-01-30 09:03:07 -05:00
Aadhavan Srinivasan	ee6bb3959c	Removed function that wasn't being used in 'greg', moved to 'main'	2025-01-30 08:58:43 -05:00
Aadhavan Srinivasan	c06d81d17d	Updated struct field reference	2025-01-30 08:58:11 -05:00
Aadhavan Srinivasan	1a2f1b7ca9	Restructured code into 'cmd' module with CLI and 'greg' module with regex library; export necessary struct fields and methods	2025-01-30 08:56:12 -05:00
`@@ -1,4 +1,4 @@`
	`module re`	`module gitea.twomorecents.org/Rockingcool/kg`

	`go 1.23.1`	`go 1.23.1`
`@@ -1,4 +1,4 @@`
	`package main`	`package greg`

	`import "fmt"`	`import "fmt"`