8 Commits

17 changed files with 84 additions and 69 deletions

27
cmd/helpers.go Normal file
View File

@@ -0,0 +1,27 @@
package main
import "slices"
// character is a type constraint satisfied by int and rune. It is the
// constraint used by the generic range helper genRange in this file, which
// needs arithmetic and ordering on its element type.
type character interface {
	int | rune
}
// setDifference returns the elements of s1 that do not occur anywhere in s2.
//
// The relative order of the surviving elements is the same as in s1, and
// duplicates in s1 are kept (each occurrence is tested independently).
// The result is always a non-nil slice, with capacity len(s1).
func setDifference[T comparable](s1 []T, s2 []T) []T {
	diff := make([]T, 0, len(s1))
	for i := range s1 {
		// Skip anything that s2 also holds.
		if slices.Index(s2, s1[i]) >= 0 {
			continue
		}
		diff = append(diff, s1[i])
	}
	return diff
}
// genRange returns the consecutive values start, start+1, ..., end-1, i.e.
// the half-open range [start, end).
//
// If end <= start the range is empty and an empty, non-nil slice is returned.
// Without this guard a reversed range (end < start) would ask make for a
// negative length, which panics at run time.
func genRange[T character](start, end T) []T {
	if end <= start {
		return []T{}
	}
	toRet := make([]T, end-start)
	for i := start; i < end; i++ {
		// i-start is the zero-based position of i within the range.
		toRet[i-start] = i
	}
	return toRet
}

View File

@@ -8,11 +8,13 @@ import (
"os" "os"
"github.com/fatih/color" "github.com/fatih/color"
"gitea.twomorecents.org/Rockingcool/kg/greg"
) )
func main() { func main() {
// Flags for the regex Compile function // Flags for the regex Compile function
flagsToCompile := make([]ReFlag, 0) flagsToCompile := make([]greg.ReFlag, 0)
invertFlag := flag.Bool("v", false, "Invert match.") invertFlag := flag.Bool("v", false, "Invert match.")
// This flag has two 'modes': // This flag has two 'modes':
@@ -29,10 +31,10 @@ func main() {
// These flags have to be passed to the Compile function // These flags have to be passed to the Compile function
if *multiLineFlag { if *multiLineFlag {
flagsToCompile = append(flagsToCompile, RE_MULTILINE, RE_SINGLE_LINE) flagsToCompile = append(flagsToCompile, greg.RE_MULTILINE, greg.RE_SINGLE_LINE)
} }
if *caseInsensitiveFlag { if *caseInsensitiveFlag {
flagsToCompile = append(flagsToCompile, RE_CASE_INSENSITIVE) flagsToCompile = append(flagsToCompile, greg.RE_CASE_INSENSITIVE)
} }
// -l and -o are mutually exclusive: -o overrides -l // -l and -o are mutually exclusive: -o overrides -l
@@ -76,7 +78,7 @@ func main() {
reader := bufio.NewReader(os.Stdin) reader := bufio.NewReader(os.Stdin)
out := bufio.NewWriter(os.Stdout) out := bufio.NewWriter(os.Stdout)
regComp, err := Compile(re, flagsToCompile...) regComp, err := greg.Compile(re, flagsToCompile...)
if err != nil { if err != nil {
fmt.Println(err) fmt.Println(err)
return return
@@ -117,14 +119,14 @@ func main() {
panic(err) panic(err)
} }
} }
matchIndices := make([]Match, 0) matchIndices := make([]greg.Match, 0)
if matchNumFlagEnabled { if matchNumFlagEnabled {
tmp, err := FindNthMatch(regComp, test_str, *matchNum) tmp, err := greg.FindNthMatch(regComp, test_str, *matchNum)
if err == nil { if err == nil {
matchIndices = append(matchIndices, tmp) matchIndices = append(matchIndices, tmp)
} }
} else { } else {
matchIndices = FindAllMatches(regComp, test_str) matchIndices = greg.FindAllMatches(regComp, test_str)
} }
if *printMatchesFlag { if *printMatchesFlag {
@@ -135,7 +137,7 @@ func main() {
fmt.Fprintf(out, "Line %d:\n", lineNum) fmt.Fprintf(out, "Line %d:\n", lineNum)
} }
for _, m := range matchIndices { for _, m := range matchIndices {
fmt.Fprintf(out, "%s\n", m.toString()) fmt.Fprintf(out, "%s\n", m.ToString())
} }
err := out.Flush() err := out.Flush()
if err != nil { if err != nil {
@@ -148,7 +150,7 @@ func main() {
// This should make checking O(1) instead of O(n) // This should make checking O(1) instead of O(n)
indicesToPrint := new_uniq_arr[int]() indicesToPrint := new_uniq_arr[int]()
for _, idx := range matchIndices { for _, idx := range matchIndices {
indicesToPrint.add(genRange(idx[0].startIdx, idx[0].endIdx)...) indicesToPrint.add(genRange(idx[0].StartIdx, idx[0].EndIdx)...)
} }
// If we are inverting, then we should print the indices which _didn't_ match // If we are inverting, then we should print the indices which _didn't_ match
// in color. // in color.
@@ -183,9 +185,9 @@ func main() {
for i := range test_str { for i := range test_str {
inMatchIndex := false inMatchIndex := false
for _, m := range matchIndices { for _, m := range matchIndices {
if i == m[0].startIdx { if i == m[0].StartIdx {
fmt.Fprintf(out, "%s", *substituteText) fmt.Fprintf(out, "%s", *substituteText)
i = m[0].endIdx i = m[0].EndIdx
inMatchIndex = true inMatchIndex = true
break break
} }
@@ -201,7 +203,7 @@ func main() {
// Newline after every match - only if -o is enabled and -v is disabled. // Newline after every match - only if -o is enabled and -v is disabled.
if *onlyFlag && !(*invertFlag) { if *onlyFlag && !(*invertFlag) {
for _, idx := range matchIndices { for _, idx := range matchIndices {
if i+1 == idx[0].endIdx { // End index is one more than last index of match if i+1 == idx[0].EndIdx { // End index is one more than last index of match
fmt.Fprintf(out, "\n") fmt.Fprintf(out, "\n")
break break
} }

2
go.mod
View File

@@ -1,4 +1,4 @@
module re module gitea.twomorecents.org/Rockingcool/kg
go 1.23.1 go 1.23.1

View File

@@ -1,4 +1,4 @@
package main package greg
import ( import (
"fmt" "fmt"
@@ -682,7 +682,7 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
if startRangeRune > endRangeRune { if startRangeRune > endRangeRune {
return nil, fmt.Errorf("character range syntax is [a-b], not [b-a]") return nil, fmt.Errorf("character range syntax is [a-b], not [b-a]")
} }
chars = append(chars, newPostfixCharNode(genRange(startRangeRune, endRangeRune+1)...)) chars = append(chars, newPostfixCharNode(genRangeInclusive(startRangeRune, endRangeRune)...))
} }
endOfRange = false // Reset the flag endOfRange = false // Reset the flag

View File

@@ -1,4 +1,4 @@
package main package greg
import ( import (
"fmt" "fmt"
@@ -10,15 +10,15 @@ type Match []Group
// a Group represents a group. It contains the start index and end index of the match // a Group represents a group. It contains the start index and end index of the match
type Group struct { type Group struct {
startIdx int StartIdx int
endIdx int EndIdx int
} }
func newMatch(size int) Match { func newMatch(size int) Match {
toRet := make([]Group, size) toRet := make([]Group, size)
for i := range toRet { for i := range toRet {
toRet[i].startIdx = -1 toRet[i].StartIdx = -1
toRet[i].endIdx = -1 toRet[i].EndIdx = -1
} }
return toRet return toRet
} }
@@ -27,7 +27,7 @@ func newMatch(size int) Match {
func (m Match) numValidGroups() int { func (m Match) numValidGroups() int {
numValid := 0 numValid := 0
for _, g := range m { for _, g := range m {
if g.startIdx >= 0 && g.endIdx >= 0 { if g.StartIdx >= 0 && g.EndIdx >= 0 {
numValid++ numValid++
} }
} }
@@ -35,7 +35,7 @@ func (m Match) numValidGroups() int {
} }
// Returns a string containing the indices of all (valid) groups in the match // Returns a string containing the indices of all (valid) groups in the match
func (m Match) toString() string { func (m Match) ToString() string {
var toRet string var toRet string
for i, g := range m { for i, g := range m {
if g.isValid() { if g.isValid() {
@@ -49,12 +49,12 @@ func (m Match) toString() string {
// Converts the Group into a string representation: // Converts the Group into a string representation:
func (idx Group) toString() string { func (idx Group) toString() string {
return fmt.Sprintf("%d\t%d", idx.startIdx, idx.endIdx) return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
} }
// Returns whether a group contains valid indices // Returns whether a group contains valid indices
func (g Group) isValid() bool { func (g Group) isValid() bool {
return g.startIdx >= 0 && g.endIdx >= 0 return g.StartIdx >= 0 && g.EndIdx >= 0
} }
// takeZeroState takes the 0-state (if such a transition exists) for all states in the // takeZeroState takes the 0-state (if such a transition exists) for all states in the
@@ -70,11 +70,11 @@ func takeZeroState(states []*State, numGroups int, idx int) (rtv []*State, isZer
} }
copy(s.threadGroups, state.threadGroups) copy(s.threadGroups, state.threadGroups)
if s.groupBegin { if s.groupBegin {
s.threadGroups[s.groupNum].startIdx = idx s.threadGroups[s.groupNum].StartIdx = idx
// openParenGroups = append(openParenGroups, s.groupNum) // openParenGroups = append(openParenGroups, s.groupNum)
} }
if s.groupEnd { if s.groupEnd {
s.threadGroups[s.groupNum].endIdx = idx s.threadGroups[s.groupNum].EndIdx = idx
// closeParenGroups = append(closeParenGroups, s.groupNum) // closeParenGroups = append(closeParenGroups, s.groupNum)
} }
} }
@@ -118,17 +118,17 @@ func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*State) boo
func pruneIndices(indices []Match) []Match { func pruneIndices(indices []Match) []Match {
// First, sort the slice by the start indices // First, sort the slice by the start indices
sort.Slice(indices, func(i, j int) bool { sort.Slice(indices, func(i, j int) bool {
return indices[i][0].startIdx < indices[j][0].startIdx return indices[i][0].StartIdx < indices[j][0].StartIdx
}) })
toRet := make([]Match, 0, len(indices)) toRet := make([]Match, 0, len(indices))
current := indices[0] current := indices[0]
for _, idx := range indices[1:] { for _, idx := range indices[1:] {
// idx doesn't overlap with current (starts after current ends), so add current to result // idx doesn't overlap with current (starts after current ends), so add current to result
// and update the current. // and update the current.
if idx[0].startIdx >= current[0].endIdx { if idx[0].StartIdx >= current[0].EndIdx {
toRet = append(toRet, current) toRet = append(toRet, current)
current = idx current = idx
} else if idx[0].endIdx > current[0].endIdx { } else if idx[0].EndIdx > current[0].EndIdx {
// idx overlaps, but it is longer, so update current // idx overlaps, but it is longer, so update current
current = idx current = idx
} }
@@ -147,7 +147,7 @@ func FindString(regex Reg, str string) string {
if err != nil { if err != nil {
return "" return ""
} }
return str[match[0].startIdx:match[0].endIdx] return str[match[0].StartIdx:match[0].EndIdx]
} }
// FindAllString is the 'all' version of FindString. // FindAllString is the 'all' version of FindString.
@@ -247,7 +247,7 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
start.threadGroups = newMatch(numGroups + 1) start.threadGroups = newMatch(numGroups + 1)
// Check if the start state begins a group - if so, add the start index to our list // Check if the start state begins a group - if so, add the start index to our list
if start.groupBegin { if start.groupBegin {
start.threadGroups[start.groupNum].startIdx = i start.threadGroups[start.groupNum].StartIdx = i
// tempIndices[start.groupNum].startIdx = i // tempIndices[start.groupNum].startIdx = i
} }
@@ -356,10 +356,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
// i++ // i++
// } // }
if tempIndices.numValidGroups() > 0 && tempIndices[0].isValid() { if tempIndices.numValidGroups() > 0 && tempIndices[0].isValid() {
if tempIndices[0].startIdx == tempIndices[0].endIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over. if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
return true, tempIndices, tempIndices[0].endIdx + 1 return true, tempIndices, tempIndices[0].EndIdx + 1
} else { } else {
return true, tempIndices, tempIndices[0].endIdx return true, tempIndices, tempIndices[0].EndIdx
} }
} }
return false, []Group{}, startIdx return false, []Group{}, startIdx
@@ -402,10 +402,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
} }
if tempIndices.numValidGroups() > 0 { if tempIndices.numValidGroups() > 0 {
if tempIndices[0].startIdx == tempIndices[0].endIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over. if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
return true, tempIndices, tempIndices[0].endIdx + 1 return true, tempIndices, tempIndices[0].EndIdx + 1
} else { } else {
return true, tempIndices, tempIndices[0].endIdx return true, tempIndices, tempIndices[0].EndIdx
} }
} }
if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over. if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.

View File

@@ -1,4 +1,4 @@
package main package greg
import ( import (
"slices" "slices"
@@ -72,17 +72,6 @@ func allEqual[T comparable](items ...T) bool {
return true return true
} }
// Returns all elements in slice A that are NOT in slice B
func setDifference[T comparable](s1 []T, s2 []T) []T {
toReturn := make([]T, 0, len(s1))
for _, val := range s1 {
if !slices.Contains(s2, val) {
toReturn = append(toReturn, val)
}
}
return toReturn
}
// Map function - convert a slice of T to a slice of V, based on a function // Map function - convert a slice of T to a slice of V, based on a function
// that maps a T to a V // that maps a T to a V
func Map[T, V any](slc []T, fn func(T) V) []V { func Map[T, V any](slc []T, fn func(T) V) []V {
@@ -108,19 +97,12 @@ func Reduce[T any](slc []T, fn func(T, T) T) T {
return slc[0] return slc[0]
} }
// Generate numbers in a range - start (inclusive) to end (exclusive)
func genRange[T character](start, end T) []T {
toRet := make([]T, end-start)
for i := start; i < end; i++ {
toRet[i-start] = i
}
return toRet
}
// Generate numbers in a range - start to end (both inclusive) // Generate numbers in a range - start to end (both inclusive)
func genRangeInclusive[T character](start, end T) []T { func genRangeInclusive[T character](start, end T) []T {
toRet := genRange(start, end) toRet := make([]T, (end-start)+1)
toRet = append(toRet, end) for i := start; i <= end; i++ {
toRet[i-start] = i
}
return toRet return toRet
} }

View File

@@ -1,4 +1,4 @@
package main package greg
import ( import (
"fmt" "fmt"
@@ -150,12 +150,12 @@ func (s State) checkAssertion(str []rune, idx int) bool {
numMatchesFound := 0 numMatchesFound := 0
for _, matchIdx := range matchIndices { for _, matchIdx := range matchIndices {
if s.assert == PLA || s.assert == NLA { // Lookahead - return true (or false) if at least one match starts at 0. Zero is used because the test-string _starts_ from idx. if s.assert == PLA || s.assert == NLA { // Lookahead - return true (or false) if at least one match starts at 0. Zero is used because the test-string _starts_ from idx.
if matchIdx[0].startIdx == 0 { if matchIdx[0].StartIdx == 0 {
numMatchesFound++ numMatchesFound++
} }
} }
if s.assert == PLB || s.assert == NLB { // Lookbehind - return true (or false) if at least one match _ends_ at the current index. if s.assert == PLB || s.assert == NLB { // Lookbehind - return true (or false) if at least one match _ends_ at the current index.
if matchIdx[0].endIdx == idx { if matchIdx[0].EndIdx == idx {
numMatchesFound++ numMatchesFound++
} }
} }

View File

@@ -1,4 +1,4 @@
package main package greg
import "fmt" import "fmt"

View File

@@ -1,4 +1,4 @@
package main package greg
import ( import (
"fmt" "fmt"

View File

@@ -1,4 +1,4 @@
package main package greg
import ( import (
"fmt" "fmt"
@@ -476,6 +476,10 @@ var reTests = []struct {
{`a[^>]*b`, nil, `a>b`, []Group{}}, {`a[^>]*b`, nil, `a>b`, []Group{}},
{`^a*$`, nil, `foo`, []Group{}}, {`^a*$`, nil, `foo`, []Group{}},
// Out-of-bounds for character classes
{`[b-e]`, nil, `a`, []Group{}},
{`[b-e]`, nil, `f`, []Group{}},
{`*?`, nil, `-`, nil}, {`*?`, nil, `-`, nil},
{`a*?`, nil, `-`, nil}, // non-greedy operators are not supported {`a*?`, nil, `-`, nil}, // non-greedy operators are not supported
@@ -703,7 +707,7 @@ func TestFindString(t *testing.T) {
t.Errorf("Expected no match got %v\n", foundString) t.Errorf("Expected no match got %v\n", foundString)
} }
} else { } else {
expectedString := test.str[test.result[0].startIdx:test.result[0].endIdx] expectedString := test.str[test.result[0].StartIdx:test.result[0].EndIdx]
if foundString != expectedString { if foundString != expectedString {
t.Errorf("Wanted %v Got %v\n", expectedString, foundString) t.Errorf("Wanted %v Got %v\n", expectedString, foundString)
} }

View File

@@ -1,4 +1,4 @@
package main package greg
import "errors" import "errors"

View File

@@ -1,4 +1,4 @@
package main package greg
type stateContents []int // Represents the contents of the current state - character classes can have multiple contents, which is why it is represented as a slice type stateContents []int // Represents the contents of the current state - character classes can have multiple contents, which is why it is represented as a slice