13 Commits

Author SHA1 Message Date
17e897e2d6 Accept escaped hyphens, even outside character class 2025-04-21 19:11:37 -04:00
777c590072 Add options to build unoptimized versions; make optimized the default 2025-04-21 19:10:55 -04:00
1d32865e76 Added extra check if fileContainsNullChar returns err; pass filename to it instead of file 2025-04-18 22:35:48 -04:00
66f4ca31d1 Use filename instead of file handler to scan file for null byte 2025-04-18 22:35:22 -04:00
d81c72590a Updated gitignore to contain new executable name 2025-04-16 16:59:48 -04:00
83632f2abc Don't compile the regex if no valid files were given (eg. all files are directories); print error if file is a binary file (contains NULL character) 2025-04-16 16:58:27 -04:00
fc0af1ccc5 Updated README 2025-03-28 09:09:50 -04:00
980fb77114 Makefile changes 2025-03-28 09:07:29 -04:00
4c4d747a9c Renamed 'cmd' to 'kg' so that go install works correctly 2025-03-28 09:06:12 -04:00
595b86df60 Added comment to function 2025-03-25 10:28:29 -04:00
5f9bab528a Don't quit if a file is not found, continue with any other files that are found 2025-03-18 11:45:50 -04:00
530564b920 Print error if input file is a directory; print program name before error string 2025-03-16 19:48:49 -04:00
02b3b469c4 Added support for line num flag 2025-03-16 19:23:23 -04:00
8 changed files with 82 additions and 18 deletions

3
.gitignore vendored
View File

@@ -1,2 +1 @@
re
kg/kg

View File

@@ -5,9 +5,13 @@ fmt:
go fmt ./...
vet: fmt
go vet ./...
buildLib: vet
buildLibUnopt: vet
go build -gcflags="all=-N -l" ./...
unopt: buildLibUnopt
go build -C kg/ -gcflags="all=-N -l" -o kg ./...
buildLib: vet
go build ./...
buildCmd: buildLib
go build -C cmd/ -gcflags="all=-N -l" -o re ./...
go build -C kg/ -o kg ./...
test: buildCmd
go test -v ./...

View File

@@ -15,3 +15,7 @@ It also includes features not present in regexp, such as lookarounds and backref
The syntax is, for the most part, a superset of Go's regexp. A full overview of the syntax can be found [here](https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex#hdr-Syntax).
__For more information, see https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex__.
### How it works
I've written about the inner workings of the engine [on my blog](https://twomorecents.org/writing-regex-engine/index.html).

View File

@@ -1,6 +1,11 @@
package main
import "slices"
import (
"bufio"
"os"
"slices"
"strings"
)
type character interface {
int | rune
@@ -25,3 +30,23 @@ func genRange[T character](start, end T) []T {
}
return toRet
}
// fileContainsNullChar reports whether the file named filename contains a
// NULL (0x00) byte anywhere in its contents.
//
// The file is opened, scanned from start to end and closed again before the
// function returns. Scanning stops at the first NULL byte found.
//
// If the file cannot be opened or a read fails, the result is (true, err).
// The boolean is deliberately true on failure: callers that only look at the
// error when the boolean is set will still notice it.
func fileContainsNullChar(filename string) (bool, error) {
	file, err := os.Open(filename)
	if err != nil {
		return true, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	// Hand back whatever is currently buffered as a single token instead of
	// splitting on newlines. The default line splitter gives up with
	// bufio.ErrTooLong on any line longer than 64 KiB, which is common in
	// binary files and in text files without line breaks. A NULL is a single
	// byte, so it can never straddle two chunks.
	scanner.Split(func(data []byte, atEOF bool) (int, []byte, error) {
		if len(data) == 0 {
			// Nothing buffered: ask for more input, or stop if at EOF.
			return 0, nil, nil
		}
		return len(data), data, nil
	})

	for scanner.Scan() {
		if strings.Contains(scanner.Text(), "\000") {
			return true, nil
		}
	}
	if err := scanner.Err(); err != nil {
		return true, err
	}
	return false, nil
}

View File

@@ -61,6 +61,11 @@ func main() {
panic("Invalid match number to print.")
}
// Enable lineFlag if lineNumFlag is enabled
if *lineNumFlag {
*lineFlag = true
}
// Process:
// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
// a. Add explicit concatenation operators to facilitate this
@@ -68,11 +73,11 @@ func main() {
// 3. Run the string against the NFA
if len(flag.Args()) < 1 { // flag.Args() also strips out program name
fmt.Println("ERROR: Missing cmdline args")
fmt.Printf("%s: ERROR: Missing cmdline args\n", os.Args[0])
os.Exit(22)
}
if *recursiveFlag && len(flag.Args()) < 2 { // File/Directory must be provided with '-r'
fmt.Println("ERROR: Missing cmdline args")
fmt.Printf("%s: ERROR: Missing cmdline args\n", os.Args[0])
os.Exit(22)
}
var re string
@@ -86,13 +91,37 @@ func main() {
inputFilenames := flag.Args()[1:]
for _, inputFilename := range inputFilenames {
inputFile, err := os.Open(inputFilename)
defer inputFile.Close()
if err != nil {
fmt.Printf("%s: No such file or directory\n", flag.Args()[1])
os.Exit(2)
fmt.Printf("%s: %s: No such file or directory\n", os.Args[0], inputFilename)
} else {
fileStat, err := inputFile.Stat()
if err != nil {
fmt.Printf("%v\n", err)
os.Exit(2)
} else {
if fileStat.Mode().IsDir() {
fmt.Printf("%s: %s: Is a directory\n", os.Args[0], inputFilename)
} else {
var nullCharPresent bool
if nullCharPresent, err = fileContainsNullChar(inputFilename); nullCharPresent {
if err != nil {
fmt.Printf("%v\n", err)
os.Exit(1)
} else {
fmt.Printf("%s: %s: Is a binary file\n", os.Args[0], inputFilename)
}
} else {
inputFiles = append(inputFiles, inputFile)
}
}
}
}
inputFiles = append(inputFiles, inputFile)
}
}
if len(inputFiles) == 0 { // No valid files given
os.Exit(2)
}
var test_str string
var err error
@@ -108,6 +137,7 @@ func main() {
}
for _, inputFile := range inputFiles {
lineNum = 0
reader := bufio.NewReader(inputFile)
linesRead = false
for true {
@@ -202,7 +232,12 @@ func main() {
if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 {
continue
} else {
color.New(color.FgMagenta).Fprintf(out, "%s: ", inputFile.Name()) // Print filename
if *recursiveFlag || len(flag.Args()) > 2 { // If we have 2 args, then we're only searching 1 file. We should only print the filename if there's more than 1 file.
color.New(color.FgMagenta).Fprintf(out, "%s:", inputFile.Name()) // Print filename
}
if *lineNumFlag {
color.New(color.FgGreen).Fprintf(out, "%d:", lineNum) // Print filename
}
}
}

View File

@@ -47,6 +47,7 @@ func (re *Reg) UnmarshalText(text []byte) error {
return err
}
// Longest makes future searches prefer the longest branch of an alternation, as opposed to the leftmost branch.
// It does this by setting the preferLongest field of re to true; it takes no arguments and returns nothing.
func (re *Reg) Longest() {
re.preferLongest = true
}

View File

@@ -131,13 +131,9 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
case 'v': // Vertical tab
toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, rune(11))
case '-': // Literal hyphen - only in character class
if inCharClass {
toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, '-')
} else {
return postfixNode{}, fmt.Errorf("invalid escape character")
}
case '-': // Literal hyphen
toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, '-')
default: // None of the above - append it as a regular character
if isNormalChar(c) { // Normal characters cannot be escaped
return postfixNode{}, fmt.Errorf("invalid escape character")