Compare commits
13 Commits
e489dc4c27
...
v0.6.2
Author | SHA1 | Date | |
---|---|---|---|
17e897e2d6 | |||
777c590072 | |||
1d32865e76 | |||
66f4ca31d1 | |||
d81c72590a | |||
83632f2abc | |||
fc0af1ccc5 | |||
980fb77114 | |||
4c4d747a9c | |||
595b86df60 | |||
5f9bab528a | |||
530564b920 | |||
02b3b469c4 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,2 +1 @@
|
||||
re
|
||||
|
||||
kg/kg
|
||||
|
8
Makefile
8
Makefile
@@ -5,9 +5,13 @@ fmt:
|
||||
go fmt ./...
|
||||
vet: fmt
|
||||
go vet ./...
|
||||
buildLib: vet
|
||||
buildLibUnopt: vet
|
||||
go build -gcflags="all=-N -l" ./...
|
||||
unopt: buildLibUnopt
|
||||
go build -C kg/ -gcflags="all=-N -l" -o kg ./...
|
||||
buildLib: vet
|
||||
go build ./...
|
||||
buildCmd: buildLib
|
||||
go build -C cmd/ -gcflags="all=-N -l" -o re ./...
|
||||
go build -C kg/ -o kg ./...
|
||||
test: buildCmd
|
||||
go test -v ./...
|
||||
|
@@ -15,3 +15,7 @@ It also includes features not present in regexp, such as lookarounds and backref
|
||||
The syntax is, for the most part, a superset of Go's regexp. A full overview of the syntax can be found [here](https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex#hdr-Syntax).
|
||||
|
||||
__For more information, see https://pkg.go.dev/gitea.twomorecents.org/Rockingcool/kleingrep/regex__.
|
||||
|
||||
### How it works
|
||||
|
||||
I've written about the inner workings of the engine [on my blog](https://twomorecents.org/writing-regex-engine/index.html).
|
||||
|
@@ -1,6 +1,11 @@
|
||||
package main
|
||||
|
||||
import "slices"
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type character interface {
|
||||
int | rune
|
||||
@@ -25,3 +30,23 @@ func genRange[T character](start, end T) []T {
|
||||
}
|
||||
return toRet
|
||||
}
|
||||
|
||||
// fileContainsNullChar reports whether the file at filename contains a NUL
// (0x00) byte.
//
// The file is examined in buffer-sized chunks rather than line by line, so
// input with very long lines (or no newlines at all, as is common in binary
// files) does not exceed bufio.Scanner's maximum token size and fail with
// bufio.ErrTooLong.
//
// If the file cannot be opened or read, it returns true together with the
// error. The true result on failure is deliberate: it is kept for callers
// that only inspect the error when the boolean is set.
func fileContainsNullChar(filename string) (bool, error) {
	file, err := os.Open(filename)
	if err != nil {
		return true, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	// Hand back whatever is currently buffered as one token. Because every
	// token consumes the whole buffer, the buffer never needs to grow and the
	// scanner's token-size limit is never reached.
	scanner.Split(func(data []byte, atEOF bool) (int, []byte, error) {
		if len(data) == 0 {
			// Nothing buffered: either request more data or, at EOF, stop.
			return 0, nil, nil
		}
		return len(data), data, nil
	})

	for scanner.Scan() {
		if strings.Contains(scanner.Text(), "\000") {
			return true, nil
		}
	}
	if err := scanner.Err(); err != nil {
		return true, err
	}
	return false, nil
}
|
@@ -61,6 +61,11 @@ func main() {
|
||||
panic("Invalid match number to print.")
|
||||
}
|
||||
|
||||
// Enable lineFlag if lineNumFlag is enabled
|
||||
if *lineNumFlag {
|
||||
*lineFlag = true
|
||||
}
|
||||
|
||||
// Process:
|
||||
// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
|
||||
// a. Add explicit concatenation operators to facilitate this
|
||||
@@ -68,11 +73,11 @@ func main() {
|
||||
// 3. Run the string against the NFA
|
||||
|
||||
if len(flag.Args()) < 1 { // flag.Args() also strips out program name
|
||||
fmt.Println("ERROR: Missing cmdline args")
|
||||
fmt.Printf("%s: ERROR: Missing cmdline args\n", os.Args[0])
|
||||
os.Exit(22)
|
||||
}
|
||||
if *recursiveFlag && len(flag.Args()) < 2 { // File/Directory must be provided with '-r'
|
||||
fmt.Println("ERROR: Missing cmdline args")
|
||||
fmt.Printf("%s: ERROR: Missing cmdline args\n", os.Args[0])
|
||||
os.Exit(22)
|
||||
}
|
||||
var re string
|
||||
@@ -86,13 +91,37 @@ func main() {
|
||||
inputFilenames := flag.Args()[1:]
|
||||
for _, inputFilename := range inputFilenames {
|
||||
inputFile, err := os.Open(inputFilename)
|
||||
defer inputFile.Close()
|
||||
if err != nil {
|
||||
fmt.Printf("%s: No such file or directory\n", flag.Args()[1])
|
||||
os.Exit(2)
|
||||
fmt.Printf("%s: %s: No such file or directory\n", os.Args[0], inputFilename)
|
||||
} else {
|
||||
fileStat, err := inputFile.Stat()
|
||||
if err != nil {
|
||||
fmt.Printf("%v\n", err)
|
||||
os.Exit(2)
|
||||
} else {
|
||||
if fileStat.Mode().IsDir() {
|
||||
fmt.Printf("%s: %s: Is a directory\n", os.Args[0], inputFilename)
|
||||
} else {
|
||||
var nullCharPresent bool
|
||||
if nullCharPresent, err = fileContainsNullChar(inputFilename); nullCharPresent {
|
||||
if err != nil {
|
||||
fmt.Printf("%v\n", err)
|
||||
os.Exit(1)
|
||||
} else {
|
||||
fmt.Printf("%s: %s: Is a binary file\n", os.Args[0], inputFilename)
|
||||
}
|
||||
} else {
|
||||
inputFiles = append(inputFiles, inputFile)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
inputFiles = append(inputFiles, inputFile)
|
||||
}
|
||||
}
|
||||
if len(inputFiles) == 0 { // No valid files given
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
var test_str string
|
||||
var err error
|
||||
@@ -108,6 +137,7 @@ func main() {
|
||||
}
|
||||
|
||||
for _, inputFile := range inputFiles {
|
||||
lineNum = 0
|
||||
reader := bufio.NewReader(inputFile)
|
||||
linesRead = false
|
||||
for true {
|
||||
@@ -202,7 +232,12 @@ func main() {
|
||||
if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 {
|
||||
continue
|
||||
} else {
|
||||
color.New(color.FgMagenta).Fprintf(out, "%s: ", inputFile.Name()) // Print filename
|
||||
if *recursiveFlag || len(flag.Args()) > 2 { // If we have 2 args, then we're only searching 1 file. We should only print the filename if there's more than 1 file.
|
||||
color.New(color.FgMagenta).Fprintf(out, "%s:", inputFile.Name()) // Print filename
|
||||
}
|
||||
if *lineNumFlag {
|
||||
color.New(color.FgGreen).Fprintf(out, "%d:", lineNum) // Print filename
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -47,6 +47,7 @@ func (re *Reg) UnmarshalText(text []byte) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Longest makes future searches prefer the longest branch of an alternation, as opposed to the leftmost branch.
|
||||
func (re *Reg) Longest() {
|
||||
re.preferLongest = true
|
||||
}
|
||||
|
@@ -131,13 +131,9 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
||||
case 'v': // Vertical tab
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, rune(11))
|
||||
case '-': // Literal hyphen - only in character class
|
||||
if inCharClass {
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, '-')
|
||||
} else {
|
||||
return postfixNode{}, fmt.Errorf("invalid escape character")
|
||||
}
|
||||
case '-': // Literal hyphen
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, '-')
|
||||
default: // None of the above - append it as a regular character
|
||||
if isNormalChar(c) { // Normal characters cannot be escaped
|
||||
return postfixNode{}, fmt.Errorf("invalid escape character")
|
||||
|
Reference in New Issue
Block a user