Compare commits
12 Commits
d62a429cce
...
v0.4.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 595b86df60 | |||
| 5f9bab528a | |||
| 530564b920 | |||
| 02b3b469c4 | |||
| e489dc4c27 | |||
| 34149980a4 | |||
| e79c19a929 | |||
| d2bce37935 | |||
| bb3b866b77 | |||
| e07f27dc78 | |||
| 65d2317f79 | |||
| a631fc289c |
63
cmd/main.go
63
cmd/main.go
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"slices"
|
||||||
|
|
||||||
"github.com/fatih/color"
|
"github.com/fatih/color"
|
||||||
|
|
||||||
@@ -25,6 +26,8 @@ func main() {
|
|||||||
multiLineFlag := flag.Bool("t", false, "Multi-line mode. Treats newline just like any character.")
|
multiLineFlag := flag.Bool("t", false, "Multi-line mode. Treats newline just like any character.")
|
||||||
printMatchesFlag := flag.Bool("p", false, "Prints start and end index of each match. Can only be used with '-t' for multi-line mode.")
|
printMatchesFlag := flag.Bool("p", false, "Prints start and end index of each match. Can only be used with '-t' for multi-line mode.")
|
||||||
caseInsensitiveFlag := flag.Bool("i", false, "Case-insensitive. Disregard the case of all characters.")
|
caseInsensitiveFlag := flag.Bool("i", false, "Case-insensitive. Disregard the case of all characters.")
|
||||||
|
recursiveFlag := flag.Bool("r", false, "Recursively search all files in the given directory.")
|
||||||
|
lineNumFlag := flag.Bool("n", false, "For each line with a match, print the line number. Implies -l.")
|
||||||
matchNum := flag.Int("m", 0, "Print the match with the given index. Eg. -m 3 prints the third match.")
|
matchNum := flag.Int("m", 0, "Print the match with the given index. Eg. -m 3 prints the third match.")
|
||||||
substituteText := flag.String("s", "", "Substitute the contents of each match with the given string. Overrides -o and -v")
|
substituteText := flag.String("s", "", "Substitute the contents of each match with the given string. Overrides -o and -v")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
@@ -58,31 +61,71 @@ func main() {
|
|||||||
panic("Invalid match number to print.")
|
panic("Invalid match number to print.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enable lineFlag if lineNumFlag is enabled
|
||||||
|
if *lineNumFlag {
|
||||||
|
*lineFlag = true
|
||||||
|
}
|
||||||
|
|
||||||
// Process:
|
// Process:
|
||||||
// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
|
// 1. Convert regex into postfix notation (Shunting-Yard algorithm)
|
||||||
// a. Add explicit concatenation operators to facilitate this
|
// a. Add explicit concatenation operators to facilitate this
|
||||||
// 2. Build NFA from postfix representation (Thompson's algorithm)
|
// 2. Build NFA from postfix representation (Thompson's algorithm)
|
||||||
// 3. Run the string against the NFA
|
// 3. Run the string against the NFA
|
||||||
|
|
||||||
if len(flag.Args()) != 1 { // flag.Args() also strips out program name
|
if len(flag.Args()) < 1 { // flag.Args() also strips out program name
|
||||||
fmt.Println("ERROR: Missing cmdline args")
|
fmt.Printf("%s: ERROR: Missing cmdline args\n", os.Args[0])
|
||||||
|
os.Exit(22)
|
||||||
|
}
|
||||||
|
if *recursiveFlag && len(flag.Args()) < 2 { // File/Directory must be provided with '-r'
|
||||||
|
fmt.Printf("%s: ERROR: Missing cmdline args\n", os.Args[0])
|
||||||
os.Exit(22)
|
os.Exit(22)
|
||||||
}
|
}
|
||||||
var re string
|
var re string
|
||||||
re = flag.Args()[0]
|
re = flag.Args()[0]
|
||||||
|
var inputFiles []*os.File
|
||||||
|
if len(flag.Args()) == 1 || flag.Args()[1] == "-" { // Either no file argument, or file argument is "-"
|
||||||
|
if !slices.Contains(inputFiles, os.Stdin) {
|
||||||
|
inputFiles = append(inputFiles, os.Stdin) // os.Stdin cannot be entered more than once into the file list
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
inputFilenames := flag.Args()[1:]
|
||||||
|
for _, inputFilename := range inputFilenames {
|
||||||
|
inputFile, err := os.Open(inputFilename)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("%s: %s: No such file or directory\n", os.Args[0], inputFilename)
|
||||||
|
} else {
|
||||||
|
fileStat, err := inputFile.Stat()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("%v\n", err)
|
||||||
|
os.Exit(2)
|
||||||
|
} else {
|
||||||
|
if fileStat.Mode().IsDir() {
|
||||||
|
fmt.Printf("%s: %s: Is a directory\n", os.Args[0], inputFilename)
|
||||||
|
} else {
|
||||||
|
inputFiles = append(inputFiles, inputFile)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var test_str string
|
var test_str string
|
||||||
var err error
|
var err error
|
||||||
var linesRead bool // Whether or not we have read the lines in the file
|
var linesRead bool // Whether or not we have read the lines in the file
|
||||||
lineNum := 0 // Current line number
|
lineNum := 0 // Current line number
|
||||||
// Create reader for stdin and writer for stdout
|
// Create writer for stdout
|
||||||
reader := bufio.NewReader(os.Stdin)
|
|
||||||
out := bufio.NewWriter(os.Stdout)
|
out := bufio.NewWriter(os.Stdout)
|
||||||
|
// Compile regex
|
||||||
regComp, err := reg.Compile(re, flagsToCompile...)
|
regComp, err := reg.Compile(re, flagsToCompile...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, inputFile := range inputFiles {
|
||||||
|
lineNum = 0
|
||||||
|
reader := bufio.NewReader(inputFile)
|
||||||
|
linesRead = false
|
||||||
for true {
|
for true {
|
||||||
if linesRead {
|
if linesRead {
|
||||||
break
|
break
|
||||||
@@ -174,6 +217,13 @@ func main() {
|
|||||||
if *lineFlag {
|
if *lineFlag {
|
||||||
if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 {
|
if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 {
|
||||||
continue
|
continue
|
||||||
|
} else {
|
||||||
|
if *recursiveFlag || len(flag.Args()) > 2 { // If we have 2 args, then we're only searching 1 file. We should only print the filename if there's more than 1 file.
|
||||||
|
color.New(color.FgMagenta).Fprintf(out, "%s:", inputFile.Name()) // Print filename
|
||||||
|
}
|
||||||
|
if *lineNumFlag {
|
||||||
|
color.New(color.FgGreen).Fprintf(out, "%d:", lineNum) // Print line number
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -201,7 +251,7 @@ func main() {
|
|||||||
} else {
|
} else {
|
||||||
for i, c := range test_str_runes {
|
for i, c := range test_str_runes {
|
||||||
if indicesToPrint.contains(i) {
|
if indicesToPrint.contains(i) {
|
||||||
color.New(color.FgRed).Fprintf(out, "%c", c)
|
color.New(color.FgRed, color.Bold).Fprintf(out, "%c", c)
|
||||||
// Newline after every match - only if -o is enabled and -v is disabled.
|
// Newline after every match - only if -o is enabled and -v is disabled.
|
||||||
if *onlyFlag && !(*invertFlag) {
|
if *onlyFlag && !(*invertFlag) {
|
||||||
for matchIdxNum, idx := range matchIndices {
|
for matchIdxNum, idx := range matchIndices {
|
||||||
@@ -230,4 +280,5 @@ func main() {
|
|||||||
fmt.Println()
|
fmt.Println()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ func (re *Reg) UnmarshalText(text []byte) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Longest makes future searches prefer the longest branch of an alternation, as opposed to the leftmost branch.
|
||||||
func (re *Reg) Longest() {
|
func (re *Reg) Longest() {
|
||||||
re.preferLongest = true
|
re.preferLongest = true
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,10 +45,10 @@ type nfaState struct {
|
|||||||
groupBegin bool // Whether or not the node starts a capturing group
|
groupBegin bool // Whether or not the node starts a capturing group
|
||||||
groupEnd bool // Whether or not the node ends a capturing group
|
groupEnd bool // Whether or not the node ends a capturing group
|
||||||
groupNum int // Which capturing group the node starts / ends
|
groupNum int // Which capturing group the node starts / ends
|
||||||
// The following properties depend on the current match - I should think about resetting them for every match.
|
|
||||||
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
|
|
||||||
isBackreference bool // Whether or not current node is backreference
|
isBackreference bool // Whether or not current node is backreference
|
||||||
referredGroup int // If current node is a backreference, the node that it points to
|
referredGroup int // If current node is a backreference, the node that it points to
|
||||||
|
// The following properties depend on the current match - I should think about resetting them for every match.
|
||||||
|
threadGroups []Group // Assuming that a state is part of a 'thread' in the matching process, this array stores the indices of capturing groups in the current thread. As matches are found for this state, its groups will be copied over.
|
||||||
threadBackref int // If current node is a backreference, how many characters to look forward into the referred group
|
threadBackref int // If current node is a backreference, how many characters to look forward into the referred group
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,6 +86,8 @@ func cloneStateHelper(stateToClone *nfaState, cloneMap map[*nfaState]*nfaState)
|
|||||||
groupEnd: stateToClone.groupEnd,
|
groupEnd: stateToClone.groupEnd,
|
||||||
groupBegin: stateToClone.groupBegin,
|
groupBegin: stateToClone.groupBegin,
|
||||||
groupNum: stateToClone.groupNum,
|
groupNum: stateToClone.groupNum,
|
||||||
|
isBackreference: stateToClone.isBackreference,
|
||||||
|
referredGroup: stateToClone.referredGroup,
|
||||||
}
|
}
|
||||||
cloneMap[stateToClone] = clone
|
cloneMap[stateToClone] = clone
|
||||||
for i, s := range stateToClone.output {
|
for i, s := range stateToClone.output {
|
||||||
|
|||||||
@@ -117,6 +117,7 @@ var reTests = []struct {
|
|||||||
{`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
|
{`\d{3,4}`, nil, "ababab555", []Group{{6, 9}}},
|
||||||
{`\bpaint\b`, nil, "paints", []Group{}},
|
{`\bpaint\b`, nil, "paints", []Group{}},
|
||||||
{`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
|
{`\b\w{5}\b`, nil, "paint", []Group{{0, 5}}},
|
||||||
|
{`\w{}`, nil, "test", nil},
|
||||||
{`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
|
{`[^\w]`, nil, "abcdef1230[]qq';;'", []Group{{10, 11}, {11, 12}, {14, 15}, {15, 16}, {16, 17}, {17, 18}}},
|
||||||
{`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
|
{`[^\W]`, nil, "abcdef1230[]qq';;'", []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9, 10}, {12, 13}, {13, 14}}},
|
||||||
{`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
{`[\[\]]`, nil, "a[b[l]]", []Group{{1, 2}, {3, 4}, {5, 6}, {6, 7}}},
|
||||||
@@ -547,6 +548,20 @@ var reTests = []struct {
|
|||||||
{`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}},
|
{`[\p{Greek}\p{Cyrillic}]`, nil, `ΣωШД`, []Group{{0, 1}, {1, 2}, {2, 3}, {3, 4}}},
|
||||||
|
|
||||||
{`(?<=\().*?(?=\))`, nil, `(abc)`, []Group{{1, 4}}},
|
{`(?<=\().*?(?=\))`, nil, `(abc)`, []Group{{1, 4}}},
|
||||||
|
|
||||||
|
{`((a|b)\2)`, nil, `aa`, []Group{{0, 2}}},
|
||||||
|
{`((a|b)\2)`, nil, `bb`, []Group{{0, 2}}},
|
||||||
|
{`((a|b)\2)`, nil, `ab`, []Group{}},
|
||||||
|
{`((a|b)\2)`, nil, `ba`, []Group{}},
|
||||||
|
|
||||||
|
{`((a|b)\2){3}`, nil, `aaaaaa`, []Group{{0, 6}}},
|
||||||
|
{`((a|b)\2){3}`, nil, `bbbbbb`, []Group{{0, 6}}},
|
||||||
|
{`((a|b)\2){3}`, nil, `bbaaaa`, []Group{{0, 6}}},
|
||||||
|
{`((a|b)\2){3}`, nil, `aabbaa`, []Group{{0, 6}}},
|
||||||
|
{`((a|b)\2){3}`, nil, `aaaabb`, []Group{{0, 6}}},
|
||||||
|
{`((a|b)\2){3}`, nil, `bbaabb`, []Group{{0, 6}}},
|
||||||
|
{`((a|b)\2){3}`, nil, `baabab`, []Group{}},
|
||||||
|
{`((a|b)\2){3}`, nil, `bbabab`, []Group{}},
|
||||||
}
|
}
|
||||||
|
|
||||||
var groupTests = []struct {
|
var groupTests = []struct {
|
||||||
|
|||||||
@@ -4,4 +4,5 @@
|
|||||||
Ideas for flags:
|
Ideas for flags:
|
||||||
-m <num> : Print <num>th match (-m 1 = first match, -m 2 = second match)
|
-m <num> : Print <num>th match (-m 1 = first match, -m 2 = second match)
|
||||||
-g <num> : Print the <num>th group
|
-g <num> : Print the <num>th group
|
||||||
|
-r : Specify a directory instead of a file, reads recursively
|
||||||
4. Refactor code for flags - make each flag's code a function, which modifies the result of findAllMatches
|
4. Refactor code for flags - make each flag's code a function, which modifies the result of findAllMatches
|
||||||
|
|||||||
Reference in New Issue
Block a user