package main import ( "bufio" "flag" "fmt" "io" "os" "slices" "github.com/fatih/color" ) const CONCAT rune = '~' var notDotChars []rune var caseInsensitiveFlag *bool // Whether we are running in case-insensitive mode func isOperator(c rune) bool { if c == '+' || c == '?' || c == '*' || c == '|' || c == CONCAT { return true } return false } /* priority returns the priority of the given operator */ func priority(op rune) int { precedence := []rune{'|', CONCAT, '+', '*', '?'} return slices.Index(precedence, op) } func main() { invertFlag := flag.Bool("v", false, "Invert match.") // This flag has two 'modes': // 1. Without '-v': Prints only matches. Prints a newline after every match. // 2. With '-v': Substitutes all matches with empty string. onlyFlag := flag.Bool("o", false, "Print only colored content. Overrides -l.") lineFlag := flag.Bool("l", false, "Only print lines with a match (or with no matches, if -v is enabled). Similar to grep's default.") multiLineFlag := flag.Bool("t", false, "Multi-line mode. Treats newline just like any character.") printMatchesFlag := flag.Bool("p", false, "Prints start and end index of each match. Can only be used with '-t' for multi-line mode.") caseInsensitiveFlag = flag.Bool("i", false, "Case-insensitive. Disregard the case of all characters.") matchNum := flag.Int("m", 0, "Print the match with the given index. Eg. -m 3 prints the third match.") substituteText := flag.String("s", "", "Substitute the contents of each match with the given string. Overrides -o and -v") flag.Parse() // In multi-line mode, 'dot' metacharacter also matches newline if !(*multiLineFlag) { notDotChars = []rune{'\n'} } else { notDotChars = []rune{} } // -l and -o are mutually exclusive: -o overrides -l if *onlyFlag { *lineFlag = false } // Check if substitute and matchNum flags have been enabled substituteFlagEnabled := false matchNumFlagEnabled := false flag.Visit(func(f *flag.Flag) { if f.Name == "s" { substituteFlagEnabled = true } if f.Name == "m" { matchNumFlagEnabled = true } }) // Validate matchNumFlag - must be positive integer if matchNumFlagEnabled && *matchNum < 1 { panic("Invalid match number to print.") } // Process: // 1. Convert regex into postfix notation (Shunting-Yard algorithm) // a. Add explicit concatenation operators to facilitate this // 2. Build NFA from postfix representation (Thompson's algorithm) // 3. Run the string against the NFA if len(flag.Args()) != 1 { // flag.Args() also strips out program name fmt.Println("ERROR: Missing cmdline args") os.Exit(22) } var re string re = flag.Args()[0] var test_str string var test_runes []rune // Rune-slice representation of test_str var err error var linesRead bool // Whether or not we have read the lines in the file lineNum := 0 // Current line number // Create reader for stdin and writer for stdout reader := bufio.NewReader(os.Stdin) out := bufio.NewWriter(os.Stdout) re_postfix := shuntingYard(re) startState, numGroups := thompson(re_postfix) for true { if linesRead { break } if !(*multiLineFlag) { // Read every string from stdin until we encounter an error. If the error isn't EOF, panic. test_str, err = reader.ReadString('\n') lineNum++ if err != nil { if err == io.EOF { linesRead = true } else { panic(err) } } if len(test_str) > 0 && test_str[len(test_str)-1] == '\n' { test_str = test_str[:len(test_str)-1] } } else { // Multi-line mode - read every line of input into a temp. string. // test_str will contain all lines of input (including newline characters) // as one string. var temp string for temp, err = reader.ReadString('\n'); err == nil; temp, err = reader.ReadString('\n') { test_str += temp } // Assuming err != nil if err == io.EOF { if len(temp) > 0 { test_str += temp // Add the last line (if it is non-empty) } linesRead = true } else { panic(err) } } test_runes = []rune(test_str) matchIndices := make([]Match, 0) if matchNumFlagEnabled { tmp, err := findNthMatch(startState, test_runes, numGroups, *matchNum) if err == nil { matchIndices = append(matchIndices, tmp) } } else { matchIndices = findAllMatches(startState, test_runes, numGroups) } if *printMatchesFlag { // if we are in single line mode, print the line on which // the matches occur if len(matchIndices) > 0 { if !(*multiLineFlag) { fmt.Fprintf(out, "Line %d:\n", lineNum) } for _, m := range matchIndices { fmt.Fprintf(out, "%s\n", m.toString()) } err := out.Flush() if err != nil { panic(err) } } continue } // Decompose the array of matchIndex structs into a flat unique array of ints - if matchIndex is {4,7}, flat array will contain 4,5,6 // This should make checking O(1) instead of O(n) indicesToPrint := new_uniq_arr[int]() for _, idx := range matchIndices { indicesToPrint.add(genRange(idx[0].startIdx, idx[0].endIdx)...) } // If we are inverting, then we should print the indices which _didn't_ match // in color. if *invertFlag { oldIndices := indicesToPrint.values() indicesToPrint = new_uniq_arr[int]() // Explanation: // Find all numbers from 0 to len(test_str) that are NOT in oldIndices. // These are the values we want to print, now that we have inverted the match. // Re-initialize indicesToPrint and add all of these values to it. indicesToPrint.add(setDifference(genRange(0, len(test_runes)), oldIndices)...) } // If lineFlag is enabled, we should only print something if: // a. We are not inverting, and have at least one match on the current line // OR // b. We are inverting, and have no matches at all on the current line. // This checks for the inverse, and continues if it is true. if *lineFlag { if !(*invertFlag) && len(matchIndices) == 0 || *invertFlag && len(matchIndices) > 0 { continue } } // If we are substituting, we need a different behavior, as follows: // For every character in the test string: // 1. Check if the index is the start of any matchIndex // 2. If so, print the substitute text, and set our index to // the corresponding end index. // 3. If not, just print the character. if substituteFlagEnabled { for i := range test_runes { inMatchIndex := false for _, m := range matchIndices { if i == m[0].startIdx { fmt.Fprintf(out, "%s", *substituteText) i = m[0].endIdx inMatchIndex = true break } } if !inMatchIndex { fmt.Fprintf(out, "%c", test_runes[i]) } } } else { for i, c := range test_runes { if indicesToPrint.contains(i) { color.New(color.FgRed).Fprintf(out, "%c", c) // Newline after every match - only if -o is enabled and -v is disabled. if *onlyFlag && !(*invertFlag) { for _, idx := range matchIndices { if i+1 == idx[0].endIdx { // End index is one more than last index of match fmt.Fprintf(out, "\n") break } } } } else { if !(*onlyFlag) { fmt.Fprintf(out, "%c", c) } } } } err = out.Flush() if err != nil { panic(err) } fmt.Println() } }