diff --git a/main.go b/main.go index 607d261..ef07279 100644 --- a/main.go +++ b/main.go @@ -39,7 +39,55 @@ See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation- */ func shuntingYard(re string) []postfixNode { re_postfix := make([]rune, 0) - re_runes := []rune(re) // Convert the string to a slice of runes to allow iteration through it + // Convert the string to a slice of runes to allow iteration through it + re_runes_orig := []rune(re) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges) + re_runes := make([]rune, 0) + // Check for numeric range. If we are at the start of a numeric range, + // skip to end and construct the equivalent regex for the range. + // The reason this is outside the loop below, is that it actually modifies + // the given regex (we 'cut' the numeric range and 'paste' an equivalent regex). + // It also makes the overall parsing easier, since I don't have to worry about the numeric range + // anymore. + // Eventually, I might be able to add it into the main parsing loop, to reduce the time + // complexity. + // A numeric range has the syntax: <start-end>. It matches all numbers in this range. 
+ for i := 0; i < len(re_runes_orig); i++ { + c := re_runes_orig[i] + if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) { + i++ // Step over opening angle bracket + tmpStr := "" + hyphenFound := false + for i < len(re_runes_orig) && re_runes_orig[i] != '>' { + if !unicode.IsDigit(re_runes_orig[i]) { + if re_runes_orig[i] != '-' || (hyphenFound) { + panic("ERROR: Invalid numeric range.") + } + } + if re_runes_orig[i] == '-' { + hyphenFound = true + } + tmpStr += string(re_runes_orig[i]) + i++ + } + // End of string reached and last character doesn't close the range + if i == len(re_runes_orig) && re_runes_orig[len(re_runes_orig)-1] != '>' { + panic("ERROR: Numeric range not closed.") + } + if len(tmpStr) == 0 { + panic("ERROR: Empty numeric range.") + } + // Closing bracket will be skipped when the loop variable increments + var rangeStart int + var rangeEnd int + fmt.Sscanf(tmpStr, "%d-%d", &rangeStart, &rangeEnd) + regex := range2regex(rangeStart, rangeEnd) + re_runes = append(re_runes, []rune(regex)...) + + } else { + re_runes = append(re_runes, c) + } + } + /* Add concatenation operators. Only add a concatenation operator between two characters if both the following conditions are met: 1. The first character isn't an opening parantheses or alteration operator (or an escape character) @@ -541,6 +589,7 @@ func main() { var test_runes []rune // Rune-slice representation of test_str var err error var linesRead bool // Whether or not we have read the lines in the file + lineNum := 0 // Current line number // Create reader for stdin and writer for stdout reader := bufio.NewReader(os.Stdin) out := bufio.NewWriter(os.Stdout) @@ -554,6 +603,7 @@ func main() { if !(*multiLineFlag) { // Read every string from stdin until we encounter an error. If the error isn't EOF, panic. 
test_str, err = reader.ReadString('\n') + lineNum++ if err != nil { if err == io.EOF { linesRead = true @@ -582,8 +632,19 @@ func main() { test_runes = []rune(test_str) matchIndices := findAllMatches(startState, test_runes) if *printMatchesFlag { - for _, idx := range matchIndices { - fmt.Printf("%s\n", idx.toString()) + // if we are in single line mode, print the line on which + // the matches occur + if len(matchIndices) > 0 { + if !(*multiLineFlag) { + fmt.Fprintf(out, "Line %d:\n", lineNum) + } + for _, idx := range matchIndices { + fmt.Fprintf(out, "%s\n", idx.toString()) + } + err := out.Flush() + if err != nil { + panic(err) + } } continue }