Added support for numeric ranges: <5-38> matches every number from 5 to 38, inclusive on both ends. Also print the line number on which matches occur when we are in printing (and single-line) mode.

master
Aadhavan Srinivasan 2 months ago
parent cbd6ea136b
commit 6208f32710

@ -39,7 +39,55 @@ See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-
*/
func shuntingYard(re string) []postfixNode {
re_postfix := make([]rune, 0)
re_runes := []rune(re) // Convert the string to a slice of runes to allow iteration through it
// Convert the string to a slice of runes to allow iteration through it
re_runes_orig := []rune(re) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
re_runes := make([]rune, 0)
// Check for numeric range. If we are at the start of a numeric range,
// skip to end and construct the equivalent regex for the range.
// The reason this is outside the loop below, is that it actually modifies
// the given regex (we 'cut' the numeric range and 'paste' an equivalent regex).
// It also makes the overall parsing easier, since I don't have to worry about the numeric range
// anymore.
// Eventually, I might be able to add it into the main parsing loop, to reduce the time
// complexity.
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
for i := 0; i < len(re_runes_orig); i++ {
c := re_runes_orig[i]
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
i++ // Step over opening angle bracket
tmpStr := ""
hyphenFound := false
for i < len(re_runes_orig) && re_runes_orig[i] != '>' {
if !unicode.IsDigit(re_runes_orig[i]) {
if re_runes_orig[i] != '-' || (hyphenFound) {
panic("ERROR: Invalid numeric range.")
}
}
if re_runes_orig[i] == '-' {
hyphenFound = true
}
tmpStr += string(re_runes_orig[i])
i++
}
// End of string reached and last character doesn't close the range
if i == len(re_runes_orig) && re_runes_orig[len(re_runes_orig)-1] != '>' {
panic("ERROR: Numeric range not closed.")
}
if len(tmpStr) == 0 {
panic("ERROR: Empty numeric range.")
}
// Closing bracket will be skipped when the loop variable increments
var rangeStart int
var rangeEnd int
fmt.Sscanf(tmpStr, "%d-%d", &rangeStart, &rangeEnd)
regex := range2regex(rangeStart, rangeEnd)
re_runes = append(re_runes, []rune(regex)...)
} else {
re_runes = append(re_runes, c)
}
}
/* Add concatenation operators.
Only add a concatenation operator between two characters if both the following conditions are met:
1. The first character isn't an opening parenthesis or alternation operator (or an escape character)
@ -541,6 +589,7 @@ func main() {
var test_runes []rune // Rune-slice representation of test_str
var err error
var linesRead bool // Whether or not we have read the lines in the file
lineNum := 0 // Current line number
// Create reader for stdin and writer for stdout
reader := bufio.NewReader(os.Stdin)
out := bufio.NewWriter(os.Stdout)
@ -554,6 +603,7 @@ func main() {
if !(*multiLineFlag) {
// Read every string from stdin until we encounter an error. If the error isn't EOF, panic.
test_str, err = reader.ReadString('\n')
lineNum++
if err != nil {
if err == io.EOF {
linesRead = true
@ -582,8 +632,19 @@ func main() {
test_runes = []rune(test_str)
matchIndices := findAllMatches(startState, test_runes)
if *printMatchesFlag {
for _, idx := range matchIndices {
fmt.Printf("%s\n", idx.toString())
// If we are in single-line mode, print the number of the line on which
// the matches occur.
if len(matchIndices) > 0 {
if !(*multiLineFlag) {
fmt.Fprintf(out, "Line %d:\n", lineNum)
}
for _, idx := range matchIndices {
fmt.Fprintf(out, "%s\n", idx.toString())
}
err := out.Flush()
if err != nil {
panic(err)
}
}
continue
}

Loading…
Cancel
Save