Added support for numeric ranges: <5-38> will match all numbers between 5 and 38, inclusive on both ends. Also print the line number on which matches occur, if we are in printing (and single-line) mode.
This commit is contained in:
67
main.go
67
main.go
@@ -39,7 +39,55 @@ See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-
|
||||
*/
|
||||
func shuntingYard(re string) []postfixNode {
|
||||
re_postfix := make([]rune, 0)
|
||||
re_runes := []rune(re) // Convert the string to a slice of runes to allow iteration through it
|
||||
// Convert the string to a slice of runes to allow iteration through it
|
||||
re_runes_orig := []rune(re) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
|
||||
re_runes := make([]rune, 0)
|
||||
// Check for numeric range. If we are at the start of a numeric range,
|
||||
// skip to end and construct the equivalent regex for the range.
|
||||
// The reason this is outside the loop below is that it actually modifies
|
||||
// the given regex (we 'cut' the numeric range and 'paste' an equivalent regex).
|
||||
// It also makes the overall parsing easier, since I don't have to worry about the numeric range
|
||||
// anymore.
|
||||
// Eventually, I might be able to add it into the main parsing loop, to reduce the time
|
||||
// complexity.
|
||||
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
|
||||
for i := 0; i < len(re_runes_orig); i++ {
|
||||
c := re_runes_orig[i]
|
||||
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
|
||||
i++ // Step over opening angle bracket
|
||||
tmpStr := ""
|
||||
hyphenFound := false
|
||||
for i < len(re_runes_orig) && re_runes_orig[i] != '>' {
|
||||
if !unicode.IsDigit(re_runes_orig[i]) {
|
||||
if re_runes_orig[i] != '-' || (hyphenFound) {
|
||||
panic("ERROR: Invalid numeric range.")
|
||||
}
|
||||
}
|
||||
if re_runes_orig[i] == '-' {
|
||||
hyphenFound = true
|
||||
}
|
||||
tmpStr += string(re_runes_orig[i])
|
||||
i++
|
||||
}
|
||||
// End of string reached and last character doesn't close the range
|
||||
if i == len(re_runes_orig) && re_runes_orig[len(re_runes_orig)-1] != '>' {
|
||||
panic("ERROR: Numeric range not closed.")
|
||||
}
|
||||
if len(tmpStr) == 0 {
|
||||
panic("ERROR: Empty numeric range.")
|
||||
}
|
||||
// Closing bracket will be skipped when the loop variable increments
|
||||
var rangeStart int
|
||||
var rangeEnd int
|
||||
fmt.Sscanf(tmpStr, "%d-%d", &rangeStart, &rangeEnd)
|
||||
regex := range2regex(rangeStart, rangeEnd)
|
||||
re_runes = append(re_runes, []rune(regex)...)
|
||||
|
||||
} else {
|
||||
re_runes = append(re_runes, c)
|
||||
}
|
||||
}
|
||||
|
||||
/* Add concatenation operators.
|
||||
Only add a concatenation operator between two characters if both the following conditions are met:
|
||||
1. The first character isn't an opening parenthesis or alternation operator (or an escape character)
|
||||
@@ -541,6 +589,7 @@ func main() {
|
||||
var test_runes []rune // Rune-slice representation of test_str
|
||||
var err error
|
||||
var linesRead bool // Whether or not we have read the lines in the file
|
||||
lineNum := 0 // Current line number
|
||||
// Create reader for stdin and writer for stdout
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
out := bufio.NewWriter(os.Stdout)
|
||||
@@ -554,6 +603,7 @@ func main() {
|
||||
if !(*multiLineFlag) {
|
||||
// Read every string from stdin until we encounter an error. If the error isn't EOF, panic.
|
||||
test_str, err = reader.ReadString('\n')
|
||||
lineNum++
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
linesRead = true
|
||||
@@ -582,8 +632,19 @@ func main() {
|
||||
test_runes = []rune(test_str)
|
||||
matchIndices := findAllMatches(startState, test_runes)
|
||||
if *printMatchesFlag {
|
||||
for _, idx := range matchIndices {
|
||||
fmt.Printf("%s\n", idx.toString())
|
||||
// if we are in single line mode, print the line on which
|
||||
// the matches occur
|
||||
if len(matchIndices) > 0 {
|
||||
if !(*multiLineFlag) {
|
||||
fmt.Fprintf(out, "Line %d:\n", lineNum)
|
||||
}
|
||||
for _, idx := range matchIndices {
|
||||
fmt.Fprintf(out, "%s\n", idx.toString())
|
||||
}
|
||||
err := out.Flush()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
Reference in New Issue
Block a user