@ -39,7 +39,55 @@ See: https://blog.cernera.me/converting-regular-expressions-to-postfix-notation-
* /
func shuntingYard ( re string ) [ ] postfixNode {
re_postfix := make ( [ ] rune , 0 )
re_runes := [ ] rune ( re ) // Convert the string to a slice of runes to allow iteration through it
// Convert the string to a slice of runes to allow iteration through it
re_runes_orig := [ ] rune ( re ) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
re_runes := make ( [ ] rune , 0 )
// Check for numeric ranges. When we reach the start of a numeric range,
// skip to its end and construct the equivalent regex for the range.
// This is done outside the main parsing loop below because it actually modifies
// the given regex (we 'cut' the numeric range and 'paste' an equivalent regex).
// It also makes the overall parsing easier, since I don't have to worry about numeric ranges
// anymore.
// Eventually, I might be able to fold this into the main parsing loop, to reduce the time
// complexity.
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
for i := 0 ; i < len ( re_runes_orig ) ; i ++ {
c := re_runes_orig [ i ]
if c == '<' && ( i == 0 || ( re_runes_orig [ i - 1 ] != '\\' && re_runes_orig [ i - 1 ] != '?' ) ) {
i ++ // Step over opening angle bracket
tmpStr := ""
hyphenFound := false
for i < len ( re_runes_orig ) && re_runes_orig [ i ] != '>' {
if ! unicode . IsDigit ( re_runes_orig [ i ] ) {
if re_runes_orig [ i ] != '-' || ( hyphenFound ) {
panic ( "ERROR: Invalid numeric range." )
}
}
if re_runes_orig [ i ] == '-' {
hyphenFound = true
}
tmpStr += string ( re_runes_orig [ i ] )
i ++
}
// End of string reached and last character doesn't close the range
if i == len ( re_runes_orig ) && re_runes_orig [ len ( re_runes_orig ) - 1 ] != '>' {
panic ( "ERROR: Numeric range not closed." )
}
if len ( tmpStr ) == 0 {
panic ( "ERROR: Empty numeric range." )
}
// Closing bracket will be skipped when the loop variable increments
var rangeStart int
var rangeEnd int
fmt . Sscanf ( tmpStr , "%d-%d" , & rangeStart , & rangeEnd )
regex := range2regex ( rangeStart , rangeEnd )
re_runes = append ( re_runes , [ ] rune ( regex ) ... )
} else {
re_runes = append ( re_runes , c )
}
}
/ * Add concatenation operators .
Only add a concatenation operator between two characters if both the following conditions are met :
1. The first character isn't an opening parenthesis or alternation operator ( or an escape character )
@ -541,6 +589,7 @@ func main() {
var test_runes [ ] rune // Rune-slice representation of test_str
var err error
var linesRead bool // Whether or not we have read the lines in the file
lineNum := 0 // Current line number
// Create reader for stdin and writer for stdout
reader := bufio . NewReader ( os . Stdin )
out := bufio . NewWriter ( os . Stdout )
@ -554,6 +603,7 @@ func main() {
if ! ( * multiLineFlag ) {
// Read every string from stdin until we encounter an error. If the error isn't EOF, panic.
test_str , err = reader . ReadString ( '\n' )
lineNum ++
if err != nil {
if err == io . EOF {
linesRead = true
@ -582,8 +632,19 @@ func main() {
test_runes = [ ] rune ( test_str )
matchIndices := findAllMatches ( startState , test_runes )
if * printMatchesFlag {
// If we are in single-line mode, print the number of the line on which
// the matches occur before listing the matches themselves.
if len ( matchIndices ) > 0 {
if ! ( * multiLineFlag ) {
fmt . Fprintf ( out , "Line %d:\n" , lineNum )
}
for _ , idx := range matchIndices {
fmt . Printf ( "%s\n" , idx . toString ( ) )
fmt . Fprintf ( out , "%s\n" , idx . toString ( ) )
}
err := out . Flush ( )
if err != nil {
panic ( err )
}
}
continue
}