Added Unicode support to the dot metacharacter - it now matches (almost) _any_ Unicode character
This commit is contained in:
13
main.go
13
main.go
@@ -141,7 +141,7 @@ func shuntingYard(re string) []postfixNode {
|
||||
}
|
||||
|
||||
if c == '.' { // Dot metacharacter - represents 'any' character, but I am only adding Unicode 0020-007E
|
||||
outQueue = append(outQueue, newPostfixNode(dotChars()...))
|
||||
outQueue = append(outQueue, newPostfixDotNode())
|
||||
continue
|
||||
}
|
||||
if c == '^' { // Start-of-string assertion
|
||||
@@ -282,6 +282,9 @@ func thompson(re []postfixNode) *State {
|
||||
if c.nodetype == CHARACTER || c.nodetype == ASSERTION {
|
||||
state := State{}
|
||||
state.transitions = make(map[int][]*State)
|
||||
if c.isDot {
|
||||
state.isDot = true
|
||||
}
|
||||
state.content = rune2Contents(c.contents)
|
||||
state.output = make([]*State, 0)
|
||||
state.output = append(state.output, &state)
|
||||
@@ -397,6 +400,7 @@ func main() {
|
||||
var re string
|
||||
re = flag.Args()[0]
|
||||
var test_str string
|
||||
var test_runes []rune // Rune-slice representation of test_str
|
||||
var err error
|
||||
// Create reader for stdin and writer for stdout // End index is one more than last index of match
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
@@ -406,7 +410,8 @@ func main() {
|
||||
startState := thompson(re_postfix)
|
||||
// Read every string from stdin until we encounter an error. If the error isn't EOF, panic.
|
||||
for test_str, err = reader.ReadString('\n'); err == nil; test_str, err = reader.ReadString('\n') {
|
||||
matchIndices := findAllMatches(startState, []rune(test_str))
|
||||
test_runes = []rune(test_str)
|
||||
matchIndices := findAllMatches(startState, []rune(test_runes))
|
||||
// Decompose the array of matchIndex structs into a flat unique array of ints - if matchIndex is {4,7}, flat array will contain 4,5,6
|
||||
// This should make checking O(1) instead of O(n)
|
||||
indicesToPrint := new_uniq_arr[int]()
|
||||
@@ -422,7 +427,7 @@ func main() {
|
||||
// Find all numbers from 0 to len(test_runes) that are NOT in oldIndices.
|
||||
// These are the values we want to print, now that we have inverted the match.
|
||||
// Re-initialize indicesToPrint and add all of these values to it.
|
||||
indicesToPrint.add(setDifference(genRange(0, len(test_str)), oldIndices)...)
|
||||
indicesToPrint.add(setDifference(genRange(0, len(test_runes)), oldIndices)...)
|
||||
|
||||
}
|
||||
// If lineFlag is enabled, we should only print something if:
|
||||
@@ -435,7 +440,7 @@ func main() {
|
||||
continue
|
||||
}
|
||||
}
|
||||
for i, c := range test_str {
|
||||
for i, c := range test_runes {
|
||||
if indicesToPrint.contains(i) {
|
||||
color.New(color.FgRed).Fprintf(out, "%c", c)
|
||||
// Newline after every match - only if -o is enabled and -v is disabled.
|
||||
|
Reference in New Issue
Block a user