Added unicode support
Replaced strings with rune-slices, which capture unicode codepoints more accurately.
This commit is contained in:
2
main.go
2
main.go
@@ -406,7 +406,7 @@ func main() {
|
||||
startState := thompson(re_postfix)
|
||||
// Read every string from stdin until we encounter an error. If the error isn't EOF, panic.
|
||||
for test_str, err = reader.ReadString('\n'); err == nil; test_str, err = reader.ReadString('\n') {
|
||||
matchIndices := findAllMatches(startState, test_str)
|
||||
matchIndices := findAllMatches(startState, []rune(test_str))
|
||||
// Decompose the array of matchIndex structs into a flat unique array of ints - if matchIndex is {4,7}, flat array will contain 4,5,6
|
||||
// This should make checking O(1) instead of O(n)
|
||||
indicesToPrint := new_uniq_arr[int]()
|
||||
|
@@ -81,7 +81,7 @@ func pruneIndices(indices []MatchIndex) []MatchIndex {
|
||||
|
||||
// findAllMatches tries to find all matches of the regex represented by the given start-state, in
|
||||
// the given string
|
||||
func findAllMatches(start *State, str string) []MatchIndex {
|
||||
func findAllMatches(start *State, str []rune) []MatchIndex {
|
||||
idx := 0
|
||||
var matchFound bool
|
||||
var matchIdx MatchIndex
|
||||
@@ -104,7 +104,7 @@ func findAllMatches(start *State, str string) []MatchIndex {
|
||||
// the next search should start from.
|
||||
//
|
||||
// Might return duplicates or overlapping indices, so care must be taken to prune the resulting array.
|
||||
func findAllMatchesHelper(start *State, str string, offset int) (bool, MatchIndex, int) {
|
||||
func findAllMatchesHelper(start *State, str []rune, offset int) (bool, MatchIndex, int) {
|
||||
// Base case - exit if offset exceeds string's length
|
||||
if offset > len(str) {
|
||||
// The first value here shouldn't be used, because we should exit when the second return value is greater than len(str)
|
||||
|
2
misc.go
2
misc.go
@@ -22,7 +22,7 @@ func dotChars() []rune { // Returns all possible characters represented by the d
|
||||
}
|
||||
|
||||
// Returns true if str[idx] and str[idx-1] are separated by a word boundary.
|
||||
func isWordBoundary(str string, idx int) bool {
|
||||
func isWordBoundary(str []rune, idx int) bool {
|
||||
str_runes := []rune(str)
|
||||
wbounded := idx == 0 ||
|
||||
idx >= len(str) ||
|
||||
|
6
nfa.go
6
nfa.go
@@ -73,7 +73,7 @@ func cloneStateHelper(state *State, cloneMap map[*State]*State) *State {
|
||||
|
||||
// Checks if the given state's assertion is true. Returns true if the given
|
||||
// state doesn't have an assertion.
|
||||
func (s State) checkAssertion(str string, idx int) bool {
|
||||
func (s State) checkAssertion(str []rune, idx int) bool {
|
||||
if s.assert == SOS {
|
||||
return idx == 0
|
||||
}
|
||||
@@ -90,7 +90,7 @@ func (s State) checkAssertion(str string, idx int) bool {
|
||||
}
|
||||
|
||||
// Returns true if the contents of 's' contain the value at the given index of the given string
|
||||
func (s State) contentContains(str string, idx int) bool {
|
||||
func (s State) contentContains(str []rune, idx int) bool {
|
||||
if s.assert != NONE {
|
||||
return s.checkAssertion(str, idx)
|
||||
}
|
||||
@@ -100,7 +100,7 @@ func (s State) contentContains(str string, idx int) bool {
|
||||
|
||||
// Returns the matches for the character at the given index of the given string.
|
||||
// Also returns the number of matches. Returns -1 if an assertion failed.
|
||||
func (s State) matchesFor(str string, idx int) ([]*State, int) {
|
||||
func (s State) matchesFor(str []rune, idx int) ([]*State, int) {
|
||||
// Assertions can be viewed as 'checks'. If the check fails, we return
|
||||
// an empty array and 0.
|
||||
// If it passes, we treat it like any other state, and return all the transitions.
|
||||
|
@@ -116,7 +116,7 @@ func TestFindAllMatches(t *testing.T) {
|
||||
t.Run(test.re+" "+test.str, func(t *testing.T) {
|
||||
re_postfix := shuntingYard(test.re)
|
||||
startState := thompson(re_postfix)
|
||||
matchIndices := findAllMatches(startState, test.str)
|
||||
matchIndices := findAllMatches(startState, []rune(test.str))
|
||||
if !slices.Equal(test.result, matchIndices) {
|
||||
t.Errorf("Wanted %v Got %v\n", test.result, matchIndices)
|
||||
}
|
||||
|
Reference in New Issue
Block a user