From 858e535fba88846dc8ecda50010834c27762ce92 Mon Sep 17 00:00:00 2001 From: Aadhavan Srinivasan Date: Wed, 5 Feb 2025 18:01:36 -0500 Subject: [PATCH] Continued implementing Thompson's algorithm --- regex/matching.go | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/regex/matching.go b/regex/matching.go index 760f7c1..3252742 100644 --- a/regex/matching.go +++ b/regex/matching.go @@ -153,7 +153,7 @@ func pruneIndices(indices []Match) []Match { func copyThread(to *nfaState, from nfaState) { to.threadSP = from.threadSP - to.threadGroups = from.threadGroups + to.threadGroups = append([]Group{}, from.threadGroups...) } // Find returns the 0-group of the leftmost match of the regex in the given string. @@ -325,10 +325,33 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in if currentState.groupBegin { currentState.threadGroups[currentState.groupNum].StartIdx = idx + // allMatches := make([]nfaState, 0) + // for _, v := range currentState.transitions { + // dereferenced := funcMap(v, func(s *nfaState) nfaState { + // return *s + // }) + // allMatches = append(allMatches, dereferenced...) + // } + // slices.Reverse(allMatches) + // for i := range allMatches { + // copyThread(&allMatches[i], currentState) + // } + // currentStates = append(currentStates, allMatches...) } - if currentState.groupEnd { currentState.threadGroups[currentState.groupNum].EndIdx = idx + // allMatches := make([]nfaState, 0) + // for _, v := range currentState.transitions { + // dereferenced := funcMap(v, func(s *nfaState) nfaState { + // return *s + // }) + // allMatches = append(allMatches, dereferenced...) + // } + // slices.Reverse(allMatches) + // for i := range allMatches { + // copyThread(&allMatches[i], currentState) + // } + // currentStates = append(currentStates, allMatches...) } // if currentState.isKleene { @@ -363,7 +386,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in } // Empty state - enqueue next state, do _not_ increment the SP - if currentState.isEmpty && currentState.assert == noneAssert { + if currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false { isEmptyAndNoAssertion = true } @@ -386,10 +409,16 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in allMatches[i].threadSP += 1 } } - nextStates = append(nextStates, allMatches...) + if currentState.groupBegin { + currentStates = slices.Insert(currentStates, currentStateIdx+1, allMatches...) + } else if currentState.groupEnd { + currentStates = append(currentStates, allMatches...) + } else { + nextStates = append(nextStates, allMatches...) + } } - if currentState.isLast { // Last state reached + if currentState.isLast && len(nextStates) == 0 { // Last state reached if foundMatch { if currentState.assert != noneAssert { currentState.threadGroups[0].EndIdx = idx @@ -400,7 +429,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in idx += 1 } return true, currentState.threadGroups, idx - } else if currentState.isEmpty && currentState.assert == noneAssert { + } else if isEmptyAndNoAssertion { currentState.threadGroups[0].EndIdx = idx if idx == currentState.threadGroups[0].StartIdx { idx++ @@ -410,7 +439,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in } } - copy(currentStates, nextStates) + currentStates = append([]nfaState{}, nextStates...) nextStates = nil } return false, []Group{}, i + 1