diff --git a/regex/matching.go b/regex/matching.go index 06fd16b..dab6446 100644 --- a/regex/matching.go +++ b/regex/matching.go @@ -2,7 +2,6 @@ package regex import ( "fmt" - "slices" "sort" ) @@ -252,32 +251,36 @@ func (regex Reg) FindAllSubmatch(str string) []Match { return indices } -func addStateToList(idx int, list []nfaState, state nfaState) []nfaState { +func addStateToList(idx int, list []nfaState, state nfaState, threadGroups []Group) []nfaState { if stateExists(list, state) { return list } - if state.isAlternation { - copyThread(state.next, state) - list = append(list, addStateToList(idx, list, *state.next)...) + if state.isKleene || state.isQuestion { copyThread(state.splitState, state) - list = append(list, addStateToList(idx, list, *state.splitState)...) + list = addStateToList(idx, list, *state.splitState, threadGroups) + copyThread(state.next, state) + list = addStateToList(idx, list, *state.next, threadGroups) return list } - if state.isKleene { - copyThread(state.splitState, state) - list = append(list, addStateToList(idx, list, *state.splitState)...) + if state.isAlternation { copyThread(state.next, state) - list = append(list, addStateToList(idx, list, *state.next)...) + list = addStateToList(idx, list, *state.next, threadGroups) + copyThread(state.splitState, state) + list = addStateToList(idx, list, *state.splitState, threadGroups) return list } + + state.threadGroups = append([]Group{}, threadGroups...) if state.groupBegin { state.threadGroups[state.groupNum].StartIdx = idx + return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...) } if state.groupEnd { - state.threadGroups[state.groupNum].StartIdx = idx + state.threadGroups[state.groupNum].EndIdx = idx + return append(list, addStateToList(idx, list, *state.next, state.threadGroups)...) } - copyThread(state.next, state) - return append(list, *state.next) + state.threadGroups = append([]Group{}, threadGroups...) + return append(list, state) } @@ -335,138 +338,113 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in // tempIndices[start.groupNum].startIdx = i //} - currentStates = append(currentStates, *start) - var foundMatch bool - var isEmptyAndNoAssertion bool + start.threadGroups = newMatch(numGroups + 1) + start.threadGroups[0].StartIdx = i + currentStates = addStateToList(i, currentStates, *start, start.threadGroups) + var match Match = nil + // var isEmptyAndNoAssertion bool // Main loop for idx := i; idx <= len(str); idx++ { + if len(currentStates) == 0 { + break + } for currentStateIdx := 0; currentStateIdx < len(currentStates); currentStateIdx++ { currentState := currentStates[currentStateIdx] - foundMatch = false - isEmptyAndNoAssertion = false if currentState.threadGroups == nil { currentState.threadGroups = newMatch(numGroups + 1) currentState.threadGroups[0].StartIdx = idx } - if currentState.groupBegin { - currentState.threadGroups[currentState.groupNum].StartIdx = idx - // allMatches := make([]nfaState, 0) - // for _, v := range currentState.transitions { - // dereferenced := funcMap(v, func(s *nfaState) nfaState { - // return *s - // }) - // allMatches = append(allMatches, dereferenced...) - // } - // slices.Reverse(allMatches) - // for i := range allMatches { - // copyThread(&allMatches[i], currentState) - // } - // currentStates = append(currentStates, allMatches...) - } - if currentState.groupEnd { - currentState.threadGroups[currentState.groupNum].EndIdx = idx - // allMatches := make([]nfaState, 0) - // for _, v := range currentState.transitions { - // dereferenced := funcMap(v, func(s *nfaState) nfaState { - // return *s - // }) - // allMatches = append(allMatches, dereferenced...) - // } - // slices.Reverse(allMatches) - // for i := range allMatches { - // copyThread(&allMatches[i], currentState) - // } - // currentStates = append(currentStates, allMatches...) + if currentState.isLast { + currentState.threadGroups[0].EndIdx = idx + match = append([]Group{}, currentState.threadGroups...) + break + } else if !currentState.isAlternation && !currentState.isKleene && !currentState.isQuestion && !currentState.groupBegin && !currentState.groupEnd { // Normal character or assertion + if currentState.contentContains(str, idx) { + nextStates = addStateToList(idx+1, nextStates, *currentState.next, currentState.threadGroups) + } } - // if currentState.isKleene { - // // Append the next-state (after the kleene), then append the kleene state - // allMatches := make([]*nfaState, 0) - // for _, v := range currentState.transitions { - // allMatches = append(allMatches, v...) + // if currentState.groupBegin { + // currentState.threadGroups[currentState.groupNum].StartIdx = idx // } - // slices.Reverse(allMatches) - // for _, m := range allMatches { - // m.threadGroups = currentState.threadGroups - // m.threadSP = idx + // if currentState.groupEnd { + // currentState.threadGroups[currentState.groupNum].EndIdx = idx // } - // currentStates = append(currentStates, allMatches...) - // - // // kleeneState := currentState.kleeneState - // // kleeneState.threadGroups = currentState.threadGroups - // // kleeneState.threadSP = currentState.threadSP - // // currentStates = append(currentStates, kleeneState) - // continue - // } // Alternation - enqueue left then right state, and continue - if currentState.isAlternation { - if currentState.isKleene { // Reverse order of adding things - rightState := currentState.splitState - copyThread(rightState, currentState) - currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState) - leftState := currentState.next - copyThread(leftState, currentState) - currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState) - } else { - leftState := currentState.next - copyThread(leftState, currentState) - currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState) - rightState := currentState.splitState - copyThread(rightState, currentState) - currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState) - } - continue - } + // if currentState.isAlternation { + // if currentState.isKleene { // Reverse order of adding things + // rightState := currentState.splitState + // copyThread(rightState, currentState) + // currentStates = slices.Insert(currentStates, currentStateIdx+1, *rightState) + // leftState := currentState.next + // copyThread(leftState, currentState) + // currentStates = slices.Insert(currentStates, currentStateIdx+2, *leftState) + // } else { + // leftState := currentState.next + // copyThread(leftState, currentState) + // currentStates = slices.Insert(currentStates, currentStateIdx+1, *leftState) + // rightState := currentState.splitState + // copyThread(rightState, currentState) + // currentStates = slices.Insert(currentStates, currentStateIdx+2, *rightState) + // } + // continue + // } // Empty state - enqueue next state, do _not_ increment the SP - if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false { - isEmptyAndNoAssertion = true - } - - if currentState.contentContains(str, idx) { - foundMatch = true - } - - if isEmptyAndNoAssertion || foundMatch { - nextMatch := *(currentState.next) - copyThread(&nextMatch, currentState) - if currentState.groupBegin { - // if !stateExists(currentStates, nextMatch) { - currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) - //} - } else if currentState.groupEnd { - if !stateExists(currentStates, nextMatch) { - currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch) - } - } else if currentState.assert != noneAssert { - if !stateExists(currentStates, nextMatch) { - currentStates = append(currentStates, nextMatch) - } - } else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd { - if !stateExists(currentStates, nextMatch) { - currentStates = append(currentStates, nextMatch) - } - } else { - if !stateExists(nextStates, nextMatch) { - nextStates = append(nextStates, nextMatch) - } - } - } - - if currentState.isLast && len(nextStates) == 0 { // Last state reached - currentState.threadGroups[0].EndIdx = idx - if idx == currentState.threadGroups[0].StartIdx { - idx += 1 - } - return true, currentState.threadGroups, idx - } + // if !currentState.isAlternation && currentState.isEmpty && currentState.assert == noneAssert { //&& currentState.groupBegin == false && currentState.groupEnd == false { + // isEmptyAndNoAssertion = true + // } + // + // if currentState.contentContains(str, idx) { + // foundMatch = true + // } + // + // if isEmptyAndNoAssertion || foundMatch { + // nextMatch := *(currentState.next) + // copyThread(&nextMatch, currentState) + // if currentState.groupBegin { + // // if !stateExists(currentStates, nextMatch) { + // currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) + // //} + // } else if currentState.groupEnd { + // if !stateExists(currentStates, nextMatch) { + // currentStates = slices.Insert(currentStates, currentStateIdx+1, nextMatch) // append(currentStates, nextMatch) + // } + // } else if currentState.assert != noneAssert { + // if !stateExists(currentStates, nextMatch) { + // currentStates = append(currentStates, nextMatch) + // } + // } else if currentState.isEmpty && !currentState.groupBegin && !currentState.groupEnd { + // if !stateExists(currentStates, nextMatch) { + // currentStates = append(currentStates, nextMatch) + // } + // } else { + // if !stateExists(nextStates, nextMatch) { + // nextStates = append(nextStates, nextMatch) + // } + // } + // } + // + // if currentState.isLast && len(nextStates) == 0 { // Last state reached + // currentState.threadGroups[0].EndIdx = idx + // if idx == currentState.threadGroups[0].StartIdx { + // idx += 1 + // } + // return true, currentState.threadGroups, idx + // } } currentStates = append([]nfaState{}, nextStates...) nextStates = nil } + if match != nil { + if offset == match[0].EndIdx { + return true, match, match[0].EndIdx + 1 + } + return true, match, match[0].EndIdx + } return false, []Group{}, i + 1 // zeroStates := make([]*nfaState, 0) // // Keep taking zero-states, until there are no more left to take