|
|
|
@@ -1,4 +1,4 @@
|
|
|
|
|
package main
|
|
|
|
|
package greg
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"fmt"
|
|
|
|
@@ -10,15 +10,15 @@ type Match []Group
|
|
|
|
|
|
|
|
|
|
// a Group represents a group. It contains the start index and end index of the match
|
|
|
|
|
type Group struct {
|
|
|
|
|
startIdx int
|
|
|
|
|
endIdx int
|
|
|
|
|
StartIdx int
|
|
|
|
|
EndIdx int
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func newMatch(size int) Match {
|
|
|
|
|
toRet := make([]Group, size)
|
|
|
|
|
for i := range toRet {
|
|
|
|
|
toRet[i].startIdx = -1
|
|
|
|
|
toRet[i].endIdx = -1
|
|
|
|
|
toRet[i].StartIdx = -1
|
|
|
|
|
toRet[i].EndIdx = -1
|
|
|
|
|
}
|
|
|
|
|
return toRet
|
|
|
|
|
}
|
|
|
|
@@ -27,7 +27,7 @@ func newMatch(size int) Match {
|
|
|
|
|
func (m Match) numValidGroups() int {
|
|
|
|
|
numValid := 0
|
|
|
|
|
for _, g := range m {
|
|
|
|
|
if g.startIdx >= 0 && g.endIdx >= 0 {
|
|
|
|
|
if g.StartIdx >= 0 && g.EndIdx >= 0 {
|
|
|
|
|
numValid++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@@ -35,7 +35,7 @@ func (m Match) numValidGroups() int {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns a string containing the indices of all (valid) groups in the match
|
|
|
|
|
func (m Match) toString() string {
|
|
|
|
|
func (m Match) ToString() string {
|
|
|
|
|
var toRet string
|
|
|
|
|
for i, g := range m {
|
|
|
|
|
if g.isValid() {
|
|
|
|
@@ -49,12 +49,12 @@ func (m Match) toString() string {
|
|
|
|
|
|
|
|
|
|
// Converts the Group into a string representation:
|
|
|
|
|
func (idx Group) toString() string {
|
|
|
|
|
return fmt.Sprintf("%d\t%d", idx.startIdx, idx.endIdx)
|
|
|
|
|
return fmt.Sprintf("%d\t%d", idx.StartIdx, idx.EndIdx)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns whether a group contains valid indices
|
|
|
|
|
func (g Group) isValid() bool {
|
|
|
|
|
return g.startIdx >= 0 && g.endIdx >= 0
|
|
|
|
|
return g.StartIdx >= 0 && g.EndIdx >= 0
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// takeZeroState takes the 0-state (if such a transition exists) for all states in the
|
|
|
|
@@ -70,11 +70,11 @@ func takeZeroState(states []*State, numGroups int, idx int) (rtv []*State, isZer
|
|
|
|
|
}
|
|
|
|
|
copy(s.threadGroups, state.threadGroups)
|
|
|
|
|
if s.groupBegin {
|
|
|
|
|
s.threadGroups[s.groupNum].startIdx = idx
|
|
|
|
|
s.threadGroups[s.groupNum].StartIdx = idx
|
|
|
|
|
// openParenGroups = append(openParenGroups, s.groupNum)
|
|
|
|
|
}
|
|
|
|
|
if s.groupEnd {
|
|
|
|
|
s.threadGroups[s.groupNum].endIdx = idx
|
|
|
|
|
s.threadGroups[s.groupNum].EndIdx = idx
|
|
|
|
|
// closeParenGroups = append(closeParenGroups, s.groupNum)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@@ -118,17 +118,17 @@ func zeroMatchPossible(str []rune, idx int, numGroups int, states ...*State) boo
|
|
|
|
|
func pruneIndices(indices []Match) []Match {
|
|
|
|
|
// First, sort the slice by the start indices
|
|
|
|
|
sort.Slice(indices, func(i, j int) bool {
|
|
|
|
|
return indices[i][0].startIdx < indices[j][0].startIdx
|
|
|
|
|
return indices[i][0].StartIdx < indices[j][0].StartIdx
|
|
|
|
|
})
|
|
|
|
|
toRet := make([]Match, 0, len(indices))
|
|
|
|
|
current := indices[0]
|
|
|
|
|
for _, idx := range indices[1:] {
|
|
|
|
|
// idx doesn't overlap with current (starts after current ends), so add current to result
|
|
|
|
|
// and update the current.
|
|
|
|
|
if idx[0].startIdx >= current[0].endIdx {
|
|
|
|
|
if idx[0].StartIdx >= current[0].EndIdx {
|
|
|
|
|
toRet = append(toRet, current)
|
|
|
|
|
current = idx
|
|
|
|
|
} else if idx[0].endIdx > current[0].endIdx {
|
|
|
|
|
} else if idx[0].EndIdx > current[0].EndIdx {
|
|
|
|
|
// idx overlaps, but it is longer, so update current
|
|
|
|
|
current = idx
|
|
|
|
|
}
|
|
|
|
@@ -147,7 +147,7 @@ func FindString(regex Reg, str string) string {
|
|
|
|
|
if err != nil {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
return str[match[0].startIdx:match[0].endIdx]
|
|
|
|
|
return str[match[0].StartIdx:match[0].EndIdx]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FindAllString is the 'all' version of FindString.
|
|
|
|
@@ -247,7 +247,7 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
|
|
|
|
|
start.threadGroups = newMatch(numGroups + 1)
|
|
|
|
|
// Check if the start state begins a group - if so, add the start index to our list
|
|
|
|
|
if start.groupBegin {
|
|
|
|
|
start.threadGroups[start.groupNum].startIdx = i
|
|
|
|
|
start.threadGroups[start.groupNum].StartIdx = i
|
|
|
|
|
// tempIndices[start.groupNum].startIdx = i
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -356,10 +356,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
|
|
|
|
|
// i++
|
|
|
|
|
// }
|
|
|
|
|
if tempIndices.numValidGroups() > 0 && tempIndices[0].isValid() {
|
|
|
|
|
if tempIndices[0].startIdx == tempIndices[0].endIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
|
|
|
|
|
return true, tempIndices, tempIndices[0].endIdx + 1
|
|
|
|
|
if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
|
|
|
|
|
return true, tempIndices, tempIndices[0].EndIdx + 1
|
|
|
|
|
} else {
|
|
|
|
|
return true, tempIndices, tempIndices[0].endIdx
|
|
|
|
|
return true, tempIndices, tempIndices[0].EndIdx
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false, []Group{}, startIdx
|
|
|
|
@@ -402,10 +402,10 @@ func findAllMatchesHelper(start *State, str []rune, offset int, numGroups int) (
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if tempIndices.numValidGroups() > 0 {
|
|
|
|
|
if tempIndices[0].startIdx == tempIndices[0].endIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
|
|
|
|
|
return true, tempIndices, tempIndices[0].endIdx + 1
|
|
|
|
|
if tempIndices[0].StartIdx == tempIndices[0].EndIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same part of the string over and over.
|
|
|
|
|
return true, tempIndices, tempIndices[0].EndIdx + 1
|
|
|
|
|
} else {
|
|
|
|
|
return true, tempIndices, tempIndices[0].endIdx
|
|
|
|
|
return true, tempIndices, tempIndices[0].EndIdx
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
|