@ -6,6 +6,21 @@ type matchIndex struct {
endIdx int
endIdx int
}
}
// overlaps reports whether idx is contained in (or equal to) any of the
// matches in idxes. It is used before adding a match to a result slice, so
// that a match already covered by a larger one is not added again.
//
// A zero-length idx sitting exactly on the start or end boundary of another
// match is not counted as overlapping that match.
func overlaps(idx matchIndex, idxes []matchIndex) bool {
	zeroLength := idx.startIdx == idx.endIdx
	for _, other := range idxes {
		// Skip candidates that do not fully enclose idx.
		enclosed := other.startIdx <= idx.startIdx && idx.endIdx <= other.endIdx
		if !enclosed {
			continue
		}
		// An empty match touching either edge of the other match is allowed.
		onEdge := idx.startIdx == other.startIdx || idx.startIdx == other.endIdx
		if zeroLength && onEdge {
			continue
		}
		return true
	}
	return false
}
// takeZeroState takes the 0-state (if such a transition exists) for all states in the
// takeZeroState takes the 0-state (if such a transition exists) for all states in the
// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
// the second parameter is true.
// the second parameter is true.
@ -23,31 +38,66 @@ func takeZeroState(states []*State) (rtv []*State, isZero bool) {
return rtv , false
return rtv , false
}
}
// zeroMatchPossible reports whether a zero-length match is possible from any
// of the given states.
//
// It starts from the given states plus whatever takeZeroState yields for
// them, then keeps calling takeZeroState on the accumulated set for as long
// as it reports a 0-state and at least one new unique state is added. A
// zero-length match is possible if any collected state is empty, carries no
// assertion (assert == NONE) and is a last state.
//
// NOTE(review): the original comment says this mirrors the zero-state
// expansion done inside the matching loop; the two could share a helper.
func zeroMatchPossible(states ...*State) bool {
	reached, more := takeZeroState(states)

	collected := make([]*State, 0)
	collected = append(collected, states...)
	collected = append(collected, reached...)

	for more {
		var added int // number of unique states added this round
		reached, more = takeZeroState(collected)
		collected, added = unique_append(collected, reached...)
		if added == 0 {
			// Nothing new was added, so further rounds cannot change the set.
			break
		}
	}

	for _, s := range collected {
		if s.isEmpty && s.assert == NONE && s.isLast {
			return true
		}
	}
	return false
}
// findAllMatches tries to findAllMatches the regex represented by given start-state, with
// findAllMatches tries to findAllMatches the regex represented by given start-state, with
// the given string
// the given string
func findAllMatches ( start * State , str string ) ( indices [ ] matchIndex ) {
func findAllMatches ( start * State , str string ) ( indices [ ] matchIndex ) {
return findAllMatchesHelper ( start , str , make ( [ ] matchIndex , 0 ) , 0 )
return findAllMatchesHelper ( start , str , make ( [ ] matchIndex , 0 ) , 0 )
}
}
func findAllMatchesHelper ( start * State , str string , indices [ ] matchIndex , offset int ) [ ] matchIndex {
func findAllMatchesHelper ( start * State , str string , indices [ ] matchIndex , offset int ) [ ] matchIndex {
// 'Base case' - exit if string is empty.
// Base case - exit if offset exceeds string's length
if len ( str ) == 0 {
if offset > len ( str ) {
// If the start is a Kleene star, then it should also match an empty string.
return indices
if start . isKleene && start . isLast {
}
indices , _ = unique_append ( indices , matchIndex { offset , offset } )
// 'Base case' - if we are at the end of the string, check if we can add a zero-length match
if offset == len ( str ) {
// Get all zero-state matches. If we can get to a zero-state without matching anything, we
// can add a zero-length match. This is all true only if the start state itself matches nothing.
if start . isEmpty && start . assert == NONE {
if zeroMatchPossible ( start ) {
if ! overlaps ( matchIndex { offset , offset } , indices ) {
indices , _ = unique_append ( indices , matchIndex { offset , offset } )
}
}
}
}
return indices
return indices
}
}
foundPath := false
foundPath := false
startIdx := 0
startIdx := offset
endIdx := 0
endIdx := offset
currentStates := make ( [ ] * State , 0 )
currentStates := make ( [ ] * State , 0 )
tempStates := make ( [ ] * State , 0 ) // Used to store states that should be used in next loop iteration
tempStates := make ( [ ] * State , 0 ) // Used to store states that should be used in next loop iteration
i := 0 // Index in string
i := offset // Index in string
startingFrom := i // Store starting index
startingFrom := i // Store starting index
// Increment until we hit a character matching the start state (assuming not 0-state)
// Increment until we hit a character matching the start state (assuming not 0-state)
if start . isEmpty == false {
if start . isEmpty == false {
for i < len ( str ) && ! start . content . contains ( int ( str [ i ] ) ) {
for i < len ( str ) && ! start . contentContains ( [ ] rune ( str ) , i ) {
i ++
i ++
}
}
startIdx = i
startIdx = i
@ -83,21 +133,41 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
tempStates = nil
tempStates = nil
// Take any transitions corresponding to current character
// Take any transitions corresponding to current character
numStatesMatched := 0 // The number of states which had at least 1 match for this round
assertionFailed := false // Whether or not an assertion failed for this round
for _ , state := range currentStates {
for _ , state := range currentStates {
if len ( state . transitions [ int ( str [ i ] ) ] ) > 0 {
matches , numMatches := state . matchesFor ( [ ] rune ( str ) , i )
tempStates = append ( tempStates , state . transitions [ int ( str [ i ] ) ] ... )
if numMatches > 0 {
numStatesMatched ++
tempStates = append ( tempStates , matches ... )
foundPath = true
foundPath = true
}
}
if numMatches < 0 {
assertionFailed = true
}
if state . isLast {
if state . isLast {
endIdx = i
endIdx = i
tempIndices , _ = unique_append ( tempIndices , matchIndex { startIdx + offset , endIdx + offset } )
tempIndices , _ = unique_append ( tempIndices , matchIndex { startIdx , endIdx } )
}
}
}
}
// Recursion - match with rest of string if we have nowhere to go. If we haven't moved in the string, increment the counter by 1 to ensure we don't keep trying the same string over and over
if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed - bort
if foundPath == false {
if i == startingFrom {
if i == startingFrom {
i ++
i ++
}
}
return findAllMatchesHelper ( start , str , indices , i )
}
// Recursion - match with rest of string if we have nowhere to go.
// First check if we can find a zero-length match
if foundPath == false {
if zeroMatchPossible ( currentStates ... ) {
tempIndices , _ = unique_append ( tempIndices , matchIndex { startIdx , startIdx } )
}
// If we haven't moved in the string, increment the counter by 1
// to ensure we don't keep trying the same string over and over.
// if i == startingFrom {
startIdx ++
// i++
// }
// Get the maximum index-range from the list
// Get the maximum index-range from the list
if len ( tempIndices ) > 0 {
if len ( tempIndices ) > 0 {
indexToAdd := Reduce ( tempIndices , func ( i1 matchIndex , i2 matchIndex ) matchIndex {
indexToAdd := Reduce ( tempIndices , func ( i1 matchIndex , i2 matchIndex ) matchIndex {
@ -108,9 +178,11 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
}
}
return i2
return i2
} )
} )
indices , _ = unique_append ( indices , indexToAdd )
if ! overlaps ( indexToAdd , indices ) {
indices , _ = unique_append ( indices , indexToAdd )
}
}
}
return findAllMatchesHelper ( start , str [ i : ] , indices , offset + i )
return findAllMatchesHelper ( start , str , indices , startIdx )
}
}
currentStates = make ( [ ] * State , len ( tempStates ) )
currentStates = make ( [ ] * State , len ( tempStates ) )
copy ( currentStates , tempStates )
copy ( currentStates , tempStates )
@ -137,9 +209,9 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
for _ , state := range currentStates {
for _ , state := range currentStates {
// Only add the match if the start index is in bounds
// Only add the match if the start index is in bounds
if state . isLast && startIdx + offset < len ( str ) + offset {
if state . isLast && startIdx < len ( str ) {
endIdx = i
endIdx = i
tempIndices , _ = unique_append ( tempIndices , matchIndex { startIdx + offset , endIdx + offset } )
tempIndices , _ = unique_append ( tempIndices , matchIndex { startIdx , endIdx } )
}
}
}
}
// Get the maximum index-range from the list
// Get the maximum index-range from the list
@ -152,9 +224,11 @@ func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset
}
}
return i2
return i2
} )
} )
indices , _ = unique_append ( indices , indexToAdd )
if ! overlaps ( indexToAdd , indices ) {
indices , _ = unique_append ( indices , indexToAdd )
}
}
}
// Default
// Default - call on empty string to get any trailing zero-length matches
return indices
return findAllMatchesHelper ( start , str , indices , startIdx + 1 )
}
}