package main
// matchIndex records the position of a single match within the searched
// string as a pair of indices. As populated by findAllMatchesHelper, startIdx
// is the index of the first matched byte and endIdx is the index at which the
// match was recognised (i.e. just past the last consumed byte), so an empty
// match has startIdx == endIdx.
type matchIndex struct {
	startIdx, endIdx int
}
// takeZeroState follows one EPSILON transition from every state in states
// and returns all of the states reached that way (in input order, duplicates
// included). States without an EPSILON transition contribute nothing.
//
// isZero is true when at least one of the returned states itself has an
// outgoing EPSILON transition, i.e. when calling takeZeroState again on the
// result could reach further states.
func takeZeroState(states []*State) (rtv []*State, isZero bool) {
	// Appending an empty transition list is a no-op, so no length guard is needed.
	for _, s := range states {
		rtv = append(rtv, s.transitions[EPSILON]...)
	}

	// Look for any reached state that can be advanced by another EPSILON step.
	for _, reached := range rtv {
		if len(reached.transitions[EPSILON]) != 0 {
			isZero = true
			break
		}
	}
	return rtv, isZero
}
// findAllMatches searches str for matches of the regex whose NFA begins at
// start and returns the index range of each match found. It is the entry
// point for findAllMatchesHelper, which it seeds with an empty result list
// and an offset of zero.
func findAllMatches(start *State, str string) (indices []matchIndex) {
	indices = []matchIndex{}
	return findAllMatchesHelper(start, str, indices, 0)
}
// findAllMatchesHelper is the recursive worker behind findAllMatches.
//
// It simulates the NFA rooted at start over str, looking for one match. When
// the simulation can no longer advance, the longest candidate seen so far (if
// any) is added to indices and the function recurses on the unscanned
// remainder of str.
//
// Parameters:
//   - start:   first state of the NFA for the regex.
//   - str:     the portion of the original input still to be scanned. It is
//     indexed byte-wise.
//   - indices: matches accumulated by earlier calls.
//   - offset:  position of str[0] within the original input; it is added to
//     every recorded index so results refer to the original string.
//
// It returns indices extended with any matches found in str.
func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset int) []matchIndex {
	// Base case: nothing left to scan.
	if len(str) == 0 {
		// A start state that is both a Kleene star and an accepting state
		// also matches the empty string.
		if start.isKleene && start.isLast {
			indices, _ = unique_append(indices, matchIndex{offset, offset})
		}
		return indices
	}

	startIdx := 0     // Index in str where the current match attempt begins
	i := 0            // Index in str of the next character to consume
	startingFrom := 0 // Value of i when this attempt began; used to guarantee progress

	// If the start state consumes a character, skip ahead to the first
	// character it accepts and step past it so the loop below can look at
	// transitions. A 0-state start consumes nothing, so advancing here would
	// lose the chance to match the first character.
	if !start.isEmpty {
		for i < len(str) && !start.content.contains(int(str[i])) {
			i++
		}
		startIdx = i
		startingFrom = i
		i++
	}
	currentStates := []*State{start}

	// Candidate matches for this attempt. Every time an accepting state is
	// active a candidate is recorded; once the attempt ends, the widest
	// candidate is kept, which gives greedy (longest-match) behaviour.
	tempIndices := make([]matchIndex, 0)

	for i < len(str) {
		// Extend the active set with every state reachable through 0-state
		// transitions.
		zeroReachable := followZeroStates(currentStates)
		currentStates, _ = unique_append(currentStates, zeroReachable...)

		// Take the transitions labelled with the current character, and note
		// a candidate match for every active accepting state.
		foundPath := false
		var nextStates []*State
		for _, state := range currentStates {
			if next := state.transitions[int(str[i])]; len(next) > 0 {
				nextStates = append(nextStates, next...)
				foundPath = true
			}
			if state.isLast {
				tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, i + offset})
			}
		}

		// Dead end: commit the best candidate and restart on the rest of the
		// string. If no character was consumed in this attempt, skip one so
		// the recursion does not retry the same input forever.
		if !foundPath {
			if i == startingFrom {
				i++
			}
			indices = appendLongestMatch(indices, tempIndices)
			return findAllMatchesHelper(start, str[i:], indices, offset+i)
		}

		// nextStates is freshly built each iteration, so it can become the
		// active set directly.
		currentStates = nextStates
		i++
	}

	// End of string: follow any remaining 0-state transitions, then check
	// whether an accepting state is active.
	zeroReachable := followZeroStates(currentStates)
	currentStates = append(currentStates, zeroReachable...)
	for _, state := range currentStates {
		// Only record the match if its start index lies inside str (the
		// skip-ahead above may have run off the end without finding a start).
		if state.isLast && startIdx < len(str) {
			tempIndices, _ = unique_append(tempIndices, matchIndex{startIdx + offset, i + offset})
		}
	}

	return appendLongestMatch(indices, tempIndices)
}

// followZeroStates returns the states reachable from states by repeatedly
// taking 0-state transitions via takeZeroState. It stops when takeZeroState
// reports nothing further to follow, or when a round adds no state that was
// not already collected (which also terminates cycles). The input states
// themselves are not included in the result.
func followZeroStates(states []*State) []*State {
	var reached []*State
	zeroStates, isZero := takeZeroState(states)
	reached = append(reached, zeroStates...)
	added := 0 // Number of new states contributed by the latest round
	for isZero {
		zeroStates, isZero = takeZeroState(reached)
		reached, added = unique_append(reached, zeroStates...)
		if added == 0 {
			break
		}
	}
	return reached
}

// appendLongestMatch picks the candidate spanning the widest index range and
// adds it to indices via unique_append. With no candidates, indices is
// returned unchanged. When comparing two candidates of equal width, the first
// argument handed to the comparison is kept.
func appendLongestMatch(indices []matchIndex, candidates []matchIndex) []matchIndex {
	if len(candidates) == 0 {
		return indices
	}
	longest := Reduce(candidates, func(i1 matchIndex, i2 matchIndex) matchIndex {
		if i1.endIdx-i1.startIdx >= i2.endIdx-i2.startIdx {
			return i1
		}
		return i2
	})
	indices, _ = unique_append(indices, longest)
	return indices
}