package main
// matchIndex records where a single match lies in the searched string.
// findAllMatchesHelper fills startIdx with the index at which the match begins
// and endIdx with the index it had reached when the match was recorded.
type matchIndex struct {
	startIdx, endIdx int
}
// takeZeroState follows one EPSILON transition out of every state in states
// and returns the states reached that way, in the order they were encountered
// (duplicates are kept). States without an EPSILON transition contribute
// nothing, so the result is nil when no state has one.
//
// isZero reports whether at least one of the returned states itself has an
// outgoing EPSILON transition, i.e. whether another hop is possible.
func takeZeroState(states []*State) (rtv []*State, isZero bool) {
	// Appending an empty transition list is a no-op, so no length guard is needed.
	for _, s := range states {
		rtv = append(rtv, s.transitions[EPSILON]...)
	}

	for _, reached := range rtv {
		if len(reached.transitions[EPSILON]) != 0 {
			isZero = true
			break
		}
	}
	return rtv, isZero
}
// findAllMatches searches str for matches of the regex whose state graph
// begins at start and returns the index pair of every match that
// findAllMatchesHelper records. The search is seeded with an empty (non-nil)
// result slice and an offset of zero.
func findAllMatches(start *State, str string) (indices []matchIndex) {
	indices = findAllMatchesHelper(start, str, []matchIndex{}, 0)
	return indices
}
// findAllMatchesHelper simulates the state graph rooted at start against str
// and appends a matchIndex to indices for every match it records.
//
// Parameters:
//   - start:   first state of the graph. When start.isEmpty is false, the scan
//     first skips ahead to the first byte equal to start.content and treats
//     that byte as consumed by start.
//   - str:     the part of the input still to be searched.
//   - indices: matches collected so far; new matches are appended to it.
//   - offset:  position of str[0] in the original input. It is added to every
//     reported index so results refer to the original string.
//
// Returns indices with the newly found matches appended. endIdx of a match is
// the index of the first byte that was not consumed by it.
//
// When no state can consume the current byte, the search restarts on the
// remainder of the string by calling itself.
func findAllMatchesHelper(start *State, str string, indices []matchIndex, offset int) []matchIndex {
	// Base case: nothing left to search.
	if len(str) == 0 {
		return indices
	}

	startIdx := 0
	i := 0 // Index in str of the next byte to consume.

	if !start.isEmpty {
		// Skip ahead to the first byte the start state can match.
		for i < len(str) && int(str[i]) != start.content {
			i++
		}
		// The start byte never occurs, so nothing can match. Returning here
		// avoids reporting a match that lies past the end of the string.
		if i == len(str) {
			return indices
		}
		startIdx = i
		// The start state has consumed str[startIdx]; continue with the next
		// byte. An empty start state consumes nothing, so it must not advance,
		// otherwise the first byte could never be matched.
		i++
	}

	currentStates := []*State{start}

	for i < len(str) {
		// Add everything reachable through EPSILON transitions.
		currentStates = zeroStateClosure(currentStates)

		// Take the transitions for the current byte.
		var nextStates []*State
		for _, state := range currentStates {
			if targets := state.transitions[int(str[i])]; len(targets) > 0 {
				nextStates = append(nextStates, targets...)
				continue
			}
			// 'Greedy' behavior: a state is only checked for being a last
			// state when it cannot consume the current byte.
			if state.isLast {
				indices = append(indices, matchIndex{startIdx + offset, i + offset})
			}
		}

		if len(nextStates) == 0 {
			// Dead end: restart the search on the rest of the string. Only
			// when no byte has been consumed at all (i == 0, which can happen
			// only with an empty start state) must one byte be skipped to
			// guarantee progress. Skipping in any other case would jump over
			// a byte that may begin the next match.
			if i == 0 {
				i++
			}
			return findAllMatchesHelper(start, str[i:], indices, offset+i)
		}

		currentStates = nextStates
		i++
	}

	// End of string: any last state reachable through EPSILON transitions
	// completes a match.
	for _, state := range zeroStateClosure(currentStates) {
		if state.isLast {
			indices = append(indices, matchIndex{startIdx + offset, i + offset})
		}
	}
	return indices
}

// zeroStateClosure returns states together with every state reachable from
// them through any number of EPSILON transitions. Each state appears once, in
// the order it was first reached. Tracking visited states guarantees
// termination on EPSILON cycles and long EPSILON chains.
func zeroStateClosure(states []*State) []*State {
	seen := make(map[*State]struct{}, len(states))
	closure := make([]*State, 0, len(states))

	frontier := states
	for len(frontier) > 0 {
		// Keep only the states not visited before; only those are expanded.
		var added []*State
		for _, state := range frontier {
			if _, dup := seen[state]; dup {
				continue
			}
			seen[state] = struct{}{}
			closure = append(closure, state)
			added = append(added, state)
		}
		frontier, _ = takeZeroState(added)
	}
	return closure
}