@ -1,5 +1,11 @@
package main
package main
// matchIndex records the location of a single match: the index at which the
// match starts and the index at which it ends.
// NOTE(review): whether endIdx is inclusive or exclusive is not visible from
// this definition - confirm against the code that fills it in.
type matchIndex struct {
	startIdx, endIdx int
}
// takeZeroState takes the 0-state (if such a transition exists) for all states in the
// takeZeroState takes the 0-state (if such a transition exists) for all states in the
// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
// the second return value is true.
// the second return value is true.
@ -19,7 +25,17 @@ func takeZeroState(states []*State) (rtv []*State, isZero bool) {
// match tries to match the regex represented by given start-state, with
// match tries to match the regex represented by given start-state, with
// the given string
// the given string
func match ( start * State , str string ) ( startIdx int , endIdx int , matched bool ) {
func match(start *State, str string) (indices []matchIndex) {
	// Seed the search with an empty (non-nil) result list and a zero offset;
	// matchHelper does the matching and hands back the accumulated indices.
	found := []matchIndex{}
	return matchHelper(start, str, found, 0)
}
func matchHelper ( start * State , str string , indices [ ] matchIndex , offset int ) [ ] matchIndex {
// 'Base case' - exit if string is empty
if len ( str ) == 0 {
return indices
}
startIdx := 0
endIdx := 0
currentStates := make ( [ ] * State , 0 )
currentStates := make ( [ ] * State , 0 )
tempStates := make ( [ ] * State , 0 ) // Used to store states that should be used in next loop iteration
tempStates := make ( [ ] * State , 0 ) // Used to store states that should be used in next loop iteration
i := 0 // Index in string
i := 0 // Index in string
@ -28,12 +44,12 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
for i < len ( str ) && int ( str [ i ] ) != start . content {
for i < len ( str ) && int ( str [ i ] ) != start . content {
i ++
i ++
}
}
startIdx = i
i ++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
i ++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
}
}
// TODO - If start state is kleene star, try to match the next state
// TODO - If start state is kleene star, try to match the next state
currentStates = append ( currentStates , start )
currentStates = append ( currentStates , start )
startIdx = i
// Main loop
// Main loop
for i < len ( str ) {
for i < len ( str ) {
@ -57,8 +73,10 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
// This enables the 'greedy' behavior - last-state status is only checked if we can't match anything else
// This enables the 'greedy' behavior - last-state status is only checked if we can't match anything else
if state . isLast {
if state . isLast {
endIdx = i
endIdx = i
return startIdx , endIdx , true
indices = append ( indices , matchIndex { startIdx + offset , endIdx + offset } )
}
}
// Recursion - match with rest of string
return matchHelper ( start , str [ i : ] , indices , offset + i )
}
}
}
}
currentStates = make ( [ ] * State , len ( tempStates ) )
currentStates = make ( [ ] * State , len ( tempStates ) )
@ -80,12 +98,10 @@ func match(start *State, str string) (startIdx int, endIdx int, matched bool) {
for _ , state := range currentStates {
for _ , state := range currentStates {
if state . isLast {
if state . isLast {
endIdx = i
endIdx = i
return startIdx , endIdx , true
indices = append ( indices , matchIndex { startIdx + offset , endIdx + offset } )
} else {
return - 1 , - 1 , false
}
}
}
}
// Default
// Default
return - 1 , - 1 , false
return indices
}
}