package main
import (
"fmt"
"sort"
)
// a Match stores a slice of all the capturing groups in a match.
type Match [ ] Group
// a Group represents a group. It contains the start index and end index of the match
type Group struct {
startIdx int
endIdx int
}
func newMatch ( size int ) Match {
toRet := make ( [ ] Group , size )
for i := range toRet {
toRet [ i ] . startIdx = - 1
toRet [ i ] . endIdx = - 1
}
return toRet
}
// Returns the number of valid groups in the match
func ( m Match ) numValidGroups ( ) int {
numValid := 0
for _ , g := range m {
if g . startIdx >= 0 && g . endIdx >= 0 {
numValid ++
}
}
return numValid
}
// Returns a string containing the indices of all (valid) groups in the match
func ( m Match ) toString ( ) string {
var toRet string
for i , g := range m {
if g . isValid ( ) {
toRet += fmt . Sprintf ( "Group %d\n" , i )
toRet += g . toString ( )
toRet += "\n"
}
}
return toRet
}
// Converts the Group into a string representation:
func ( idx Group ) toString ( ) string {
return fmt . Sprintf ( "%d\t%d" , idx . startIdx , idx . endIdx )
}
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
// Returns whether a group contains valid indices
func ( g Group ) isValid ( ) bool {
return g . startIdx >= 0 && g . endIdx >= 0
}
// takeZeroState takes the 0-state (if such a transition exists) for all states in the
// given slice. It returns the resulting states. If any of the resulting states is a 0-state,
// the second ret val is true.
// If a state begins or ends a capturing group, its 'thread' is updated to contain the correct index.
func takeZeroState ( states [ ] * State , numGroups int , idx int ) ( rtv [ ] * State , isZero bool ) {
for _ , state := range states {
if len ( state . transitions [ EPSILON ] ) > 0 {
for _ , s := range state . transitions [ EPSILON ] {
if s . threadGroups == nil {
s . threadGroups = newMatch ( numGroups + 1 )
}
copy ( s . threadGroups , state . threadGroups )
if s . groupBegin {
s . threadGroups [ s . groupNum ] . startIdx = idx
// openParenGroups = append(openParenGroups, s.groupNum)
}
if s . groupEnd {
s . threadGroups [ s . groupNum ] . endIdx = idx
// closeParenGroups = append(closeParenGroups, s.groupNum)
}
}
rtv = append ( rtv , state . transitions [ EPSILON ] ... )
}
}
for _ , state := range rtv {
if len ( state . transitions [ EPSILON ] ) > 0 {
return rtv , true
}
}
return rtv , false
}
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
// zeroMatchPossible returns true if a zero-length match is possible
// from any of the given states, given the string and our position in it.
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
// It uses the same algorithm to find zero-states as the one inside the loop,
// so I should probably put it in a function.
func zeroMatchPossible ( str [ ] rune , idx int , numGroups int , states ... * State ) bool {
zeroStates , isZero := takeZeroState ( states , numGroups , idx )
tempstates := make ( [ ] * State , 0 , len ( zeroStates ) + len ( states ) )
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
tempstates = append ( tempstates , states ... )
tempstates = append ( tempstates , zeroStates ... )
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
num_appended := 0 // number of unique states addded to tempstates
for isZero == true {
zeroStates , isZero = takeZeroState ( tempstates , numGroups , idx )
tempstates , num_appended = unique_append ( tempstates , zeroStates ... )
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
if num_appended == 0 { // break if we haven't appended any more unique values
break
}
}
for _ , state := range tempstates {
if state . isEmpty && ( state . assert == NONE || state . checkAssertion ( str , idx ) ) && state . isLast {
return true
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
}
}
return false
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
}
// Prunes the slice by removing overlapping indices.
func pruneIndices ( indices [ ] Match ) [ ] Match {
// First, sort the slice by the start indices
sort . Slice ( indices , func ( i , j int ) bool {
return indices [ i ] [ 0 ] . startIdx < indices [ j ] [ 0 ] . startIdx
} )
toRet := make ( [ ] Match , 0 , len ( indices ) )
current := indices [ 0 ]
for _ , idx := range indices [ 1 : ] {
// idx doesn't overlap with current (starts after current ends), so add current to result
// and update the current.
if idx [ 0 ] . startIdx >= current [ 0 ] . endIdx {
toRet = append ( toRet , current )
current = idx
} else if idx [ 0 ] . endIdx > current [ 0 ] . endIdx {
// idx overlaps, but it is longer, so update current
current = idx
}
}
// Add last state
toRet = append ( toRet , current )
return toRet
}
// FindString returns a _string_ containing the _text_ of the _leftmost_ match of
// the regex, in the given string. The return value will be an empty string in two situations:
// 1. No match was found
// 2. The match was an empty string
func FindString ( regex Reg , str string ) string {
match , err := FindNthMatch ( regex , str , 1 )
if err != nil {
return ""
}
return str [ match [ 0 ] . startIdx : match [ 0 ] . endIdx ]
}
// FindAllString is the 'all' version of FindString.
// It returns a _slice of strings_ containing the _text_ of _all_ matches of
// the regex, in the given string.
//func FindAllString(regex Reg, str []string) []string {
//
//}
// FindNthMatch finds the 'n'th match of the regex represented by the given start-state, with
// the given string.
// It returns an error (!= nil) if there are fewer than 'n' matches in the string.
func FindNthMatch ( regex Reg , str string , n int ) ( Match , error ) {
idx := 0
matchNum := 0
str_runes := [ ] rune ( str )
var matchFound bool
var matchIdx Match
for idx <= len ( str_runes ) {
matchFound , matchIdx , idx = findAllMatchesHelper ( regex . start , str_runes , idx , regex . numGroups )
if matchFound {
matchNum ++
}
if matchNum == n {
return matchIdx , nil
}
}
// We haven't found the nth match after scanning the string - Return an error
return nil , fmt . Errorf ( "Invalid match index. Too few matches found." )
}
// FindAllMatches tries to find all matches of the regex represented by given start-state, with
// the given string
func FindAllMatches ( regex Reg , str string ) [ ] Match {
idx := 0
str_runes := [ ] rune ( str )
var matchFound bool
var matchIdx Match
indices := make ( [ ] Match , 0 )
for idx <= len ( str_runes ) {
matchFound , matchIdx , idx = findAllMatchesHelper ( regex . start , str_runes , idx , regex . numGroups )
if matchFound {
indices = append ( indices , matchIdx )
}
}
if len ( indices ) > 0 {
return pruneIndices ( indices )
}
return indices
}
// Helper for FindAllMatches. Returns whether it found a match, the
// first Match it finds, and how far it got into the string ie. where
// the next search should start from.
//
// Might return duplicates or overlapping indices, so care must be taken to prune the resulting array.
func findAllMatchesHelper ( start * State , str [ ] rune , offset int , numGroups int ) ( bool , Match , int ) {
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
// Base case - exit if offset exceeds string's length
if offset > len ( str ) {
// The second value here shouldn't be used, because we should exit when the third return value is > than len(str)
return false , [ ] Group { } , offset
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
}
// Hold a list of match indices for the current run. When we
// can no longer find a match, the match with the largest range is
// chosen as the match for the entire string.
// This allows us to pick the longest possible match (which is how greedy matching works).
// COMMENT ABOVE IS CURRENTLY NOT UP-TO-DATE
tempIndices := newMatch ( numGroups + 1 )
foundPath := false
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
startIdx := offset
endIdx := offset
currentStates := make ( [ ] * State , 0 )
tempStates := make ( [ ] * State , 0 ) // Used to store states that should be used in next loop iteration
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
i := offset // Index in string
startingFrom := i // Store starting index
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
// If the first state is an assertion, makes sure the assertion
// is true before we do _anything_ else.
if start . assert != NONE {
if start . checkAssertion ( str , offset ) == false {
i ++
return false , [ ] Group { } , i
}
}
// Increment until we hit a character matching the start state (assuming not 0-state)
if start . isEmpty == false {
for i < len ( str ) && ! start . contentContains ( str , i ) {
i ++
}
startIdx = i
startingFrom = i
i ++ // Advance to next character (if we aren't at a 0-state, which doesn't match anything), so that we can check for transitions. If we advance at a 0-state, we will never get a chance to match the first character
}
start . threadGroups = newMatch ( numGroups + 1 )
// Check if the start state begins a group - if so, add the start index to our list
if start . groupBegin {
start . threadGroups [ start . groupNum ] . startIdx = i
// tempIndices[start.groupNum].startIdx = i
}
currentStates = append ( currentStates , start )
// Main loop
for i < len ( str ) {
foundPath = false
zeroStates := make ( [ ] * State , 0 )
// Keep taking zero-states, until there are no more left to take
// Objective: If any of our current states have transitions to 0-states, replace them with the 0-state. Do this until there are no more transitions to 0-states, or there are no more unique 0-states to take.
zeroStates , isZero := takeZeroState ( currentStates , numGroups , i )
tempStates = append ( tempStates , zeroStates ... )
num_appended := 0
for isZero == true {
zeroStates , isZero = takeZeroState ( tempStates , numGroups , i )
tempStates , num_appended = unique_append ( tempStates , zeroStates ... )
if num_appended == 0 { // Break if we haven't appended any more unique values
break
}
}
currentStates , _ = unique_append ( currentStates , tempStates ... )
tempStates = nil
// Take any transitions corresponding to current character
numStatesMatched := 0 // The number of states which had at least 1 match for this round
assertionFailed := false // Whether or not an assertion failed for this round
lastStateInList := false // Whether or not a last state was in our list of states
var lastStatePtr * State = nil // Pointer to the last-state, if it was found
lastLookaroundInList := false // Whether or not a last state (that is a lookaround) was in our list of states
for _ , state := range currentStates {
matches , numMatches := state . matchesFor ( str , i )
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
if numMatches > 0 {
numStatesMatched ++
tempStates = append ( tempStates , matches ... )
foundPath = true
for _ , m := range matches {
if m . threadGroups == nil {
m . threadGroups = newMatch ( numGroups + 1 )
}
copy ( m . threadGroups , state . threadGroups )
}
}
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
if numMatches < 0 {
assertionFailed = true
}
if state . isLast {
if state . isLookaround ( ) {
lastLookaroundInList = true
}
lastStateInList = true
lastStatePtr = state
}
}
if assertionFailed && numStatesMatched == 0 { // Nothing has matched and an assertion has failed
// If I'm being completely honest, I'm not sure why I have to check specifically for a _lookaround_
// state. The explanation below is my attempt to explain this behavior.
// If you replace 'lastLookaroundInList' with 'lastStateInList', one of the test cases fails.
//
// One of the states in our list was a last state and a lookaround. In this case, we
// don't abort upon failure of the assertion, because we have found
// another path to a final state.
// Even if the last state _was_ an assertion, we can use the previously
// saved indices to find a match.
if lastLookaroundInList {
break
} else {
if i == startingFrom {
i ++
}
return false , [ ] Group { } , i
}
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
}
// Check if we can find a state in our list that is:
// a. A last-state
// b. Empty
// c. Doesn't assert anything
for _ , s := range currentStates {
if s . isLast && s . isEmpty && s . assert == NONE {
lastStatePtr = s
lastStateInList = true
}
}
if lastStateInList { // A last-state was in the list of states. add the matchIndex to our MatchIndex list
for j := 1 ; j < numGroups + 1 ; j ++ {
tempIndices [ j ] = lastStatePtr . threadGroups [ j ]
}
endIdx = i
tempIndices [ 0 ] = Group { startIdx , endIdx }
}
// Check if we can find a zero-length match
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
if foundPath == false {
if ok := zeroMatchPossible ( str , i , numGroups , currentStates ... ) ; ok {
if tempIndices [ 0 ] . isValid ( ) == false {
tempIndices [ 0 ] = Group { startIdx , startIdx }
}
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
}
// If we haven't moved in the string, increment the counter by 1
// to ensure we don't keep trying the same string over and over.
// if i == startingFrom {
startIdx ++
// i++
// }
if tempIndices . numValidGroups ( ) > 0 && tempIndices [ 0 ] . isValid ( ) {
if tempIndices [ 0 ] . startIdx == tempIndices [ 0 ] . endIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
return true , tempIndices , tempIndices [ 0 ] . endIdx + 1
} else {
return true , tempIndices , tempIndices [ 0 ] . endIdx
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
}
}
return false , [ ] Group { } , startIdx
}
currentStates = make ( [ ] * State , len ( tempStates ) )
copy ( currentStates , tempStates )
tempStates = nil
i ++
}
// End-of-string reached. Go to any 0-states, until there are no more 0-states to go to. Then check if any of our states are in the end position.
// This is the exact same algorithm used inside the loop, so I should probably put it in a function.
zeroStates , isZero := takeZeroState ( currentStates , numGroups , i )
tempStates = append ( tempStates , zeroStates ... )
num_appended := 0 // Number of unique states addded to tempStates
for isZero == true {
zeroStates , isZero = takeZeroState ( tempStates , numGroups , i )
tempStates , num_appended = unique_append ( tempStates , zeroStates ... )
if num_appended == 0 { // Break if we haven't appended any more unique values
break
}
}
currentStates = append ( currentStates , tempStates ... )
tempStates = nil
for _ , state := range currentStates {
// Only add the match if the start index is in bounds. If the state has an assertion,
// make sure the assertion checks out.
if state . isLast && i <= len ( str ) {
if state . assert == NONE || state . checkAssertion ( str , i ) {
for j := 1 ; j < numGroups + 1 ; j ++ {
tempIndices [ j ] = state . threadGroups [ j ]
}
endIdx = i
tempIndices [ 0 ] = Group { startIdx , endIdx }
}
}
}
if tempIndices . numValidGroups ( ) > 0 {
if tempIndices [ 0 ] . startIdx == tempIndices [ 0 ] . endIdx { // If we have a zero-length match, we have to shift the index at which we start. Otherwise we keep looking at the same paert of the string over and over.
return true , tempIndices , tempIndices [ 0 ] . endIdx + 1
} else {
return true , tempIndices , tempIndices [ 0 ] . endIdx
Big rewrite - assertion handling, zero-match fixes, change in recursive calls
I added support for transitions. I wrote a function to determine if
a given state has transitions for a character at a given point in the
string. This helps me check if the current state has an assertion, and
take actions based on that.
I also fixed zero-length matching (almost, see todo.txt). It works for
nearly all cases I could think of, although I still need to write more
tests. I wrote a function to check if zero-length matches are possible
with a given state.
I also changed the way recursive calls work. Rather than passing a
modified string, the function stores the location in the input string.
This location is updated with each call to the function.
Finally, the function now increments the offset by 1 instead of
incrementing by the length of the longest match. This leads to a bit of
overhead eg. if a regex matches index 1-5, then 1-5, 2-5, 3-5, 4-5 are
all stored. To fix this, I wrote (and used) a function to check if
a match overlaps with any matches in a slice.
3 months ago
}
}
if startIdx == startingFrom { // Increment starting index if we haven't moved in the string. Prevents us from matching the same part of the string over and over.
startIdx ++
}
return false , [ ] Group { } , startIdx
}