@ -2,6 +2,8 @@ package regex
import (
"fmt"
"strconv"
"unicode"
)
// A Match represents a match found by the regex in a given string.
@ -77,6 +79,18 @@ func (regex Reg) Match(str string) bool {
return err == nil
}
// CompileMatch is a one-shot convenience wrapper: it compiles expr (applying
// any flags that were supplied, see [ReFlag]) and reports whether the compiled
// expression matches str. It is analogous to [regexp.Match].
//
// When compilation fails, the result is false together with the error
// returned by Compile. Otherwise the error is nil and the boolean is whatever
// the compiled expression's Match method reports for str.
func CompileMatch(expr string, str string, flags ...ReFlag) (bool, error) {
	compiled, compileErr := Compile(expr, flags...)
	if compileErr == nil {
		return compiled.Match(str), nil
	}
	return false, compileErr
}
// FindAll returns a slice containing all the 0-groups of the regex in the given string.
// A 0-group represents the match without any submatches.
func ( regex Reg ) FindAll ( str string ) [ ] Group {
@ -162,7 +176,7 @@ func (regex Reg) FindNthMatch(str string, n int) (Match, error) {
var matchFound bool
var matchIdx Match
for idx <= len ( str_runes ) {
matchFound , matchIdx , idx = findAllSubmatchHelper ( regex . start , str_runes , idx , regex . numGroups )
matchFound , matchIdx , idx = findAllSubmatchHelper ( regex . start , str_runes , idx , regex . numGroups , regex . preferLongest )
if matchFound {
matchNum ++
}
@ -182,7 +196,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
var matchIdx Match
indices := make ( [ ] Match , 0 )
for idx <= len ( str_runes ) {
matchFound , matchIdx , idx = findAllSubmatchHelper ( regex . start , str_runes , idx , regex . numGroups )
matchFound , matchIdx , idx = findAllSubmatchHelper ( regex . start , str_runes , idx , regex . numGroups , regex . preferLongest )
if matchFound {
indices = append ( indices , matchIdx )
}
@ -191,7 +205,7 @@ func (regex Reg) FindAllSubmatch(str string) []Match {
return indices
}
func addStateToList ( str [ ] rune , idx int , list [ ] nfaState , state nfaState , threadGroups [ ] Group , visited [ ] nfaState ) [ ] nfaState {
func addStateToList ( str [ ] rune , idx int , list [ ] nfaState , state nfaState , threadGroups [ ] Group , visited [ ] nfaState , preferLongest bool ) [ ] nfaState {
if stateExists ( list , state ) || stateExists ( visited , state ) {
return list
}
@ -199,32 +213,32 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
if state . isKleene || state . isQuestion {
copyThread ( state . splitState , state )
list = addStateToList ( str , idx , list , * state . splitState , threadGroups , visited )
list = addStateToList ( str , idx , list , * state . splitState , threadGroups , visited , preferLongest )
copyThread ( state . next , state )
list = addStateToList ( str , idx , list , * state . next , threadGroups , visited )
list = addStateToList ( str , idx , list , * state . next , threadGroups , visited , preferLongest )
return list
}
if state . isAlternation {
copyThread ( state . next , state )
list = addStateToList ( str , idx , list , * state . next , threadGroups , visited )
list = addStateToList ( str , idx , list , * state . next , threadGroups , visited , preferLongest )
copyThread ( state . splitState , state )
list = addStateToList ( str , idx , list , * state . splitState , threadGroups , visited )
list = addStateToList ( str , idx , list , * state . splitState , threadGroups , visited , preferLongest )
return list
}
state . threadGroups = append ( [ ] Group { } , threadGroups ... )
if state . assert != noneAssert {
if state . checkAssertion ( str , idx ) {
if state . checkAssertion ( str , idx , preferLongest ) {
copyThread ( state . next , state )
return addStateToList ( str , idx , list , * state . next , state . threadGroups , visited )
return addStateToList ( str , idx , list , * state . next , state . threadGroups , visited , preferLongest )
}
}
if state . groupBegin {
state . threadGroups [ state . groupNum ] . StartIdx = idx
return addStateToList ( str , idx , list , * state . next , state . threadGroups , visited )
return addStateToList ( str , idx , list , * state . next , state . threadGroups , visited , preferLongest )
}
if state . groupEnd {
state . threadGroups [ state . groupNum ] . EndIdx = idx
return addStateToList ( str , idx , list , * state . next , state . threadGroups , visited )
return addStateToList ( str , idx , list , * state . next , state . threadGroups , visited , preferLongest )
}
return append ( list , state )
@ -233,7 +247,7 @@ func addStateToList(str []rune, idx int, list []nfaState, state nfaState, thread
// Helper for FindAllMatches. Returns whether it found a match, the
// first Match it finds, and how far it got into the string, i.e., where
// the next search should start from.
func findAllSubmatchHelper ( start * nfaState , str [ ] rune , offset int , numGroups int ) ( bool , Match , int ) {
func findAllSubmatchHelper ( start * nfaState , str [ ] rune , offset int , numGroups int , preferLongest bool ) ( bool , Match , int ) {
// Base case - exit if offset exceeds string's length
if offset > len ( str ) {
// The second value here shouldn't be used, because we should exit when the third return value is greater than len(str)
@ -248,7 +262,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
// If the first state is an assertion, makes sure the assertion
// is true before we do _anything_ else.
if start . assert != noneAssert {
if start . checkAssertion ( str , offset ) == false {
if start . checkAssertion ( str , offset , preferLongest ) == false {
i ++
return false , [ ] Group { } , i
}
@ -256,7 +270,7 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
start . threadGroups = newMatch ( numGroups + 1 )
start . threadGroups [ 0 ] . StartIdx = i
currentStates = addStateToList ( str , i , currentStates , * start , start . threadGroups , nil )
currentStates = addStateToList ( str , i , currentStates , * start , start . threadGroups , nil , preferLongest )
var match Match = nil
for idx := i ; idx <= len ( str ) ; idx ++ {
if len ( currentStates ) == 0 {
@ -274,9 +288,9 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
currentState . threadGroups [ 0 ] . EndIdx = idx
match = append ( [ ] Group { } , currentState . threadGroups ... )
break
} else if ! currentState . isAlternation && ! currentState . isKleene && ! currentState . isQuestion && ! currentState . groupBegin && ! currentState . groupEnd { // Normal character or assertion
if currentState . contentContains ( str , idx ) {
nextStates = addStateToList ( str , idx + 1 , nextStates , * currentState . next , currentState . threadGroups , nil )
} else if ! currentState . isAlternation && ! currentState . isKleene && ! currentState . isQuestion && ! currentState . groupBegin && ! currentState . groupEnd && currentState . assert == noneAssert { // Normal character
if currentState . contentContains ( str , idx , preferLongest ) {
nextStates = addStateToList ( str , idx + 1 , nextStates , * currentState . next , currentState . threadGroups , nil , preferLongest )
}
}
}
@ -291,3 +305,68 @@ func findAllSubmatchHelper(start *nfaState, str []rune, offset int, numGroups in
}
return false , [ ] Group { } , i + 1
}
// Expand appends template to dst, expanding any variables in template to the
// relevant capturing group, and returns the resulting string.
//
// A variable is of the form '$n', where 'n' is a decimal number. It is
// replaced by the contents of the n-th capturing group of match. To insert a
// literal $, do not put a number after it. Alternatively, you can use $$.
// A variable whose number cannot be parsed, or that refers to a group match
// does not contain, is copied to the output unchanged ('$' followed by the
// digits). A group whose recorded indices do not describe a valid span of src
// expands to the empty string.
//
// src is the input string, and match must be the result of
// [Reg.FindSubmatch]. The indices in match are used as rune offsets into src.
func (regex Reg) Expand(dst string, template string, src string, match Match) string {
	templateRunes := []rune(template)
	srcRunes := []rune(src)

	// Accumulate into a byte buffer so each append is amortized O(1) instead of
	// re-copying the whole result on every '+='. Starting from []byte(dst)
	// keeps the caller's prefix byte-for-byte intact.
	out := []byte(dst)

	i := 0
	for i < len(templateRunes) {
		c := templateRunes[i]
		if c != '$' {
			out = append(out, string(c)...)
			i++
			continue
		}

		i++ // step past the '$'

		// The dollar sign is the last character of the template, or it is
		// followed by another dollar sign: emit a single literal '$'.
		if i >= len(templateRunes) || templateRunes[i] == '$' {
			out = append(out, '$')
			i++
			continue
		}

		// Collect the digits of the group number. The bounds check matters:
		// without it a template ending in "$1" would index past the slice.
		digitsStart := i
		for i < len(templateRunes) && unicode.IsDigit(templateRunes[i]) {
			i++
		}
		numStr := string(templateRunes[digitsStart:i])
		if numStr == "" {
			// '$' followed by a non-digit is a literal dollar sign; the
			// following rune is handled by the next loop iteration.
			out = append(out, '$')
			continue
		}

		// unicode.IsDigit also accepts non-ASCII digits, which Atoi rejects, and
		// very long digit runs overflow. Treat both like an unknown group rather
		// than silently falling back to group 0.
		num, err := strconv.Atoi(numStr)
		if err != nil || num >= len(match) {
			out = append(out, '$')
			out = append(out, numStr...)
			continue
		}

		// NOTE(review): presumably a group that did not participate in the match
		// carries out-of-range (e.g. negative) indices — confirm against newMatch.
		// Either way, never slice src with indices that would panic.
		start, end := match[num].StartIdx, match[num].EndIdx
		if start >= 0 && start <= end && end <= len(srcRunes) {
			out = append(out, string(srcRunes[start:end])...)
		}
	}
	return string(out)
}
// LiteralPrefix returns a string that must begin any match of the given
// regular expression, together with a flag that is true if that string
// comprises the entire expression.
//
// The walk begins at the regex's start state, stepping over it first if it is
// an assertion. It then follows the next pointers for as long as the current
// state is not the last state, is not an alternation, carries exactly one
// content element and is not an assertion. Group-begin and group-end states
// met along the way are stepped over without contributing to the prefix.
// complete reports whether the walk stopped on the last state.
func (regex Reg) LiteralPrefix() (prefix string, complete bool) {
	cur := regex.start

	// A single leading assertion contributes no characters; step over it.
	if cur.assert != noneAssert {
		cur = cur.next
	}

	for {
		// Stop at the first state that cannot extend a literal prefix.
		if cur.isLast || cur.isAlternation || len(cur.content) != 1 || cur.assert != noneAssert {
			break
		}
		// Group markers are passed through; only other states add a character.
		if !cur.groupBegin && !cur.groupEnd {
			prefix += string(rune(cur.content[0]))
		}
		cur = cur.next
	}

	return prefix, cur.isLast
}