@ -10,16 +10,16 @@ const EPSILON int = 0xF0000
// assertType enumerates the kinds of assertion a State can carry in its
// 'assert' field. The values are assigned sequentially with iota.
// NOTE(review): every declaration below appears twice - an upper-case name
// followed by a camel-case *Assert name. This looks like the before/after
// sides of a rename with the diff markers stripped; confirm which spelling
// is current before relying on either one.
type assertType int
type assertType int
const (
const (
NONE assertType = iota // No assertion; newState assigns this value
noneAssert assertType = iota // No assertion; newState assigns this value
SOS // Start of string; in multi-line mode also true right after a newline
sosAssert // Start of string; in multi-line mode also true right after a newline
EOS // End of string; in multi-line mode also true when the current character is a newline
eosAssert // End of string; in multi-line mode also true when the current character is a newline
WBOUND // True when isWordBoundary reports true at the index
wboundAssert // True when isWordBoundary reports true at the index
NONWBOUND // True when isWordBoundary reports false at the index
nonwboundAssert // True when isWordBoundary reports false at the index
PLA // Positive lookahead
plaAssert // Positive lookahead
NLA // Negative lookahead
nlaAssert // Negative lookahead
PLB // Positive lookbehind
plbAssert // Positive lookbehind
NLB // Negative lookbehind
nlbAssert // Negative lookbehind
ALWAYS_TRUE // An assertion that is always true
alwaysTrueAssert // An assertion that is always true
)
)
type State struct {
type State struct {
@ -105,24 +105,24 @@ func cloneStateHelper(state *State, cloneMap map[*State]*State) *State {
// Checks if the given state's assertion is true. Returns true if the given
// Checks if the given state's assertion is true. Returns true if the given
// state doesn't have an assertion.
// state doesn't have an assertion.
func ( s State ) checkAssertion ( str [ ] rune , idx int ) bool {
func ( s State ) checkAssertion ( str [ ] rune , idx int ) bool {
if s . assert == ALWAYS_TRUE {
if s . assert == alwaysTrueAssert {
return true
return true
}
}
if s . assert == SOS {
if s . assert == sosAssert {
// Single-line mode: Beginning of string
// Single-line mode: Beginning of string
// Multi-line mode: Previous character was newline
// Multi-line mode: Previous character was newline
return idx == 0 || ( multilineMode && ( idx > 0 && str [ idx - 1 ] == '\n' ) )
return idx == 0 || ( multilineMode && ( idx > 0 && str [ idx - 1 ] == '\n' ) )
}
}
if s . assert == EOS {
if s . assert == eosAssert {
// Single-line mode: End of string
// Single-line mode: End of string
// Multi-line mode: current character is newline
// Multi-line mode: current character is newline
// Index is at the end of the string, or it points to the last character which is a newline
// Index is at the end of the string, or it points to the last character which is a newline
return idx == len ( str ) || ( multilineMode && str [ idx ] == '\n' )
return idx == len ( str ) || ( multilineMode && str [ idx ] == '\n' )
}
}
if s . assert == WBOUND {
if s . assert == wboundAssert {
return isWordBoundary ( str , idx )
return isWordBoundary ( str , idx )
}
}
if s . assert == NONWBOUND {
if s . assert == nonwboundAssert {
return ! isWordBoundary ( str , idx )
return ! isWordBoundary ( str , idx )
}
}
if s . isLookaround ( ) {
if s . isLookaround ( ) {
@ -133,7 +133,7 @@ func (s State) checkAssertion(str []rune, idx int) bool {
startState := s . lookaroundNFA
startState := s . lookaroundNFA
var runesToMatch [ ] rune
var runesToMatch [ ] rune
var strToMatch string
var strToMatch string
if s . assert == PLA || s . assert == NLA {
if s . assert == plaAssert || s . assert == nlaAssert {
runesToMatch = str [ idx : ]
runesToMatch = str [ idx : ]
} else {
} else {
runesToMatch = str [ : idx ]
runesToMatch = str [ : idx ]
@ -149,21 +149,21 @@ func (s State) checkAssertion(str []rune, idx int) bool {
numMatchesFound := 0
numMatchesFound := 0
for _ , matchIdx := range matchIndices {
for _ , matchIdx := range matchIndices {
if s . assert == PLA || s . assert == NLA { // Lookahead - return true (or false) if at least one match starts at 0. Zero is used because the test-string _starts_ from idx.
if s . assert == plaAssert || s . assert == nlaAssert { // Lookahead - return true (or false) if at least one match starts at 0. Zero is used because the test-string _starts_ from idx.
if matchIdx [ 0 ] . StartIdx == 0 {
if matchIdx [ 0 ] . StartIdx == 0 {
numMatchesFound ++
numMatchesFound ++
}
}
}
}
if s . assert == PLB || s . assert == NLB { // Lookbehind - return true (or false) if at least one match _ends_ at the current index.
if s . assert == plbAssert || s . assert == nlbAssert { // Lookbehind - return true (or false) if at least one match _ends_ at the current index.
if matchIdx [ 0 ] . EndIdx == idx {
if matchIdx [ 0 ] . EndIdx == idx {
numMatchesFound ++
numMatchesFound ++
}
}
}
}
}
}
if s . assert == PLA || s . assert == PLB { // Positive assertions want at least one match
if s . assert == plaAssert || s . assert == plbAssert { // Positive assertions want at least one match
return numMatchesFound > 0
return numMatchesFound > 0
}
}
if s . assert == NLA || s . assert == NLB { // Negative assertions only want zero matches
if s . assert == nlaAssert || s . assert == nlbAssert { // Negative assertions only want zero matches
return numMatchesFound == 0
return numMatchesFound == 0
}
}
}
}
@ -172,7 +172,7 @@ func (s State) checkAssertion(str []rune, idx int) bool {
// Returns true if the contents of 's' contain the value at the given index of the given string
// Returns true if the contents of 's' contain the value at the given index of the given string
func ( s State ) contentContains ( str [ ] rune , idx int ) bool {
func ( s State ) contentContains ( str [ ] rune , idx int ) bool {
if s . assert != NONE {
if s . assert != noneAssert {
return s . checkAssertion ( str , idx )
return s . checkAssertion ( str , idx )
}
}
if s . allChars {
if s . allChars {
@ -183,7 +183,7 @@ func (s State) contentContains(str []rune, idx int) bool {
}
}
// Returns true if the state's assertion is one of the four lookaround kinds:
// positive or negative lookahead, or positive or negative lookbehind.
// Every other assertion value yields false.
// NOTE(review): the header, return statement and closing brace each appear
// twice (old upper-case constant names, then the camel-case *Assert names);
// presumably only the second spelling is meant to survive - confirm.
func ( s State ) isLookaround ( ) bool {
func ( s State ) isLookaround ( ) bool {
return s . assert == PLA || s . assert == PLB || s . assert == NLA || s . assert == NLB
return s . assert == plaAssert || s . assert == plbAssert || s . assert == nlaAssert || s . assert == nlbAssert
}
}
// Returns the matches for the character at the given index of the given string.
// Returns the matches for the character at the given index of the given string.
@ -192,7 +192,7 @@ func (s State) matchesFor(str []rune, idx int) ([]*State, int) {
// Assertions can be viewed as 'checks'. If the check fails, we return
// Assertions can be viewed as 'checks'. If the check fails, we return
// an empty array and 0.
// an empty array and 0.
// If it passes, we treat it like any other state, and return all the transitions.
// If it passes, we treat it like any other state, and return all the transitions.
if s . assert != NONE {
if s . assert != noneAssert {
if s . checkAssertion ( str , idx ) == false {
if s . checkAssertion ( str , idx ) == false {
return make ( [ ] * State , 0 ) , - 1
return make ( [ ] * State , 0 ) , - 1
}
}
@ -270,7 +270,7 @@ func concatenate(s1 *State, s2 *State) *State {
}
}
func kleene ( s1 State ) ( * State , error ) {
func kleene ( s1 State ) ( * State , error ) {
if s1 . isEmpty && s1 . assert != NONE {
if s1 . isEmpty && s1 . assert != noneAssert {
return nil , fmt . Errorf ( "previous token is not quantifiable" )
return nil , fmt . Errorf ( "previous token is not quantifiable" )
}
}
@ -328,7 +328,7 @@ func newState() State {
ret := State {
ret := State {
output : make ( [ ] * State , 0 ) ,
output : make ( [ ] * State , 0 ) ,
transitions : make ( map [ int ] [ ] * State ) ,
transitions : make ( map [ int ] [ ] * State ) ,
assert : NONE ,
assert : noneAssert ,
except : append ( [ ] rune { } , 0 ) ,
except : append ( [ ] rune { } , 0 ) ,
lookaroundRegex : "" ,
lookaroundRegex : "" ,
groupEnd : false ,
groupEnd : false ,
@ -343,6 +343,6 @@ func zeroLengthMatchState() State {
start := newState ( )
start := newState ( )
start . content = newContents ( EPSILON )
start . content = newContents ( EPSILON )
start . isEmpty = true
start . isEmpty = true
start . assert = ALWAYS_TRUE
start . assert = alwaysTrueAssert
return start
return start
}
}