package main
import "slices"
// EPSILON is the content value given to empty states: kleene and alternate
// both build the content of the state they create with newContents(EPSILON).
const EPSILON int = 0
// assertType identifies which positional assertion, if any, a State makes.
type assertType int
// Assertion kinds. The conditions below are the ones matchesFor checks
// against the index it is given.
const (
// NONE - the state makes no assertion.
NONE assertType = iota
// SOS - holds only when the index is 0.
SOS
// EOS - holds only when the index equals the length of the input.
EOS
// WBOUND - holds only where isWordBoundary reports true.
WBOUND
// NONWBOUND - holds only where isWordBoundary reports false.
NONWBOUND
)
// State is a single node of the automaton: it records what the node matches,
// where it leads, and the flags the construction helpers (concatenate,
// kleene, alternate) and verifyLastStates set on it.
type State struct {
content stateContents // Contents of the current state; contentContains looks the input rune (as an int) up in here
isEmpty bool // Whether the state is empty - the states created for the union operator and the Kleene star are empty
isLast bool // Whether it is a last state (accept state); set by verifyLastStatesHelper
output [ ] * State // The outputs of the current state, i.e. the 'outward arrows'. A union operator state will have more than one of these.
transitions map [ int ] [ ] * State // Transitions to other states: maps a character (int representation) to a _list_ of states. A list is needed because one character can lead to multiple states, e.g. ab|aa
isKleene bool // Identifies whether the current node is a 0-state representing a Kleene star; set by kleene
assert assertType // Type of assertion of the current node - NONE means that the node doesn't assert anything
zeroMatchFound bool // Whether or not the state has been used for a zero-length match - only relevant for zero states
}
// contentContains reports whether state s accepts position idx of str.
//
// For an assertion state the answer depends only on the position:
//   - SOS: idx is 0
//   - EOS: idx is len(str)
//   - WBOUND: isWordBoundary(str, idx) is true
//   - NONWBOUND: isWordBoundary(str, idx) is false
//
// For every other state (assert is NONE) it reports whether the rune at
// str[idx], converted to int, is one of the values in s.content. An idx
// outside [0, len(str)) refers to no rune at all, so the result is false.
func (s State) contentContains(str []rune, idx int) bool {
	switch s.assert {
	case SOS:
		return idx == 0
	case EOS:
		return idx == len(str)
	case WBOUND:
		return isWordBoundary(str, idx)
	case NONWBOUND:
		// Checked independently of WBOUND so that a non-word-boundary state
		// never falls through to the content lookup below.
		return !isWordBoundary(str, idx)
	}

	// Default - s.assert must be NONE. Guard the index first: there is no
	// rune to compare against outside the string.
	if idx < 0 || idx >= len(str) {
		return false
	}
	return slices.Contains(s.content, int(str[idx]))
}
// matchesFor returns the states that s transitions to on the rune at str[idx],
// together with how many there are.
//
// Assertions act as checks that run first. If s makes an assertion and it does
// not hold at idx (SOS away from index 0, EOS away from len(str), WBOUND where
// isWordBoundary is false, NONWBOUND where it is true), matchesFor returns an
// empty slice and -1.
//
// Otherwise s is treated like any other state and its transitions for the
// rune at idx are returned. When idx lies outside [0, len(str)) there is no
// rune to transition on (for example after an EOS assertion has passed at
// len(str)), so the result is nil and 0.
func (s State) matchesFor(str []rune, idx int) ([]*State, int) {
	assertionFailed := false
	switch s.assert {
	case SOS:
		assertionFailed = idx != 0
	case EOS:
		assertionFailed = idx != len(str)
	case WBOUND:
		assertionFailed = !isWordBoundary(str, idx)
	case NONWBOUND:
		assertionFailed = isWordBoundary(str, idx)
	}
	if assertionFailed {
		return make([]*State, 0), -1
	}

	// The assertion checks above accept positions such as len(str), so the
	// index has to be validated before it is used to read a rune.
	if idx < 0 || idx >= len(str) {
		return nil, 0
	}

	next := s.transitions[int(str[idx])]
	return next, len(next)
}
// NFA groups a start state with a slice of output states.
// NOTE(review): nothing in this part of the file reads or writes an NFA -
// confirm the intended role of 'outputs' against the code that uses it.
type NFA struct {
start State
outputs [ ] State
}
// verifyLastStatesHelper is the depth-first recursion behind verifyLastStates.
// Starting at state, it sets isLast on the states it decides are last states
// and follows every transition to other states. visited holds the states whose
// transitions have already been followed, which keeps cycles from recursing
// forever.
func verifyLastStatesHelper(state *State, visited map[*State]bool) {
	// No outgoing transitions at all: a leaf, and therefore a last state.
	if len(state.transitions) == 0 {
		state.isLast = true
		return
	}

	// A single transition key, eg. a*: the state is a last state only if each
	// value in its content leads to exactly one destination and that
	// destination is the state itself.
	if len(state.transitions) == 1 {
		onlySelfLoops := true
		for _, c := range state.content {
			dests := state.transitions[c]
			if len(dests) != 1 || dests[0] != state {
				onlySelfLoops = false
				break
			}
		}
		state.isLast = onlySelfLoops
	}

	// A Kleene star state has transitions going out which loop back to it. If
	// all of those transitions lead to one and the same state, it must be a
	// last state.
	if state.isKleene {
		dests := []*State{}
		for _, targets := range state.transitions {
			dests = append(dests, targets...)
		}
		if allEqual(dests...) {
			state.isLast = true
			return
		}
	}

	if visited[state] {
		return
	}
	visited[state] = true

	// Recurse into every destination except the state itself.
	for _, targets := range state.transitions {
		for _, next := range targets {
			if next == state {
				continue
			}
			verifyLastStatesHelper(next, visited)
		}
	}
}
// verifyLastStates enables the 'isLast' flag for the leaf nodes (last states)
// reachable from the first element of start. Only start[0] is used as the
// root of the traversal; any further elements are ignored. An empty or nil
// slice has no root to walk, so the call does nothing.
func verifyLastStates(start []*State) {
	if len(start) == 0 {
		return
	}
	verifyLastStatesHelper(start[0], make(map[*State]bool))
}
// concatenate chains s2 after s1. Every output state of s1 gets, for each
// value in s2's content, a transition on that value leading to s2. s1 then
// takes over s2's outputs and is returned (modified in place).
func concatenate(s1 *State, s2 *State) *State {
	for _, tail := range s1.output {
		for _, c := range s2.content {
			// unique_append keeps s2 from being listed twice for the same
			// value; its second return value is not needed here.
			tail.transitions[c], _ = unique_append(tail.transitions[c], s2)
		}
	}
	s1.output = s2.output
	return s1
}
// kleene builds the state that represents s1 under a Kleene star and returns
// it. The new state is empty, is flagged isKleene, has EPSILON as its content
// and is its own single output. Every output state of s1 gets a transition on
// the new state's content leading to the new state, and the new state gets a
// transition on each value of s1's content leading to s1.
//
// s1 is received by value, so the pointer stored in the new state's
// transitions refers to this function's copy of s1, not to the caller's State.
func kleene(s1 State) *State {
	star := &State{
		transitions: make(map[int][]*State),
		content:     newContents(EPSILON),
		isEmpty:     true,
		isKleene:    true,
	}
	star.output = []*State{star}

	// Loop back: the outputs of s1 lead into the star state. The second
	// return value of unique_append is not needed here.
	for _, tail := range s1.output {
		for _, c := range star.content {
			tail.transitions[c], _ = unique_append(tail.transitions[c], star)
		}
	}

	// Entry: the star state leads into s1 on each value s1 matches.
	for _, c := range s1.content {
		star.transitions[c], _ = unique_append(star.transitions[c], &s1)
	}
	return star
}
// alternate builds the state for the union of s1 and s2 and returns it. The
// new state is empty, has EPSILON as its content, lists the outputs of s1
// followed by the outputs of s2 as its own outputs, and has a transition to
// each branch on every value of that branch's content.
func alternate(s1 *State, s2 *State) *State {
	branches := [...]*State{s1, s2}

	union := &State{}
	union.transitions = make(map[int][]*State)
	for _, branch := range branches {
		union.output = append(union.output, branch.output...)
	}

	// unique_append is used so that, for any given transition value, a branch
	// is listed at most once. Listing the same state twice for one value would
	// make it count as a match twice and so produce duplicate sets of match
	// indices. The second return value of unique_append is not needed here.
	for _, branch := range branches {
		for _, c := range branch.content {
			union.transitions[c], _ = unique_append(union.transitions[c], branch)
		}
	}

	union.content = newContents(EPSILON)
	union.isEmpty = true
	return union
}