Compare commits
4 Commits
5bb06900cc
...
833dd269a8
| Author | SHA1 | Date | |
|---|---|---|---|
| 833dd269a8 | |||
| ecab7cc522 | |||
| 8b6d35c106 | |||
| 5e6435d8a7 |
39
compile.go
39
compile.go
@@ -112,7 +112,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
// Convert the string to a slice of runes to allow iteration through it
|
// Convert the string to a slice of runes to allow iteration through it
|
||||||
re_runes_orig := []rune(re) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
|
re_runes_orig := []rune(re) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
|
||||||
re_runes := make([]rune, 0)
|
re_runes := make([]rune, 0)
|
||||||
// Check for numeric range. If we are at the start of a numeric range,
|
// The following checks are performed here:
|
||||||
|
// 1. Check for numeric range. If we are at the start of a numeric range,
|
||||||
// skip to end and construct the equivalent regex for the range.
|
// skip to end and construct the equivalent regex for the range.
|
||||||
// The reason this is outside the loop below, is that it actually modifies
|
// The reason this is outside the loop below, is that it actually modifies
|
||||||
// the given regex (we 'cut' the numeric range and 'paste' an equivalent regex).
|
// the given regex (we 'cut' the numeric range and 'paste' an equivalent regex).
|
||||||
@@ -122,13 +123,19 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
// complexity.
|
// complexity.
|
||||||
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
|
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
|
||||||
//
|
//
|
||||||
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
// 2. Check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
||||||
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
||||||
//
|
//
|
||||||
// Another check is made for unescaped brackets - opening brackets are replaced with LBRACKET and closing brackets are replaced with RBRACKET.
|
// 3. Another check is made for unescaped brackets - opening brackets are replaced with
|
||||||
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
|
// LBRACKET and closing brackets are replaced with RBRACKET.
|
||||||
// these will be converted back. This avoids confusiuon in detecting whether a character is escaped eg. detecting
|
//
|
||||||
|
// 4. Check for escaped backslashes. Replace these with the BACKSLASH
|
||||||
|
// metacharacter. Later, in thompson(), these will be converted back. This avoids
|
||||||
|
// confusion in detecting whether a character is escaped eg. detecting
|
||||||
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
||||||
|
//
|
||||||
|
// 5. Check for non-greedy operators. These are not supported at the moment, so an error
|
||||||
|
// must be returned if the user attempts to use a non-greedy operator.
|
||||||
for i := 0; i < len(re_runes_orig); i++ {
|
for i := 0; i < len(re_runes_orig); i++ {
|
||||||
c := re_runes_orig[i]
|
c := re_runes_orig[i]
|
||||||
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
|
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
|
||||||
@@ -172,6 +179,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
|||||||
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
|
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
|
||||||
re_runes = append(re_runes, RBRACKET)
|
re_runes = append(re_runes, RBRACKET)
|
||||||
continue
|
continue
|
||||||
|
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
|
||||||
|
return nil, fmt.Errorf("non-greedy operators are not supported")
|
||||||
} else {
|
} else {
|
||||||
re_runes = append(re_runes, c)
|
re_runes = append(re_runes, c)
|
||||||
}
|
}
|
||||||
@@ -1001,15 +1010,24 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return Reg{}, fmt.Errorf("error applying kleene star")
|
return Reg{}, fmt.Errorf("error applying kleene star")
|
||||||
}
|
}
|
||||||
stateToAdd := kleene(*s1)
|
stateToAdd, err := kleene(*s1)
|
||||||
|
if err != nil {
|
||||||
|
return Reg{}, err
|
||||||
|
}
|
||||||
nfa = append(nfa, stateToAdd)
|
nfa = append(nfa, stateToAdd)
|
||||||
case PLUS: // a+ is equivalent to aa*
|
case PLUS: // a+ is equivalent to aa*
|
||||||
s1 := mustPop(&nfa)
|
s1 := mustPop(&nfa)
|
||||||
s2 := kleene(*s1)
|
s2, err := kleene(*s1)
|
||||||
|
if err != nil {
|
||||||
|
return Reg{}, err
|
||||||
|
}
|
||||||
s1 = concatenate(s1, s2)
|
s1 = concatenate(s1, s2)
|
||||||
nfa = append(nfa, s1)
|
nfa = append(nfa, s1)
|
||||||
case QUESTION: // ab? is equivalent to a(b|)
|
case QUESTION: // ab? is equivalent to a(b|)
|
||||||
s1 := mustPop(&nfa)
|
s1, err := pop(&nfa)
|
||||||
|
if err != nil {
|
||||||
|
return Reg{}, fmt.Errorf("error applying question operator")
|
||||||
|
}
|
||||||
s2 := question(s1)
|
s2 := question(s1)
|
||||||
nfa = append(nfa, s2)
|
nfa = append(nfa, s2)
|
||||||
case PIPE:
|
case PIPE:
|
||||||
@@ -1062,7 +1080,10 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
stateToAdd = concatenate(stateToAdd, cloneState(state))
|
stateToAdd = concatenate(stateToAdd, cloneState(state))
|
||||||
}
|
}
|
||||||
if c.endReps == INFINITE_REPS { // Case 3
|
if c.endReps == INFINITE_REPS { // Case 3
|
||||||
s2 := kleene(*state)
|
s2, err := kleene(*state)
|
||||||
|
if err != nil {
|
||||||
|
return Reg{}, err
|
||||||
|
}
|
||||||
stateToAdd = concatenate(stateToAdd, s2)
|
stateToAdd = concatenate(stateToAdd, s2)
|
||||||
} else { // Case 2
|
} else { // Case 2
|
||||||
for i := c.startReps; i < c.endReps; i++ {
|
for i := c.startReps; i < c.endReps; i++ {
|
||||||
|
|||||||
9
nfa.go
9
nfa.go
@@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"slices"
|
"slices"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -268,7 +269,11 @@ func concatenate(s1 *State, s2 *State) *State {
|
|||||||
return s1
|
return s1
|
||||||
}
|
}
|
||||||
|
|
||||||
func kleene(s1 State) *State {
|
func kleene(s1 State) (*State, error) {
|
||||||
|
if s1.isEmpty && s1.assert != NONE {
|
||||||
|
return nil, fmt.Errorf("previous token is not quantifiable")
|
||||||
|
}
|
||||||
|
|
||||||
toReturn := &State{}
|
toReturn := &State{}
|
||||||
toReturn.transitions = make(map[int][]*State)
|
toReturn.transitions = make(map[int][]*State)
|
||||||
toReturn.content = newContents(EPSILON)
|
toReturn.content = newContents(EPSILON)
|
||||||
@@ -283,7 +288,7 @@ func kleene(s1 State) *State {
|
|||||||
for _, c := range s1.content {
|
for _, c := range s1.content {
|
||||||
toReturn.transitions[c], _ = unique_append(toReturn.transitions[c], &s1)
|
toReturn.transitions[c], _ = unique_append(toReturn.transitions[c], &s1)
|
||||||
}
|
}
|
||||||
return toReturn
|
return toReturn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func alternate(s1 *State, s2 *State) *State {
|
func alternate(s1 *State, s2 *State) *State {
|
||||||
|
|||||||
24
re_test.go
24
re_test.go
@@ -465,6 +465,19 @@ var reTests = []struct {
|
|||||||
{`[\t][\n][\v][\r][\f][\b]`, nil, "\t\n\v\r\f\b", []Group{{0, 6}}},
|
{`[\t][\n][\v][\r][\f][\b]`, nil, "\t\n\v\r\f\b", []Group{{0, 6}}},
|
||||||
{`.*d`, nil, "abc\nabd", []Group{{4, 7}}},
|
{`.*d`, nil, "abc\nabd", []Group{{4, 7}}},
|
||||||
{`(`, nil, "-", nil},
|
{`(`, nil, "-", nil},
|
||||||
|
{`[\41]`, nil, `!`, []Group{{0, 1}}},
|
||||||
|
{`(?<!abc)(d.f)`, nil, `abcdefdof`, []Group{{6, 9}}},
|
||||||
|
{`[\w-]+`, nil, `laser_beam`, []Group{{0, 10}}},
|
||||||
|
{`M+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
|
||||||
|
{`m+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
|
||||||
|
{`[M]+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
|
||||||
|
{`[m]+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
|
||||||
|
{`^*`, nil, `-`, nil},
|
||||||
|
{`a[^>]*b`, nil, `a>b`, []Group{}},
|
||||||
|
{`^a*$`, nil, `foo`, []Group{}},
|
||||||
|
|
||||||
|
{`*?`, nil, `-`, nil},
|
||||||
|
{`a*?`, nil, `-`, nil}, // non-greedy operators are not supported
|
||||||
|
|
||||||
// Todo - add numeric range tests
|
// Todo - add numeric range tests
|
||||||
}
|
}
|
||||||
@@ -604,7 +617,7 @@ var groupTests = []struct {
|
|||||||
{`a(?:b|c|d)(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
{`a(?:b|c|d)(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
||||||
{`a(?:b|c|d)*(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
{`a(?:b|c|d)*(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
||||||
{`a(?:b|c|d)+(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
{`a(?:b|c|d)+(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
||||||
{`a(?:b|(c|e){1,2}?|d)+?(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {1, 2}, {2, 3}}}},
|
{`a(?:b|(c|e){1,2}?|d)+(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {1, 2}, {2, 3}}}},
|
||||||
{`(?<!-):(.*)(?<!-):`, nil, `a:bc-:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
{`(?<!-):(.*)(?<!-):`, nil, `a:bc-:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
||||||
{`(?<!\\):(.*)(?<!\\):`, nil, `a:bc\:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
{`(?<!\\):(.*)(?<!\\):`, nil, `a:bc\:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
||||||
{`(?<!\?)'(.*)(?<!\?)'`, nil, `a'bc?'de'f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
{`(?<!\?)'(.*)(?<!\?)'`, nil, `a'bc?'de'f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
||||||
@@ -612,6 +625,15 @@ var groupTests = []struct {
|
|||||||
{`([\s]*)([\S]*)([\s]*)`, nil, ` testing!1972`, []Match{[]Group{{0, 13}, {0, 1}, {1, 13}, {13, 13}}, []Group{{13, 13}, {13, 13}, {13, 13}, {13, 13}}}},
|
{`([\s]*)([\S]*)([\s]*)`, nil, ` testing!1972`, []Match{[]Group{{0, 13}, {0, 1}, {1, 13}, {13, 13}}, []Group{{13, 13}, {13, 13}, {13, 13}, {13, 13}}}},
|
||||||
{`(\s*)(\S*)(\s*)`, nil, ` testing!1972`, []Match{[]Group{{0, 13}, {0, 1}, {1, 13}, {13, 13}}, []Group{{13, 13}, {13, 13}, {13, 13}, {13, 13}}}},
|
{`(\s*)(\S*)(\s*)`, nil, ` testing!1972`, []Match{[]Group{{0, 13}, {0, 1}, {1, 13}, {13, 13}}, []Group{{13, 13}, {13, 13}, {13, 13}, {13, 13}}}},
|
||||||
{`(([a-z]+):)?([a-z]+)$`, nil, `smil`, []Match{[]Group{{0, 4}, {-1, -1}, {-1, -1}, {0, 4}}}},
|
{`(([a-z]+):)?([a-z]+)$`, nil, `smil`, []Match{[]Group{{0, 4}, {-1, -1}, {-1, -1}, {0, 4}}}},
|
||||||
|
|
||||||
|
{`(x?)?`, nil, `x`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}}},
|
||||||
|
{`"(?:\\"|[^"])*"`, nil, `"\""`, []Match{[]Group{{0, 4}}}},
|
||||||
|
|
||||||
|
{`^((a)c)?(ab)$`, nil, `ab`, []Match{[]Group{{0, 2}, {-1, -1}, {-1, -1}, {0, 2}}}},
|
||||||
|
|
||||||
|
{`^([ab]*)(?=(b)?)c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
|
||||||
|
{`^([ab]*)(?!(b))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
|
||||||
|
{`^([ab]*)(?<!(a))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFindAllMatches(t *testing.T) {
|
func TestFindAllMatches(t *testing.T) {
|
||||||
|
|||||||
Reference in New Issue
Block a user