Compare commits
4 Commits
5bb06900cc
...
833dd269a8
| Author | SHA1 | Date | |
|---|---|---|---|
| 833dd269a8 | |||
| ecab7cc522 | |||
| 8b6d35c106 | |||
| 5e6435d8a7 |
59
compile.go
59
compile.go
@@ -112,23 +112,30 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
// Convert the string to a slice of runes to allow iteration through it
|
||||
re_runes_orig := []rune(re) // This is the rune slice before the first parsing loop (which detects and replaces numeric ranges)
|
||||
re_runes := make([]rune, 0)
|
||||
// Check for numeric range. If we are at the start of a numeric range,
|
||||
// skip to end and construct the equivalent regex for the range.
|
||||
// The reason this is outside the loop below, is that it actually modifies
|
||||
// the given regex (we 'cut' the numeric range and 'paste' an equivalent regex).
|
||||
// It also makes the overall parsing easier, since I don't have to worry about the numeric range
|
||||
// anymore.
|
||||
// Eventually, I might be able to add it into the main parsing loop, to reduce the time
|
||||
// complexity.
|
||||
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
|
||||
// The following checks are performed here:
|
||||
// 1. Check for numeric range. If we are at the start of a numeric range,
|
||||
// skip to end and construct the equivalent regex for the range.
|
||||
// The reason this is outside the loop below, is that it actually modifies
|
||||
// the given regex (we 'cut' the numeric range and 'paste' an equivalent regex).
|
||||
// It also makes the overall parsing easier, since I don't have to worry about the numeric range
|
||||
// anymore.
|
||||
// Eventually, I might be able to add it into the main parsing loop, to reduce the time
|
||||
// complexity.
|
||||
// A numeric range has the syntax: <num1-num2>. It matches all numbers in this range.
|
||||
//
|
||||
// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
||||
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
||||
// 2. Check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
|
||||
// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
|
||||
//
|
||||
// Another check is made for unescaped brackets - opening brackets are replaced with LBRACKET and closing brackets are replaced with RBRACKET.
|
||||
// Finally, check for escaped backslashes. Replace these with the BACKSLASH metacharacter. Later, in thompson(),
|
||||
// these will be converted back. This avoids confusion in detecting whether a character is escaped eg. detecting
|
||||
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
||||
// 3. Another check is made for unescaped brackets - opening brackets are replaced with
|
||||
// LBRACKET and closing brackets are replaced with RBRACKET.
|
||||
//
|
||||
// 4. Check for escaped backslashes. Replace these with the BACKSLASH
|
||||
// metacharacter. Later, in thompson(), these will be converted back. This avoids
|
||||
// confusion in detecting whether a character is escaped eg. detecting
|
||||
// whether '\\[a]' has an escaped opening bracket (it doesn't).
|
||||
//
|
||||
// 5. Check for non-greedy operators. These are not supported at the moment, so an error
|
||||
// must be thrown if the user attempts to use a non-greedy operator.
|
||||
for i := 0; i < len(re_runes_orig); i++ {
|
||||
c := re_runes_orig[i]
|
||||
if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
|
||||
@@ -172,6 +179,8 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
|
||||
} else if c == ']' && (i == 0 || re_runes[len(re_runes)-1] != '\\') {
|
||||
re_runes = append(re_runes, RBRACKET)
|
||||
continue
|
||||
} else if slices.Contains([]rune{'+', '*', '?'}, c) && (i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '?') {
|
||||
return nil, fmt.Errorf("non-greedy operators are not supported")
|
||||
} else {
|
||||
re_runes = append(re_runes, c)
|
||||
}
|
||||
@@ -1001,15 +1010,24 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("error applying kleene star")
|
||||
}
|
||||
stateToAdd := kleene(*s1)
|
||||
stateToAdd, err := kleene(*s1)
|
||||
if err != nil {
|
||||
return Reg{}, err
|
||||
}
|
||||
nfa = append(nfa, stateToAdd)
|
||||
case PLUS: // a+ is equivalent to aa*
|
||||
s1 := mustPop(&nfa)
|
||||
s2 := kleene(*s1)
|
||||
s2, err := kleene(*s1)
|
||||
if err != nil {
|
||||
return Reg{}, err
|
||||
}
|
||||
s1 = concatenate(s1, s2)
|
||||
nfa = append(nfa, s1)
|
||||
case QUESTION: // ab? is equivalent to a(b|)
|
||||
s1 := mustPop(&nfa)
|
||||
s1, err := pop(&nfa)
|
||||
if err != nil {
|
||||
return Reg{}, fmt.Errorf("error applying question operator")
|
||||
}
|
||||
s2 := question(s1)
|
||||
nfa = append(nfa, s2)
|
||||
case PIPE:
|
||||
@@ -1062,7 +1080,10 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
stateToAdd = concatenate(stateToAdd, cloneState(state))
|
||||
}
|
||||
if c.endReps == INFINITE_REPS { // Case 3
|
||||
s2 := kleene(*state)
|
||||
s2, err := kleene(*state)
|
||||
if err != nil {
|
||||
return Reg{}, err
|
||||
}
|
||||
stateToAdd = concatenate(stateToAdd, s2)
|
||||
} else { // Case 2
|
||||
for i := c.startReps; i < c.endReps; i++ {
|
||||
|
||||
9
nfa.go
9
nfa.go
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
)
|
||||
|
||||
@@ -268,7 +269,11 @@ func concatenate(s1 *State, s2 *State) *State {
|
||||
return s1
|
||||
}
|
||||
|
||||
func kleene(s1 State) *State {
|
||||
func kleene(s1 State) (*State, error) {
|
||||
if s1.isEmpty && s1.assert != NONE {
|
||||
return nil, fmt.Errorf("previous token is not quantifiable")
|
||||
}
|
||||
|
||||
toReturn := &State{}
|
||||
toReturn.transitions = make(map[int][]*State)
|
||||
toReturn.content = newContents(EPSILON)
|
||||
@@ -283,7 +288,7 @@ func kleene(s1 State) *State {
|
||||
for _, c := range s1.content {
|
||||
toReturn.transitions[c], _ = unique_append(toReturn.transitions[c], &s1)
|
||||
}
|
||||
return toReturn
|
||||
return toReturn, nil
|
||||
}
|
||||
|
||||
func alternate(s1 *State, s2 *State) *State {
|
||||
|
||||
24
re_test.go
24
re_test.go
@@ -465,6 +465,19 @@ var reTests = []struct {
|
||||
{`[\t][\n][\v][\r][\f][\b]`, nil, "\t\n\v\r\f\b", []Group{{0, 6}}},
|
||||
{`.*d`, nil, "abc\nabd", []Group{{4, 7}}},
|
||||
{`(`, nil, "-", nil},
|
||||
{`[\41]`, nil, `!`, []Group{{0, 1}}},
|
||||
{`(?<!abc)(d.f)`, nil, `abcdefdof`, []Group{{6, 9}}},
|
||||
{`[\w-]+`, nil, `laser_beam`, []Group{{0, 10}}},
|
||||
{`M+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
|
||||
{`m+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
|
||||
{`[M]+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
|
||||
{`[m]+`, []ReFlag{RE_CASE_INSENSITIVE}, `MMM`, []Group{{0, 3}}},
|
||||
{`^*`, nil, `-`, nil},
|
||||
{`a[^>]*b`, nil, `a>b`, []Group{}},
|
||||
{`^a*$`, nil, `foo`, []Group{}},
|
||||
|
||||
{`*?`, nil, `-`, nil},
|
||||
{`a*?`, nil, `-`, nil}, // non-greedy operators are not supported
|
||||
|
||||
// Todo - add numeric range tests
|
||||
}
|
||||
@@ -604,7 +617,7 @@ var groupTests = []struct {
|
||||
{`a(?:b|c|d)(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
||||
{`a(?:b|c|d)*(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
||||
{`a(?:b|c|d)+(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {2, 3}}}},
|
||||
{`a(?:b|(c|e){1,2}?|d)+?(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {1, 2}, {2, 3}}}},
|
||||
{`a(?:b|(c|e){1,2}?|d)+(.)`, nil, `ace`, []Match{[]Group{{0, 3}, {1, 2}, {2, 3}}}},
|
||||
{`(?<!-):(.*)(?<!-):`, nil, `a:bc-:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
||||
{`(?<!\\):(.*)(?<!\\):`, nil, `a:bc\:de:f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
||||
{`(?<!\?)'(.*)(?<!\?)'`, nil, `a'bc?'de'f`, []Match{[]Group{{1, 9}, {2, 8}}}},
|
||||
@@ -612,6 +625,15 @@ var groupTests = []struct {
|
||||
{`([\s]*)([\S]*)([\s]*)`, nil, ` testing!1972`, []Match{[]Group{{0, 13}, {0, 1}, {1, 13}, {13, 13}}, []Group{{13, 13}, {13, 13}, {13, 13}, {13, 13}}}},
|
||||
{`(\s*)(\S*)(\s*)`, nil, ` testing!1972`, []Match{[]Group{{0, 13}, {0, 1}, {1, 13}, {13, 13}}, []Group{{13, 13}, {13, 13}, {13, 13}, {13, 13}}}},
|
||||
{`(([a-z]+):)?([a-z]+)$`, nil, `smil`, []Match{[]Group{{0, 4}, {-1, -1}, {-1, -1}, {0, 4}}}},
|
||||
|
||||
{`(x?)?`, nil, `x`, []Match{[]Group{{0, 1}, {0, 1}}, []Group{{1, 1}, {1, 1}}}},
|
||||
{`"(?:\\"|[^"])*"`, nil, `"\""`, []Match{[]Group{{0, 4}}}},
|
||||
|
||||
{`^((a)c)?(ab)$`, nil, `ab`, []Match{[]Group{{0, 2}, {-1, -1}, {-1, -1}, {0, 2}}}},
|
||||
|
||||
{`^([ab]*)(?=(b)?)c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
|
||||
{`^([ab]*)(?!(b))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
|
||||
{`^([ab]*)(?<!(a))c`, nil, `abc`, []Match{[]Group{{0, 3}, {0, 2}}}},
|
||||
}
|
||||
|
||||
func TestFindAllMatches(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user