Added support for start-of-input (\A) and end-of-input (\Z) assertions
This commit is contained in:
@@ -878,6 +878,10 @@ func thompson(re []postfixNode) (Reg, error) {
|
||||
stateToAdd.assert = wboundAssert
|
||||
case 'B':
|
||||
stateToAdd.assert = nonwboundAssert
|
||||
case 'A':
|
||||
stateToAdd.assert = soiAssert
|
||||
case 'Z':
|
||||
stateToAdd.assert = eoiAssert
|
||||
}
|
||||
} else { // Lookaround
|
||||
stateToAdd.lookaroundRegex = string(c.contents)
|
||||
|
15
regex/nfa.go
15
regex/nfa.go
@@ -11,8 +11,10 @@ type assertType int
|
||||
|
||||
const (
|
||||
noneAssert assertType = iota
|
||||
sosAssert
|
||||
eosAssert
|
||||
sosAssert // Start of string (^)
|
||||
soiAssert // Start of input (\A)
|
||||
eosAssert // End of string ($)
|
||||
eoiAssert // End of input (\Z)
|
||||
wboundAssert
|
||||
nonwboundAssert
|
||||
plaAssert // Positive lookahead
|
||||
@@ -119,6 +121,15 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
|
||||
// Index is at the end of the string, or (in multiline mode only) it points to a newline character
|
||||
return idx == len(str) || (multilineMode && str[idx] == '\n')
|
||||
}
|
||||
if s.assert == soiAssert {
|
||||
// Only true at the start of the input, regardless of mode
|
||||
return idx == 0
|
||||
}
|
||||
if s.assert == eoiAssert {
|
||||
// Only true at the end of the input, regardless of mode
|
||||
return idx == len(str)
|
||||
}
|
||||
|
||||
if s.assert == wboundAssert {
|
||||
return isWordBoundary(str, idx)
|
||||
}
|
||||
|
@@ -98,6 +98,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
||||
if c == 'B' && inCharClass { // Invalid
|
||||
return postfixNode{}, fmt.Errorf("word boundaries are not allowed in character class")
|
||||
}
|
||||
case 'A', 'Z': // A is start of input, Z is end of input (regardless of RE_MULTILINE)
|
||||
if inCharClass {
|
||||
return postfixNode{}, fmt.Errorf("input boundaries are not allowed in character class")
|
||||
} else {
|
||||
toReturn.nodetype = assertionNode
|
||||
toReturn.contents = append(toReturn.contents, c)
|
||||
}
|
||||
case 'n': // Newline character
|
||||
toReturn.nodetype = characterNode
|
||||
toReturn.contents = append(toReturn.contents, '\n')
|
||||
|
Reference in New Issue
Block a user