Added support for start-of-input (\A) and end-of-input (\Z) assertions
This commit is contained in:
@@ -878,6 +878,10 @@ func thompson(re []postfixNode) (Reg, error) {
|
|||||||
stateToAdd.assert = wboundAssert
|
stateToAdd.assert = wboundAssert
|
||||||
case 'B':
|
case 'B':
|
||||||
stateToAdd.assert = nonwboundAssert
|
stateToAdd.assert = nonwboundAssert
|
||||||
|
case 'A':
|
||||||
|
stateToAdd.assert = soiAssert
|
||||||
|
case 'Z':
|
||||||
|
stateToAdd.assert = eoiAssert
|
||||||
}
|
}
|
||||||
} else { // Lookaround
|
} else { // Lookaround
|
||||||
stateToAdd.lookaroundRegex = string(c.contents)
|
stateToAdd.lookaroundRegex = string(c.contents)
|
||||||
|
15
regex/nfa.go
15
regex/nfa.go
@@ -11,8 +11,10 @@ type assertType int
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
noneAssert assertType = iota
|
noneAssert assertType = iota
|
||||||
sosAssert
|
sosAssert // Start of string (^)
|
||||||
eosAssert
|
soiAssert // Start of input (\A)
|
||||||
|
eosAssert // End of string ($)
|
||||||
|
eoiAssert // End of input (\Z)
|
||||||
wboundAssert
|
wboundAssert
|
||||||
nonwboundAssert
|
nonwboundAssert
|
||||||
plaAssert // Positive lookahead
|
plaAssert // Positive lookahead
|
||||||
@@ -119,6 +121,15 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
|
|||||||
// Index is at the end of the string, or it points to the last character which is a newline
|
// Index is at the end of the string, or it points to the last character which is a newline
|
||||||
return idx == len(str) || (multilineMode && str[idx] == '\n')
|
return idx == len(str) || (multilineMode && str[idx] == '\n')
|
||||||
}
|
}
|
||||||
|
if s.assert == soiAssert {
|
||||||
|
// Only true at the start of the input, regardless of mode
|
||||||
|
return idx == 0
|
||||||
|
}
|
||||||
|
if s.assert == eoiAssert {
|
||||||
|
// Only true at the end of the input, regardless of mode
|
||||||
|
return idx == len(str)
|
||||||
|
}
|
||||||
|
|
||||||
if s.assert == wboundAssert {
|
if s.assert == wboundAssert {
|
||||||
return isWordBoundary(str, idx)
|
return isWordBoundary(str, idx)
|
||||||
}
|
}
|
||||||
|
@@ -98,6 +98,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
|
|||||||
if c == 'B' && inCharClass { // Invalid
|
if c == 'B' && inCharClass { // Invalid
|
||||||
return postfixNode{}, fmt.Errorf("word boundaries are not allowed in character class")
|
return postfixNode{}, fmt.Errorf("word boundaries are not allowed in character class")
|
||||||
}
|
}
|
||||||
|
case 'A', 'Z': // A is start of input, Z is end of input (regardless of RE_MULTILINE)
|
||||||
|
if inCharClass {
|
||||||
|
return postfixNode{}, fmt.Errorf("input boundaries are not allowed in character class")
|
||||||
|
} else {
|
||||||
|
toReturn.nodetype = assertionNode
|
||||||
|
toReturn.contents = append(toReturn.contents, c)
|
||||||
|
}
|
||||||
case 'n': // Newline character
|
case 'n': // Newline character
|
||||||
toReturn.nodetype = characterNode
|
toReturn.nodetype = characterNode
|
||||||
toReturn.contents = append(toReturn.contents, '\n')
|
toReturn.contents = append(toReturn.contents, '\n')
|
||||||
|
Reference in New Issue
Block a user