Added support for start-of-input (\A) and end-of-input (\Z) assertions, which match only at the very start or end of the input regardless of multiline mode

master
Aadhavan Srinivasan 3 days ago
parent db7c884b83
commit ee51e39d59

@ -878,6 +878,10 @@ func thompson(re []postfixNode) (Reg, error) {
stateToAdd.assert = wboundAssert stateToAdd.assert = wboundAssert
case 'B': case 'B':
stateToAdd.assert = nonwboundAssert stateToAdd.assert = nonwboundAssert
case 'A':
stateToAdd.assert = soiAssert
case 'Z':
stateToAdd.assert = eoiAssert
} }
} else { // Lookaround } else { // Lookaround
stateToAdd.lookaroundRegex = string(c.contents) stateToAdd.lookaroundRegex = string(c.contents)

@ -11,8 +11,10 @@ type assertType int
const ( const (
noneAssert assertType = iota noneAssert assertType = iota
sosAssert sosAssert // Start of string (^)
eosAssert soiAssert // Start of input (\A)
eosAssert // End of string ($)
eoiAssert // End of input (\Z)
wboundAssert wboundAssert
nonwboundAssert nonwboundAssert
plaAssert // Positive lookahead plaAssert // Positive lookahead
@ -119,6 +121,15 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
// Index is at the end of the string, or it points to the last character which is a newline // Index is at the end of the string, or it points to the last character which is a newline
return idx == len(str) || (multilineMode && str[idx] == '\n') return idx == len(str) || (multilineMode && str[idx] == '\n')
} }
if s.assert == soiAssert {
// Only true at the start of the input, regardless of mode
return idx == 0
}
if s.assert == eoiAssert {
// Only true at the end of the input, regardless of mode
return idx == len(str)
}
if s.assert == wboundAssert { if s.assert == wboundAssert {
return isWordBoundary(str, idx) return isWordBoundary(str, idx)
} }

@ -98,6 +98,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
if c == 'B' && inCharClass { // Invalid if c == 'B' && inCharClass { // Invalid
return postfixNode{}, fmt.Errorf("word boundaries are not allowed in character class") return postfixNode{}, fmt.Errorf("word boundaries are not allowed in character class")
} }
case 'A', 'Z': // A is start of input, Z is end of input (regardless of RE_MULTILINE)
if inCharClass {
return postfixNode{}, fmt.Errorf("input boundaries are not allowed in character class")
} else {
toReturn.nodetype = assertionNode
toReturn.contents = append(toReturn.contents, c)
}
case 'n': // Newline character case 'n': // Newline character
toReturn.nodetype = characterNode toReturn.nodetype = characterNode
toReturn.contents = append(toReturn.contents, '\n') toReturn.contents = append(toReturn.contents, '\n')

Loading…
Cancel
Save