diff --git a/regex/compile.go b/regex/compile.go index 5fa56af..f858076 100644 --- a/regex/compile.go +++ b/regex/compile.go @@ -878,6 +878,10 @@ func thompson(re []postfixNode) (Reg, error) { stateToAdd.assert = wboundAssert case 'B': stateToAdd.assert = nonwboundAssert + case 'A': + stateToAdd.assert = soiAssert + case 'Z': + stateToAdd.assert = eoiAssert } } else { // Lookaround stateToAdd.lookaroundRegex = string(c.contents) diff --git a/regex/nfa.go b/regex/nfa.go index bb7c9b6..0dc4e2a 100644 --- a/regex/nfa.go +++ b/regex/nfa.go @@ -11,8 +11,10 @@ type assertType int const ( noneAssert assertType = iota - sosAssert - eosAssert + sosAssert // Start of string (^) + soiAssert // Start of input (\A) + eosAssert // End of string ($) + eoiAssert // End of input (\Z) wboundAssert nonwboundAssert plaAssert // Positive lookahead @@ -119,6 +121,15 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool { // Index is at the end of the string, or it points to the last character which is a newline return idx == len(str) || (multilineMode && str[idx] == '\n') } + if s.assert == soiAssert { + // Only true at the start of the input, regardless of mode + return idx == 0 + } + if s.assert == eoiAssert { + // Only true at the end of the input, regardless of mode + return idx == len(str) + } + if s.assert == wboundAssert { return isWordBoundary(str, idx) } diff --git a/regex/postfixNode.go b/regex/postfixNode.go index a6a3cd8..47f8c48 100644 --- a/regex/postfixNode.go +++ b/regex/postfixNode.go @@ -98,6 +98,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) { if c == 'B' && inCharClass { // Invalid return postfixNode{}, fmt.Errorf("word boundaries are not allowed in character class") } + case 'A', 'Z': // A is start of input, Z is end of input (regardless of RE_MULTILINE) + if inCharClass { + return postfixNode{}, fmt.Errorf("input boundaries are not allowed in character class") + } else { + toReturn.nodetype = assertionNode + toReturn.contents = append(toReturn.contents, c) + } case 'n': // Newline character toReturn.nodetype = characterNode toReturn.contents = append(toReturn.contents, '\n')