Added support for start-of-input (\A) and end-of-input (\Z) assertions

2025-01-30 13:56:56 -05:00
parent db7c884b83
commit ee51e39d59
3 changed files with 24 additions and 2 deletions
--- a/regex/compile.go
+++ b/regex/compile.go
@@ -878,6 +878,10 @@ func thompson(re []postfixNode) (Reg, error) {
 						stateToAdd.assert = wboundAssert
 					case 'B':
 						stateToAdd.assert = nonwboundAssert
 					case 'A':
 						stateToAdd.assert = soiAssert
 					case 'Z':
 						stateToAdd.assert = eoiAssert
 					}
 				} else { // Lookaround
 					stateToAdd.lookaroundRegex = string(c.contents)
--- a/regex/nfa.go
+++ b/regex/nfa.go
@@ -11,8 +11,10 @@ type assertType int
 const (
 	noneAssert assertType = iota
-	sosAssert
+	sosAssert             // Start of string (^)
-	eosAssert
+	soiAssert             // Start of input (\A)
 	eosAssert             // End of string ($)
 	eoiAssert             // End of input (\Z)
 	wboundAssert
 	nonwboundAssert
 	plaAssert        // Positive lookahead
@@ -119,6 +121,15 @@ func (s nfaState) checkAssertion(str []rune, idx int) bool {
 		// Index is at the end of the string, or (in multiline mode only) it points at a newline character
 		return idx == len(str) || (multilineMode && str[idx] == '\n')
 	}
 	if s.assert == soiAssert {
 		// Only true at the start of the input, regardless of mode
 		return idx == 0
 	}
 	if s.assert == eoiAssert {
 		// Only true at the end of the input, regardless of mode
 		return idx == len(str)
 	}
 	if s.assert == wboundAssert {
 		return isWordBoundary(str, idx)
 	}
--- a/regex/postfixNode.go
+++ b/regex/postfixNode.go
@@ -98,6 +98,13 @@ func newEscapedNode(c rune, inCharClass bool) (postfixNode, error) {
 		if c == 'B' && inCharClass { // Invalid
 			return postfixNode{}, fmt.Errorf("word boundaries are not allowed in character class")
 		}
 	case 'A', 'Z': // A is start of input, Z is end of input (regardless of RE_MULTILINE)
 		if inCharClass {
 			return postfixNode{}, fmt.Errorf("input boundaries are not allowed in character class")
 		} else {
 			toReturn.nodetype = assertionNode
 			toReturn.contents = append(toReturn.contents, c)
 		}
 	case 'n': // Newline character
 		toReturn.nodetype = characterNode
 		toReturn.contents = append(toReturn.contents, '\n')