Changed the value of EPSILON, so that we can use the NUL character
(which it used to be) in a regex. Also added code to detect escaped backslashes. Specifically, I replace an escaped backslash with a metacharacter, then replace it back later on. This prevents problems such as misjudging whether the opening bracket is escaped in '\\[a]'.
This commit is contained in:
		
							
								
								
									
										12
									
								
								compile.go
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								compile.go
									
									
									
									
									
								
							@@ -81,6 +81,10 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 | 
				
			|||||||
	//
 | 
						//
 | 
				
			||||||
	// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
 | 
						// Also check for non-capturing groups. The LPAREN of a non-capturing group looks like this: '(?:'
 | 
				
			||||||
	// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
 | 
						// I take this out, and put in a special character - NONCAPLPAREN_CHAR.
 | 
				
			||||||
 | 
						//
 | 
				
			||||||
 | 
						// Finally, check for escaped backslashes. Replace these with the ESC_BACKSLASH metacharacter. Later, in thompson(),
 | 
				
			||||||
 | 
						// these will be converted back. This avoids confusion in detecting whether a character is escaped, e.g. detecting
 | 
				
			||||||
 | 
						// whether '\\[a]' has an escaped opening bracket (it doesn't).
 | 
				
			||||||
	for i := 0; i < len(re_runes_orig); i++ {
 | 
						for i := 0; i < len(re_runes_orig); i++ {
 | 
				
			||||||
		c := re_runes_orig[i]
 | 
							c := re_runes_orig[i]
 | 
				
			||||||
		if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
 | 
							if c == '<' && (i == 0 || (re_runes_orig[i-1] != '\\' && re_runes_orig[i-1] != '?')) {
 | 
				
			||||||
@@ -115,6 +119,9 @@ func shuntingYard(re string, flags ...ReFlag) ([]postfixNode, error) {
 | 
				
			|||||||
		} else if c == '(' && i < len(re_runes_orig)-2 && re_runes_orig[i+1] == '?' && re_runes_orig[i+2] == ':' {
 | 
							} else if c == '(' && i < len(re_runes_orig)-2 && re_runes_orig[i+1] == '?' && re_runes_orig[i+2] == ':' {
 | 
				
			||||||
			re_runes = append(re_runes, NONCAPLPAREN_CHAR)
 | 
								re_runes = append(re_runes, NONCAPLPAREN_CHAR)
 | 
				
			||||||
			i += 2
 | 
								i += 2
 | 
				
			||||||
 | 
							} else if c == '\\' && i < len(re_runes_orig)-1 && re_runes_orig[i+1] == '\\' { // Escaped backslash
 | 
				
			||||||
 | 
								re_runes = append(re_runes, ESC_BACKSLASH)
 | 
				
			||||||
 | 
								i++
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			re_runes = append(re_runes, c)
 | 
								re_runes = append(re_runes, c)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
@@ -671,6 +678,11 @@ func thompson(re []postfixNode) (Reg, error) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								// Replace ESC_BACKSLASH with actual backslash, so that we can actually check if we encounter it
 | 
				
			||||||
 | 
								replaceByValue([]int(state.content), int(ESC_BACKSLASH), '\\')
 | 
				
			||||||
 | 
								replaceByValue(state.except, ESC_BACKSLASH, '\\')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			nfa = append(nfa, &state)
 | 
								nfa = append(nfa, &state)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		if c.nodetype == LPAREN || c.nodetype == RPAREN {
 | 
							if c.nodetype == LPAREN || c.nodetype == RPAREN {
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user