Commit 76e8a87f authored by Tom Lane

Teach regex_fixed_prefix() the correct handling of advanced regex
escapes --- they aren't simply quoted characters.  Problem noted by
Antti Salmela.  Also fix problem with incorrect handling of multibyte
characters when followed by a quantifier.
parent 4e91824b
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.168 2004/12/02 02:45:07 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -3459,6 +3459,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, ...@@ -3459,6 +3459,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
char *match; char *match;
int pos, int pos,
match_pos, match_pos,
prev_pos,
prev_match_pos,
paren_depth; paren_depth;
char *patt; char *patt;
char *rest; char *rest;
...@@ -3519,11 +3521,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, ...@@ -3519,11 +3521,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
/* OK, allocate space for pattern */ /* OK, allocate space for pattern */
match = palloc(strlen(patt) + 1); match = palloc(strlen(patt) + 1);
match_pos = 0; prev_match_pos = match_pos = 0;
/* note start at pos 1 to skip leading ^ */ /* note start at pos 1 to skip leading ^ */
for (pos = 1; patt[pos]; pos++) for (prev_pos = pos = 1; patt[pos]; )
{ {
int len;
/* /*
* Check for characters that indicate multiple possible matches * Check for characters that indicate multiple possible matches
* here. XXX I suspect isalpha() is not an adequately * here. XXX I suspect isalpha() is not an adequately
...@@ -3537,6 +3541,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, ...@@ -3537,6 +3541,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
(case_insensitive && isalpha((unsigned char) patt[pos]))) (case_insensitive && isalpha((unsigned char) patt[pos])))
break; break;
/*
* In AREs, backslash followed by alphanumeric is an escape, not
* a quoted character. Must treat it as having multiple possible
* matches.
*/
if (patt[pos] == '\\' && isalnum((unsigned char) patt[pos + 1]))
break;
/* /*
* Check for quantifiers. Except for +, this means the preceding * Check for quantifiers. Except for +, this means the preceding
* character is optional, so we must remove it from the prefix * character is optional, so we must remove it from the prefix
...@@ -3546,14 +3558,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, ...@@ -3546,14 +3558,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
patt[pos] == '?' || patt[pos] == '?' ||
patt[pos] == '{') patt[pos] == '{')
{ {
if (match_pos > 0) match_pos = prev_match_pos;
match_pos--; pos = prev_pos;
pos--;
break; break;
} }
if (patt[pos] == '+') if (patt[pos] == '+')
{ {
pos--; pos = prev_pos;
break; break;
} }
if (patt[pos] == '\\') if (patt[pos] == '\\')
...@@ -3563,7 +3574,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, ...@@ -3563,7 +3574,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
if (patt[pos] == '\0') if (patt[pos] == '\0')
break; break;
} }
match[match_pos++] = patt[pos]; /* save position in case we need to back up on next loop cycle */
prev_match_pos = match_pos;
prev_pos = pos;
/* must use encoding-aware processing here */
len = pg_mblen(&patt[pos]);
memcpy(&match[match_pos], &patt[pos], len);
match_pos += len;
pos += len;
} }
match[match_pos] = '\0'; match[match_pos] = '\0';
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment