Commit d074b4e5 authored by Tom Lane

Fix regexp_matches() handling of zero-length matches.

We'd find the same match twice if it was of zero length and not immediately
adjacent to the previous match.  replace_text_regexp() got similar cases
right, so adjust this search logic to match that.  Note that even though
the regexp_split_to_xxx() functions share this code, they did not display
equivalent misbehavior, because the second match would be considered
degenerate and ignored.

Jeevan Chalke, with some cosmetic changes by me.
parent c876fb42
...@@ -957,14 +957,13 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags, ...@@ -957,14 +957,13 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
break; break;
/* /*
* Advance search position. Normally we start just after the end of * Advance search position. Normally we start the next search at the
* the previous match, but always advance at least one character (the * end of the previous match; but if the match was of zero length, we
* special case can occur if the pattern matches zero characters just * have to advance by one character, or we'd just find the same match
* after the prior match or at the end of the string). * again.
*/ */
if (start_search < pmatch[0].rm_eo) start_search = prev_match_end;
start_search = pmatch[0].rm_eo; if (pmatch[0].rm_so == pmatch[0].rm_eo)
else
start_search++; start_search++;
if (start_search > wide_len) if (start_search > wide_len)
break; break;
......
...@@ -3083,7 +3083,10 @@ replace_text_regexp(text *src_text, void *regexp, ...@@ -3083,7 +3083,10 @@ replace_text_regexp(text *src_text, void *regexp,
break; break;
/* /*
* Search from next character when the matching text is zero width. * Advance search position. Normally we start the next search at the
* end of the previous match; but if the match was of zero length, we
* have to advance by one character, or we'd just find the same match
* again.
*/ */
search_start = data_pos; search_start = data_pos;
if (pmatch[0].rm_so == pmatch[0].rm_eo) if (pmatch[0].rm_so == pmatch[0].rm_eo)
......
...@@ -440,6 +440,64 @@ SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$); ...@@ -440,6 +440,64 @@ SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
{barbeque} {barbeque}
(1 row) (1 row)
-- start/end-of-line matches are of zero length
SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
regexp_matches
----------------
{""}
{""}
{""}
{""}
(4 rows)
SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
regexp_matches
----------------
{""}
{""}
{""}
{""}
(4 rows)
SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
regexp_matches
----------------
{1}
{2}
{3}
{4}
{""}
(5 rows)
SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
regexp_matches
----------------
{""}
{1}
{""}
{2}
{""}
{3}
{""}
{4}
{""}
{""}
(10 rows)
SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
regexp_matches
----------------
{""}
{1}
{""}
{2}
{""}
{3}
{""}
{4}
{""}
(9 rows)
-- give me errors -- give me errors
SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz'); SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz');
ERROR: invalid regexp option: "z" ERROR: invalid regexp option: "z"
......
...@@ -158,6 +158,13 @@ SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$); ...@@ -158,6 +158,13 @@ SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
-- no capture groups -- no capture groups
SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$); SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
-- start/end-of-line matches are of zero length
SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
-- give me errors -- give me errors
SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz'); SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz');
SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$); SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment