Commit 9d4ca013 authored by Tom Lane

Ensure that a tsquery like '!foo' matches empty tsvectors.

!foo means "the tsvector does not contain foo", and therefore it should
match an empty tsvector.  ts_match_vq() overenthusiastically supposed
that an empty tsvector could never match any query, so it forcibly
returned FALSE, the wrong answer.  Remove the premature optimization.

Our behavior on this point was inconsistent, because while seqscans and
GIST index searches both failed to match empty tsvectors, GIN index
searches would find them, since GIN scans don't rely on ts_match_vq().
That makes this certainly a bug, not a debatable definition disagreement,
so back-patch to all supported branches.

Report and diagnosis by Tom Dunstan (bug #14515); added test cases by me.

Discussion: https://postgr.es/m/20170126025524.1434.97828@wrigleys.postgresql.org
parent bdadf36e
...@@ -1927,7 +1927,8 @@ ts_match_vq(PG_FUNCTION_ARGS) ...@@ -1927,7 +1927,8 @@ ts_match_vq(PG_FUNCTION_ARGS)
CHKVAL chkval; CHKVAL chkval;
bool result; bool result;
if (!val->size || !query->size) /* empty query matches nothing */
if (!query->size)
{ {
PG_FREE_IF_COPY(val, 0); PG_FREE_IF_COPY(val, 0);
PG_FREE_IF_COPY(query, 1); PG_FREE_IF_COPY(query, 1);
......
...@@ -98,8 +98,108 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; ...@@ -98,8 +98,108 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
494 494
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
count
-------
158
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
count
-------
0
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
count
-------
508
(1 row)
create index wowidx on test_tsvector using gist (a); create index wowidx on test_tsvector using gist (a);
SET enable_seqscan=OFF; SET enable_seqscan=OFF;
SET enable_indexscan=ON;
SET enable_bitmapscan=OFF;
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
QUERY PLAN
-------------------------------------------------------
Aggregate
-> Index Scan using wowidx on test_tsvector
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
(3 rows)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count
-------
158
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
count
-------
17
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
count
-------
6
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
count
-------
98
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
count
-------
23
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
count
-------
39
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
count
-------
494
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
count
-------
158
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
count
-------
0
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
count
-------
508
(1 row)
SET enable_indexscan=OFF;
SET enable_bitmapscan=ON;
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
QUERY PLAN
-------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on test_tsvector
Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
-> Bitmap Index Scan on wowidx
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
(5 rows)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count count
------- -------
...@@ -148,10 +248,35 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}'); ...@@ -148,10 +248,35 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
158 158
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
count
-------
0
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
count
-------
508
(1 row)
RESET enable_seqscan; RESET enable_seqscan;
RESET enable_indexscan;
RESET enable_bitmapscan;
DROP INDEX wowidx; DROP INDEX wowidx;
CREATE INDEX wowidx ON test_tsvector USING gin (a); CREATE INDEX wowidx ON test_tsvector USING gin (a);
SET enable_seqscan=OFF; SET enable_seqscan=OFF;
-- GIN only supports bitmapscan, so no need to test plain indexscan
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
QUERY PLAN
-------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on test_tsvector
Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
-> Bitmap Index Scan on wowidx
Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
(5 rows)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count count
------- -------
...@@ -200,6 +325,18 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}'); ...@@ -200,6 +325,18 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
158 158
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
count
-------
0
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
count
-------
508
(1 row)
RESET enable_seqscan; RESET enable_seqscan;
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH'); INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10; SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;
......
...@@ -773,6 +773,18 @@ select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true"; ...@@ -773,6 +773,18 @@ select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
t t
(1 row) (1 row)
select to_tsvector('simple', 'x y q y') @@ '!foo' AS "true";
true
------
t
(1 row)
select to_tsvector('simple', '') @@ '!foo' AS "true";
true
------
t
(1 row)
--ranking --ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
ts_rank ts_rank
......
...@@ -48,10 +48,17 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; ...@@ -48,10 +48,17 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
create index wowidx on test_tsvector using gist (a); create index wowidx on test_tsvector using gist (a);
SET enable_seqscan=OFF; SET enable_seqscan=OFF;
SET enable_indexscan=ON;
SET enable_bitmapscan=OFF;
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh'; SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
...@@ -61,14 +68,37 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; ...@@ -61,14 +68,37 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}'); SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
SET enable_indexscan=OFF;
SET enable_bitmapscan=ON;
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
RESET enable_seqscan; RESET enable_seqscan;
RESET enable_indexscan;
RESET enable_bitmapscan;
DROP INDEX wowidx; DROP INDEX wowidx;
CREATE INDEX wowidx ON test_tsvector USING gin (a); CREATE INDEX wowidx ON test_tsvector USING gin (a);
SET enable_seqscan=OFF; SET enable_seqscan=OFF;
-- GIN only supports bitmapscan, so no need to test plain indexscan
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh'; SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
...@@ -78,8 +108,11 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; ...@@ -78,8 +108,11 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}'); SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
RESET enable_seqscan; RESET enable_seqscan;
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH'); INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10; SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;
SELECT * FROM ts_stat('SELECT a FROM test_tsvector', 'AB') ORDER BY ndoc DESC, nentry DESC, word; SELECT * FROM ts_stat('SELECT a FROM test_tsvector', 'AB') ORDER BY ndoc DESC, nentry DESC, word;
......
...@@ -145,6 +145,8 @@ select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true"; ...@@ -145,6 +145,8 @@ select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true";
select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false"; select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false";
select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true"; select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true";
select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true"; select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
select to_tsvector('simple', 'x y q y') @@ '!foo' AS "true";
select to_tsvector('simple', '') @@ '!foo' AS "true";
--ranking --ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment