Commit 0c4f355c authored by Alexander Korotkov's avatar Alexander Korotkov

Fix parsing of complex morphs to tsquery

When to_tsquery() or websearch_to_tsquery() meets a complex morph containing
multiple words residing in adjacent positions, these words are connected
with the OP_AND operator.  That leads to surprising results.  For instance,
both websearch_to_tsquery('"pg_class pg"') and to_tsquery('pg_class <-> pg')
produce the '( pg & class ) <-> pg' tsquery.  This tsquery requires the
'pg' and 'class' words to reside at the same position and so doesn't match
to_tsvector('pg_class pg').  It appears to be ridiculous behavior, which
needs to be fixed.

This commit makes to_tsquery() and websearch_to_tsquery() connect words
residing in adjacent positions with OP_PHRASE.  Therefore, those words are now
chained normally with other OP_PHRASE operators.  The examples above now
produce the 'pg <-> class <-> pg' tsquery, which matches
to_tsvector('pg_class pg').

Another effect of this commit is that complex morph word positions now need to
match the tsvector even if there is no surrounding OP_PHRASE.  This behavior
change generally looks like an improvement, but it makes this commit not
backpatchable.

Reported-by: Barry Pederson
Bug: #16592
Discussion: https://postgr.es/m/16592-70b110ff9731c07d@postgresql.org
Discussion: https://postgr.es/m/CAPpHfdv0EzVhf6CWfB1_TTZqXV_2Sn-jSY3zSd7ePH%3D-%2B1V2DQ%40mail.gmail.com
Author: Alexander Korotkov
Reviewed-by: Tom Lane, Neil Chen
parent dfb75e47
...@@ -20,10 +20,20 @@ ...@@ -20,10 +20,20 @@
#include "utils/jsonfuncs.h" #include "utils/jsonfuncs.h"
/*
* Opaque data structure, which is passed by parse_tsquery() to pushval_morph().
*/
typedef struct MorphOpaque typedef struct MorphOpaque
{ {
Oid cfg_id; Oid cfg_id;
int qoperator; /* query operator */
/*
* Single tsquery morph could be parsed into multiple words. When these
* words reside in adjacent positions, they are connected using this
* operator. Usually, that is OP_PHRASE, which requires word positions of
* a complex morph to exactly match the tsvector.
*/
int qoperator;
} MorphOpaque; } MorphOpaque;
typedef struct TSVectorBuildState typedef struct TSVectorBuildState
...@@ -573,7 +583,14 @@ to_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -573,7 +583,14 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
MorphOpaque data; MorphOpaque data;
data.cfg_id = PG_GETARG_OID(0); data.cfg_id = PG_GETARG_OID(0);
data.qoperator = OP_AND;
/*
* Passing OP_PHRASE as a qoperator makes tsquery require the word
* positions of a complex morph to exactly match the tsvector. Also, when
* the complex morphs are connected with OP_PHRASE operator, we connect
* all their words into the OP_PHRASE sequence.
*/
data.qoperator = OP_PHRASE;
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
...@@ -603,6 +620,12 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -603,6 +620,12 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
MorphOpaque data; MorphOpaque data;
data.cfg_id = PG_GETARG_OID(0); data.cfg_id = PG_GETARG_OID(0);
/*
* parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
* single morph. Passing OP_AND as a qoperator makes tsquery require
* matching of all words independently of their positions.
*/
data.qoperator = OP_AND; data.qoperator = OP_AND;
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
...@@ -634,6 +657,12 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -634,6 +657,12 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
MorphOpaque data; MorphOpaque data;
data.cfg_id = PG_GETARG_OID(0); data.cfg_id = PG_GETARG_OID(0);
/*
* parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
* single morph. Passing OP_PHRASE as a qoperator makes tsquery require
* matching of word positions.
*/
data.qoperator = OP_PHRASE; data.qoperator = OP_PHRASE;
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
...@@ -665,7 +694,13 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -665,7 +694,13 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
data.cfg_id = PG_GETARG_OID(0); data.cfg_id = PG_GETARG_OID(0);
data.qoperator = OP_AND; /*
* Passing OP_PHRASE as a qoperator makes tsquery require the word
* positions of a complex morph to exactly match the tsvector. Also, when
* the complex morphs are given in quotes, we connect all their words into
* the OP_PHRASE sequence.
*/
data.qoperator = OP_PHRASE;
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
......
This diff is collapsed.
...@@ -554,10 +554,10 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), ...@@ -554,10 +554,10 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'),
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT); CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
\set ECHO none \set ECHO none
\copy test_tsquery from stdin \copy test_tsquery from stdin
'New York' new & york | big & apple | nyc 'New York' new <-> york | big <-> apple | nyc
Moscow moskva | moscow Moscow moskva | moscow
'Sanct Peter' Peterburg | peter | 'Sanct Peterburg' 'Sanct Peter' Peterburg | peter | 'Sanct Peterburg'
'foo bar qq' foo & (bar | qq) & city foo & bar & qq foo & (bar | qq) & city
1 & (2 <-> 3) 2 <-> 4 1 & (2 <-> 3) 2 <-> 4
5 <-> 6 5 <-> 7 5 <-> 6 5 <-> 7
\. \.
...@@ -569,21 +569,21 @@ ALTER TABLE test_tsquery ADD COLUMN sample tsquery; ...@@ -569,21 +569,21 @@ ALTER TABLE test_tsquery ADD COLUMN sample tsquery;
UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text); UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text);
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new <-> york';
CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword); CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword);
SET enable_seqscan=OFF; SET enable_seqscan=OFF;
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new <-> york';
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new & york'; SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new <-> york';
RESET enable_seqscan; RESET enable_seqscan;
...@@ -593,11 +593,11 @@ SELECT ts_rewrite(ts_rewrite('new & !york ', 'york', '!jersey'), ...@@ -593,11 +593,11 @@ SELECT ts_rewrite(ts_rewrite('new & !york ', 'york', '!jersey'),
SELECT ts_rewrite('moscow', 'SELECT keyword, sample FROM test_tsquery'::text ); SELECT ts_rewrite('moscow', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'::text ); SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'::text ); SELECT ts_rewrite('bar & qq & foo & (new <-> york)', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery'); SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'); SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'); SELECT ts_rewrite( 'bar & qq & foo & (new <-> york)', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text ); SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text ); SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
...@@ -614,10 +614,10 @@ SELECT keyword FROM test_tsquery WHERE keyword <@ 'new'; ...@@ -614,10 +614,10 @@ SELECT keyword FROM test_tsquery WHERE keyword <@ 'new';
SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow'; SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow';
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops); CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops);
SET enable_seqscan=OFF; SET enable_seqscan=OFF;
...@@ -628,10 +628,10 @@ SELECT keyword FROM test_tsquery WHERE keyword <@ 'new'; ...@@ -628,10 +628,10 @@ SELECT keyword FROM test_tsquery WHERE keyword <@ 'new';
SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow'; SELECT keyword FROM test_tsquery WHERE keyword <@ 'moscow';
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query; SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo & (new <-> york)') AS query;
SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz'); SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
SELECT to_tsvector('foo bar') @@ SELECT to_tsvector('foo bar') @@
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment